Diffstat
16 files changed, 147 insertions, 59 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 83b763447..19db17c6d 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC
     ir_opt/rescaling_pass.cpp
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/texture_pass.cpp
+    ir_opt/vendor_workaround_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
     precompiled_headers.h
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index d0e308124..64e7bad75 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& offset, const IR::Value& lod_clamp) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     ScopedRegister dpdx, dpdy, coords;
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     if (multi_component) {
         // Allocate this early to avoid aliasing other registers
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             coords = ScopedRegister{ctx.reg_alloc};
         }
     }
@@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
                 dpdy.reg, derivatives_vec);
         Register final_coord;
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             ctx.Add("MOV.F {}.z,{}.x;"
                     "MOV.F {}.z,{}.y;",
                     dpdx.reg, coord_vec, dpdy.reg, coord_vec);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index d9872ecc2..6e940bd5a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (sparse_inst) {
         throw NotImplementedException("EmitImageGradient Sparse");
     }
-    if (!offset.IsEmpty() && info.num_derivates <= 2) {
+    if (!offset.IsEmpty() && info.num_derivatives <= 2) {
         throw NotImplementedException("EmitImageGradient offset");
     }
     const auto texture{Texture(ctx, info, index)};
     const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
     if (multi_component) {
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             const auto offset_vec{ctx.var_alloc.Consume(offset)};
             ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
                     coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 34592a01f..0031fa5fb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -407,7 +407,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
         }
         ctx.AddCapability(spv::Capability::DemoteToHelperInvocation);
     }
-    if (info.stores[IR::Attribute::ViewportIndex]) {
+    if (info.stores[IR::Attribute::ViewportIndex] && profile.support_multi_viewport) {
         ctx.AddCapability(spv::Capability::MultiViewport);
     }
     if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 1d77426e0..e5a78a914 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -84,6 +84,10 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         }
         return std::nullopt;
     case IR::Attribute::ViewportIndex:
+        if (!ctx.profile.support_multi_viewport) {
+            LOG_WARNING(Shader, "Ignoring viewport index store on non-supporting driver");
+            return std::nullopt;
+        }
         if (ctx.profile.support_viewport_index_layer_non_geometry ||
             ctx.stage == Shader::Stage::Geometry) {
             return OutAttr{ctx.viewport_index, ctx.U32[1]};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 8decdf399..22ceca19c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -67,22 +67,22 @@ public:
         }
     }

-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
-                           Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates)) {
-            throw LogicError("Derivates must be present");
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
+                           u32 num_derivatives, Id offset, Id lod_clamp) {
+        if (!Sirit::ValidId(derivatives)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_x_accum;
         boost::container::static_vector<Id, 3> deriv_y_accum;
-        for (u32 i = 0; i < num_derivates; ++i) {
-            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
-            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+        for (u32 i = 0; i < num_derivatives; ++i) {
+            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2));
+            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1));
         }
-        const Id derivates_X{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
-        const Id derivates_Y{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+        const Id derivatives_X{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+        const Id derivatives_Y{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+        Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -91,26 +91,26 @@ public:
         }
     }

-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
                            Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
-            throw LogicError("Derivates must be present");
+        if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_1_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0),
         };
         boost::container::static_vector<Id, 3> deriv_2_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1),
         };
-        const Id derivates_id1{ctx.OpCompositeConstruct(
+        const Id derivatives_id1{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
-        const Id derivates_id2{ctx.OpCompositeConstruct(
+        const Id derivatives_id2{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
+        Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -548,12 +548,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
 }

 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp) {
+                     Id derivatives, Id offset, Id lod_clamp) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const auto operands =
-        info.num_derivates == 3
-            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
-            : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
+        info.num_derivatives == 3
+            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
+            : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
                             lod_clamp);
     return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a440b557d..7d34575c8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
                             const IR::Value& skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp);
+                     Id derivatives, Id offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
 Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index b7caa4246..49171c470 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1864,11 +1864,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
     return Inst(op, Flags{info}, handle, coords);
 }

-Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
                                const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
                                          : Opcode::BindlessImageGradient};
-    return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
+    return Inst(op, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
 }

 Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index f3c81dbe1..6c30897f4 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -335,7 +335,7 @@ public:
     [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
                                    const U32& lod, const U32& multisampling, TextureInstInfo info);
     [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
-                                      const Value& derivates, const Value& offset,
+                                      const Value& derivatives, const Value& offset,
                                       const F32& lod_clamp, TextureInstInfo info);
     [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
     void ImageWrite(const Value& handle, const Value& coords, const Value& color,
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 1e9e8c8f5..c20c2401f 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -40,7 +40,7 @@ union TextureInstInfo {
     BitField<21, 1, u32> has_lod_clamp;
     BitField<22, 1, u32> relaxed_precision;
     BitField<23, 2, u32> gather_component;
-    BitField<25, 2, u32> num_derivates;
+    BitField<25, 2, u32> num_derivatives;
     BitField<27, 3, ImageFormat> image_format;
     BitField<30, 1, u32> ndv_is_active;
 };
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
index dd34507bc..4ce3dd0cd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -59,7 +59,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
         BitField<51, 3, IR::Pred> sparse_pred;
         BitField<0, 8, IR::Reg> dest_reg;
         BitField<8, 8, IR::Reg> coord_reg;
-        BitField<20, 8, IR::Reg> derivate_reg;
+        BitField<20, 8, IR::Reg> derivative_reg;
         BitField<28, 3, TextureType> type;
         BitField<31, 4, u64> mask;
         BitField<36, 13, u64> cbuf_offset;
@@ -71,7 +71,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     }

     IR::Value coords;
-    u32 num_derivates{};
+    u32 num_derivatives{};
     IR::Reg base_reg{txd.coord_reg};
     IR::Reg last_reg;
     IR::Value handle;
@@ -90,42 +90,42 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     switch (txd.type) {
     case TextureType::_1D: {
         coords = v.F(base_reg);
-        num_derivates = 1;
+        num_derivatives = 1;
         last_reg = base_reg + 1;
         break;
     }
     case TextureType::ARRAY_1D: {
         last_reg = base_reg + 1;
         coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
-        num_derivates = 1;
+        num_derivatives = 1;
         break;
     }
     case TextureType::_2D: {
         last_reg = base_reg + 2;
         coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
-        num_derivates = 2;
+        num_derivatives = 2;
         break;
     }
     case TextureType::ARRAY_2D: {
         last_reg = base_reg + 2;
         coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
-        num_derivates = 2;
+        num_derivatives = 2;
         break;
     }
     default:
         throw NotImplementedException("Invalid texture type");
     }

-    const IR::Reg derivate_reg{txd.derivate_reg};
-    IR::Value derivates;
-    switch (num_derivates) {
+    const IR::Reg derivative_reg{txd.derivative_reg};
+    IR::Value derivatives;
+    switch (num_derivatives) {
     case 1: {
-        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+        derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1));
         break;
     }
     case 2: {
-        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
-                                            v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+        derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1),
+                                              v.F(derivative_reg + 2), v.F(derivative_reg + 3));
         break;
     }
     default:
@@ -150,9 +150,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {

     IR::TextureInstInfo info{};
     info.type.Assign(GetType(txd.type));
-    info.num_derivates.Assign(num_derivates);
+    info.num_derivatives.Assign(num_derivatives);
     info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
-    const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+    const IR::Value sample{
+        v.ir.ImageGradient(handle, coords, derivatives, offset, lod_clamp, info)};

     IR::Reg dest_reg{txd.dest_reg};
     for (size_t element = 0; element < 4; ++element) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 928b35561..8fac6bad3 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     }
     Optimization::CollectShaderInfoPass(env, program);
     Optimization::LayerPass(program, host_info);
+    Optimization::VendorWorkaroundPass(program);

     CollectInterpolationInfo(env, program);
     AddNVNStorageBuffers(program);
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
     }
 }

-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
     const IR::Value lhs_value{inst.Arg(0)};
     const IR::Value rhs_value{inst.Arg(1)};
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
-    if (FoldDerivateYFromCorrection(inst)) {
+    if (FoldDerivativeYFromCorrection(inst)) {
         return;
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }

-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
     if (coord.IsImmediate()) {
         return false;
     }
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
-        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+        if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
     }
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
                                                     results_matrix[1][1], results_matrix[1][2]);
     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
-    info.num_derivates.Assign(3);
+    info.num_derivatives.Assign(3);
     IR::Value new_gradient_instruction =
         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..d4d5285e5 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
 void VerificationPass(const IR::Program& program);

 // Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+    /*
+     * Workaround for an NVIDIA bug seen in Super Mario RPG
+     *
+     * We are looking for this pattern:
+     *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     *   %result = IAdd32 %lhs_shl, %rhs_bfe
+     *
+     * And replacing the IAdd32 with a BitwiseOr32
+     *   %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+     *
+     */
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_bfe) {
+        return;
+    }
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
+        return;
+    }
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+        rhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
+        return;
+    }
+    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe) {
+        return;
+    }
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::IAdd32:
+                AddingByteSwapsWorkaround(*block, inst);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 38d820db2..a9de9f4a9 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
     bool support_gl_sparse_textures{};
     bool support_gl_derivative_control{};
     bool support_scaled_attributes{};
+    bool support_multi_viewport{};

     bool warp_size_potentially_larger_than_guest{};
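
Note (illustrative only, not part of the commit above): the substitution made by AddingByteSwapsWorkaround is semantically safe because the left operand of the matched IAdd32 is a value shifted left by 16 (its low 16 bits are zero) and the right operand is a 16-bit field extract (its high 16 bits are zero), so the addition can never carry and is bit-for-bit equal to an OR. A minimal sketch of that identity, with arbitrary example values for the hypothetical factor_a/factor_b:

    // Sketch of the identity the pass relies on; not from the yuzu sources.
    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t factor_a = 0xCAFEBABE;                 // example packed value
        const std::uint32_t factor_b = 0x1234;                     // example multiplier
        const std::uint32_t lhs_bfe = factor_a & 0xFFFFu;          // BitFieldUExtract %factor_a, #0, #16
        const std::uint32_t lhs_mul = lhs_bfe * factor_b;          // IMul32 (the optional step)
        const std::uint32_t lhs_shl = lhs_mul << 16;               // ShiftLeftLogical32 %lhs_mul, #16
        const std::uint32_t rhs_bfe = (factor_a >> 16) & 0xFFFFu;  // BitFieldUExtract %factor_a, #16, #16
        assert(lhs_shl + rhs_bfe == (lhs_shl | rhs_bfe));          // IAdd32 equals BitwiseOr32 here
        return 0;
    }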