From 9343b81afd5132aa41f50fe033faca4f9713b8cf Mon Sep 17 00:00:00 2001 From: Ameer J <52414509+ameerj@users.noreply.github.com> Date: Fri, 17 Nov 2023 22:29:12 -0500 Subject: shader_recompiler: add byteswap pattern workaround for Nvidia --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 1 + src/shader_recompiler/ir_opt/passes.h | 1 + .../ir_opt/vendor_workaround_pass.cpp | 79 ++++++++++++++++++++++ 4 files changed, 82 insertions(+) create mode 100644 src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 83b763447..19db17c6d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC ir_opt/rescaling_pass.cpp ir_opt/ssa_rewrite_pass.cpp ir_opt/texture_pass.cpp + ir_opt/vendor_workaround_pass.cpp ir_opt/verification_pass.cpp object_pool.h precompiled_headers.h diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 928b35561..8fac6bad3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolGetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { + return; + } + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} || + rhs_bfe->Arg(2) != IR::Value{16U}) { + return; + } + IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()}; + if (!lhs_mul) { + return; + } + const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract}; + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 && + lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return; + } + IR::Inst* const lhs_bfe{lhs_mul_optional ? 
lhs_mul : lhs_mul->Arg(0).TryInstRecursive()}; + if (!lhs_bfe) { + return; + } + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return; + } + if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) { + return; + } + /* Reached only when none of the early returns above fired: redirect every use of inst to a BitwiseOr of inst's own two operands. */ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)})); +} + +} // Anonymous namespace + +/* Visits every instruction of every block in program.post_order_blocks and calls AddingByteSwapsWorkaround on each IAdd32; instructions with any other opcode are left untouched. */ void VendorWorkaroundPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::IAdd32: + AddingByteSwapsWorkaround(*block, inst); + break; + default: + break; + } + } + } +} + +} // namespace Shader::Optimization -- cgit v1.2.3