From 9d6a98d950da39dd2a7ca5ad25525de4fb825415 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Feb 2021 04:46:39 -0300 Subject: shader: Implement more of XMAD and FFMA32I and fix XMAD.CBCC --- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_integer.cpp | 8 ++-- .../impl/floating_point_fused_multiply_add.cpp | 41 +++++++++++++----- .../translate/impl/floating_point_multiply.cpp | 2 + .../translate/impl/integer_short_multiply_add.cpp | 49 ++++++++++++++++------ 5 files changed, 76 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 130c71996..4b74cf04d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -224,8 +224,8 @@ void EmitShiftRightArithmetic32(EmitContext& ctx); Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); -void EmitBitFieldInsert(EmitContext& ctx); -void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 329dcb351..8aaa0e381 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -90,12 +90,12 @@ Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) { return ctx.OpBitwiseXor(ctx.U32[1], a, b); } -void EmitBitFieldInsert(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { + return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); } -void EmitBitFieldSExtract(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count) { + return ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count); } Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index c2ca0873b..18561bc9c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -17,9 +17,6 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s BitField<8, 8, IR::Reg> src_a; } const ffma{insn}; - if (sat) { - throw NotImplementedException("FFMA SAT"); - } if (cc) { throw NotImplementedException("FFMA CC"); } @@ -31,7 +28,20 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); } void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { @@ -54,20 +64,31 @@ void TranslatorVisitor::FFMA_reg(u64 insn) { FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_rc(u64) { - throw NotImplementedException("FFMA (rc)"); +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); } void TranslatorVisitor::FFMA_cr(u64 insn) { FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_imm(u64) { - throw NotImplementedException("FFMA (imm)"); +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA32I(u64) { - throw NotImplementedException("FFMA32I"); +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index edf2cadae..72f0a18ae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -94,6 +94,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; } const fmul{insn}; + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } @@ -118,6 +119,7 @@ void TranslatorVisitor::FMUL32I(u64 insn) { BitField<53, 2, FmzMode> fmz; BitField<55, 1, u64> sat; } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, fmul32i.sat != 0, fmul32i.cc != 0, false); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp index 70a7c76c5..2932cdc42 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -58,7 +58,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s case SelectMode::CHI: return ExtractHalf(v, src_c, Half::H1, false); case SelectMode::CBCC: - return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); case SelectMode::CSFU: throw NotImplementedException("XMAD CSFU"); } @@ -78,16 +78,44 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } } // Anonymous namespace -void TranslatorVisitor::XMAD_reg(u64) { - throw NotImplementedException("XMAD (reg)"); +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } -void TranslatorVisitor::XMAD_rc(u64) { - throw NotImplementedException("XMAD (rc)"); +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); } -void TranslatorVisitor::XMAD_cr(u64) { - throw NotImplementedException("XMAD (cr)"); +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } void TranslatorVisitor::XMAD_imm(u64 insn) { @@ -97,14 +125,11 @@ void TranslatorVisitor::XMAD_imm(u64 insn) { BitField<36, 1, u64> psl; BitField<37, 1, u64> mrg; BitField<38, 1, u64> x; - BitField<39, 8, IR::Reg> src_c; BitField<50, 3, SelectMode> select_mode; } const xmad{insn}; - const IR::U32 src_b{ir.Imm32(static_cast(xmad.src_b))}; - const IR::U32 src_c{X(xmad.src_c)}; - XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, - xmad.x != 0); + XMAD(*this, insn, ir.Imm32(static_cast(xmad.src_b)), GetReg39(insn), xmad.select_mode, + Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); } } // namespace Shader::Maxwell -- cgit v1.2.3