// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::SubOp; u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); Node op_a = GetRegister(instr.gpr8); Node op_b = [&] { if (instr.is_b_imm) { return GetImmediate19(instr); } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); switch (opcode->get().GetId()) { case OpCode::Id::MOV_C: case OpCode::Id::MOV_R: { // MOV does not have neither 'abs' nor 'neg' bits. SetRegister(bb, instr.gpr0, op_b); break; } case OpCode::Id::FMUL_C: case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. if (instr.fmul.tab5cb8_2 != 0) { LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", instr.fmul.tab5cb8_2.Value()); } if (instr.fmul.tab5c68_0 != 1) { LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0.Value()); } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); static constexpr std::array FmulPostFactor = { 1.000f, // None 0.500f, // Divide 2 0.250f, // Divide 4 0.125f, // Divide 8 8.000f, // Mul 8 4.000f, // Mul 4 2.000f, // Mul 2 }; if (instr.fmul.postfactor != 0) { op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, Immediate(FmulPostFactor[instr.fmul.postfactor])); } // TODO(Rodrigo): Should precise be used when there's a postfactor? Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::MUFU: { op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); Node value = [&]() { switch (instr.sub_op) { case SubOp::Cos: return Operation(OperationCode::FCos, PRECISE, op_a); case SubOp::Sin: return Operation(OperationCode::FSin, PRECISE, op_a); case SubOp::Ex2: return Operation(OperationCode::FExp2, PRECISE, op_a); case SubOp::Lg2: return Operation(OperationCode::FLog2, PRECISE, op_a); case SubOp::Rcp: return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); case SubOp::Rsq: return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); case SubOp::Sqrt: return Operation(OperationCode::FSqrt, PRECISE, op_a); default: UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", static_cast(instr.sub_op.Value())); return Immediate(0); } }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FCMP_RR: case OpCode::Id::FCMP_RC: { UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); Node op_c = GetRegister(instr.gpr39); Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); SetRegister( bb, instr.gpr0, Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); break; } case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); // Currently RRO is only implemented as a register move. op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); SetRegister(bb, instr.gpr0, op_b); break; } default: UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); } return pc; } } // namespace VideoCommon::Shader