From 15a0e1481d9a1efb3e3aa61cbaf2fa1ba0392d71 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 19:09:21 -0300 Subject: shader_ir: Initial implementation --- src/video_core/shader/decode/arithmetic.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/arithmetic_half.cpp | 24 ++++++++++++++++++++++ .../shader/decode/arithmetic_half_immediate.cpp | 24 ++++++++++++++++++++++ .../shader/decode/arithmetic_immediate.cpp | 24 ++++++++++++++++++++++ .../shader/decode/arithmetic_integer.cpp | 24 ++++++++++++++++++++++ .../shader/decode/arithmetic_integer_immediate.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/bfe.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/bfi.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/conversion.cpp | 24 ++++++++++++++++++++++ .../shader/decode/decode_integer_set.cpp | 0 src/video_core/shader/decode/ffma.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/float_set.cpp | 24 ++++++++++++++++++++++ .../shader/decode/float_set_predicate.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/half_set.cpp | 24 ++++++++++++++++++++++ .../shader/decode/half_set_predicate.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/hfma2.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/integer_set.cpp | 24 ++++++++++++++++++++++ .../shader/decode/integer_set_predicate.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/memory.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/other.cpp | 24 ++++++++++++++++++++++ .../shader/decode/predicate_set_predicate.cpp | 24 ++++++++++++++++++++++ .../shader/decode/predicate_set_register.cpp | 24 ++++++++++++++++++++++ .../shader/decode/register_set_predicate.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/shift.cpp | 24 ++++++++++++++++++++++ src/video_core/shader/decode/xmad.cpp | 24 ++++++++++++++++++++++ 25 files changed, 576 insertions(+) create mode 100644 src/video_core/shader/decode/arithmetic.cpp create mode 100644 src/video_core/shader/decode/arithmetic_half.cpp create mode 100644 src/video_core/shader/decode/arithmetic_half_immediate.cpp create mode 100644 src/video_core/shader/decode/arithmetic_immediate.cpp create mode 100644 src/video_core/shader/decode/arithmetic_integer.cpp create mode 100644 src/video_core/shader/decode/arithmetic_integer_immediate.cpp create mode 100644 src/video_core/shader/decode/bfe.cpp create mode 100644 src/video_core/shader/decode/bfi.cpp create mode 100644 src/video_core/shader/decode/conversion.cpp create mode 100644 src/video_core/shader/decode/decode_integer_set.cpp create mode 100644 src/video_core/shader/decode/ffma.cpp create mode 100644 src/video_core/shader/decode/float_set.cpp create mode 100644 src/video_core/shader/decode/float_set_predicate.cpp create mode 100644 src/video_core/shader/decode/half_set.cpp create mode 100644 src/video_core/shader/decode/half_set_predicate.cpp create mode 100644 src/video_core/shader/decode/hfma2.cpp create mode 100644 src/video_core/shader/decode/integer_set.cpp create mode 100644 src/video_core/shader/decode/integer_set_predicate.cpp create mode 100644 src/video_core/shader/decode/memory.cpp create mode 100644 src/video_core/shader/decode/other.cpp create mode 100644 src/video_core/shader/decode/predicate_set_predicate.cpp create mode 100644 src/video_core/shader/decode/predicate_set_register.cpp create mode 100644 src/video_core/shader/decode/register_set_predicate.cpp create mode 100644 src/video_core/shader/decode/shift.cpp create mode 100644 src/video_core/shader/decode/xmad.cpp (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp new file mode 100644 index 000000000..9242a7389 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp new file mode 100644 index 000000000..3b189b0d1 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp new file mode 100644 index 000000000..8d8a2dad9 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp new file mode 100644 index 000000000..18fd2082e --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp new file mode 100644 index 000000000..12c64e97a --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp new file mode 100644 index 000000000..46f340235 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp new file mode 100644 index 000000000..ffd904c54 --- /dev/null +++ b/src/video_core/shader/decode/bfe.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp new file mode 100644 index 000000000..b94d46ce6 --- /dev/null +++ b/src/video_core/shader/decode/bfi.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp new file mode 100644 index 000000000..c6eb2952c --- /dev/null +++ b/src/video_core/shader/decode/conversion.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp new file mode 100644 index 000000000..2044113f0 --- /dev/null +++ b/src/video_core/shader/decode/ffma.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp new file mode 100644 index 000000000..17d47c17a --- /dev/null +++ b/src/video_core/shader/decode/float_set.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp new file mode 100644 index 000000000..1dbe34353 --- /dev/null +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp new file mode 100644 index 000000000..af363d5d2 --- /dev/null +++ b/src/video_core/shader/decode/half_set.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp new file mode 100644 index 000000000..5fe123ea5 --- /dev/null +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp new file mode 100644 index 000000000..5ce08481e --- /dev/null +++ b/src/video_core/shader/decode/hfma2.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp new file mode 100644 index 000000000..316a7d8ad --- /dev/null +++ b/src/video_core/shader/decode/integer_set.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp new file mode 100644 index 000000000..10975c394 --- /dev/null +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp new file mode 100644 index 000000000..d6086004b --- /dev/null +++ b/src/video_core/shader/decode/memory.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp new file mode 100644 index 000000000..d84702a4f --- /dev/null +++ b/src/video_core/shader/decode/other.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp new file mode 100644 index 000000000..1ad853fda --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp new file mode 100644 index 000000000..67a06b5b4 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp new file mode 100644 index 000000000..29a348cf5 --- /dev/null +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp new file mode 100644 index 000000000..41f5b8cb0 --- /dev/null +++ b/src/video_core/shader/decode/shift.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp new file mode 100644 index 000000000..27a2fc05d --- /dev/null +++ b/src/video_core/shader/decode/xmad.cpp @@ -0,0 +1,24 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + UNIMPLEMENTED(); + + return pc; +} + +} // namespace VideoCommon::Shader \ No newline at end of file -- cgit v1.2.3 From 4c70d5b8eb68a61f5504a05dd597ecb2b04441b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:53:50 -0300 Subject: shader_decode: Implement MOV_C and MOV_R --- src/video_core/shader/decode/arithmetic.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 9242a7389..c297f729e 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -11,12 +11,34 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::SubOp; u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + Node op_a = GetRegister(instr.gpr8); + + Node op_b = [&]() -> Node { + if (instr.is_b_imm) { + return GetImmediate19(instr); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + switch (opcode->get().GetId()) { + case OpCode::Id::MOV_C: + case OpCode::Id::MOV_R: { + // MOV does not have neither 'abs' nor 'neg' bits. + SetRegister(bb, instr.gpr0, op_b); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From 7c192ec43fb6a08baea5d55aa47fcf3fa98d4343 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:54:47 -0300 Subject: shader_decode: Implement FMUL_C, FMUL_R and FMUL_IMM --- src/video_core/shader/decode/arithmetic.cpp | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index c297f729e..78bca79e3 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -36,6 +36,48 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, op_b); break; } + case OpCode::Id::FMUL_C: + case OpCode::Id::FMUL_R: + case OpCode::Id::FMUL_IMM: { + // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. + UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + UNIMPLEMENTED_IF_MSG( + instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in FMUL is not implemented"); + + op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); + + // TODO(Rodrigo): Should precise be used when there's a postfactor? + Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); + + if (instr.fmul.postfactor != 0) { + auto postfactor = static_cast(instr.fmul.postfactor); + + // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below + // logic. + if (postfactor >= 4) { + postfactor = 7 - postfactor; + } else { + postfactor = 0 - postfactor; + } + + if (postfactor > 0) { + value = Operation(OperationCode::FMul, NO_PRECISE, value, + Immediate(static_cast(1 << postfactor))); + } else { + value = Operation(OperationCode::FDiv, NO_PRECISE, value, + Immediate(static_cast(1 << -postfactor))); + } + } + + value = GetSaturatedFloat(value, instr.alu.saturate_d); + + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 4ccaa1402d376af14d8527c0a0bcc77be007bd3c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:55:19 -0300 Subject: shader_decode: Implement FADD_C, FADD_R and FADD_IMM --- src/video_core/shader/decode/arithmetic.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 78bca79e3..d196d94b5 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -78,6 +78,21 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FADD_C: + case OpCode::Id::FADD_R: + case OpCode::Id::FADD_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in FADD is not implemented"); + + op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); + op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); + + Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); + value = GetSaturatedFloat(value, instr.alu.saturate_d); + + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 964ddeeb90b655d8b5558002db7c780c0394263c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:56:21 -0300 Subject: shader_decode: Implement MUFU --- src/video_core/shader/decode/arithmetic.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index d196d94b5..fb688c324 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -93,6 +93,35 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::MUFU: { + op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); + + Node value = [&]() { + switch (instr.sub_op) { + case SubOp::Cos: + return Operation(OperationCode::FCos, PRECISE, op_a); + case SubOp::Sin: + return Operation(OperationCode::FSin, PRECISE, op_a); + case SubOp::Ex2: + return Operation(OperationCode::FExp2, PRECISE, op_a); + case SubOp::Lg2: + return Operation(OperationCode::FLog2, PRECISE, op_a); + case SubOp::Rcp: + return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); + case SubOp::Rsq: + return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); + case SubOp::Sqrt: + return Operation(OperationCode::FSqrt, PRECISE, op_a); + default: + UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", + static_cast(instr.sub_op.Value())); + } + }(); + value = GetSaturatedFloat(value, instr.alu.saturate_d); + + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 5e6a0a08c14df8e1993f4f72b1bbfd388a5ea48e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:56:45 -0300 Subject: shader_decode: Implement FMNMX_C, FMNMX_R and FMNMX_IMM --- src/video_core/shader/decode/arithmetic.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index fb688c324..0b6654397 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -122,6 +122,24 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FMNMX_C: + case OpCode::Id::FMNMX_R: + case OpCode::Id::FMNMX_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in FMNMX is not implemented"); + + op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); + op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); + + const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); + + const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); + const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); + + SetRegister(bb, instr.gpr0, + Operation(OperationCode::Select, NO_PRECISE, condition, min, max)); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 06cb910c6d9b0be664db4305f90974198f84ae98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:57:09 -0300 Subject: shader_decode: Stub RRO_C, RRO_R and RRO_IMM --- src/video_core/shader/decode/arithmetic.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 0b6654397..9f8c27b3e 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -140,6 +140,15 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { Operation(OperationCode::Select, NO_PRECISE, condition, min, max)); break; } + case OpCode::Id::RRO_C: + case OpCode::Id::RRO_R: + case OpCode::Id::RRO_IMM: { + // Currently RRO is only implemented as a register move. + op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); + SetRegister(bb, instr.gpr0, op_b); + LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 2edee801ce003f3a097cbbdbaf1b9bbb4bcddbc4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:58:23 -0300 Subject: shader_decode: Implement MOV32_IMM --- src/video_core/shader/decode/arithmetic_immediate.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 18fd2082e..2d385f48a 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -16,7 +16,15 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + switch (opcode->get().GetId()) { + case OpCode::Id::MOV32_IMM: { + SetRegister(bb, instr.gpr0, GetImmediate32(instr)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", + opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From c9b2a1b051fe386fa33427b527ca626ad3fdbfaf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:58:48 -0300 Subject: shader_decode: Implement FMUL32_IMM --- src/video_core/shader/decode/arithmetic_immediate.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 2d385f48a..0e4cbccab 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -21,6 +21,16 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, GetImmediate32(instr)); break; } + case OpCode::Id::FMUL32_IMM: { + UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, + "Condition codes generation in FMUL32 is not implemented"); + Node value = + Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); + value = GetSaturatedFloat(value, instr.fmul32.saturate); + + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", opcode->get().GetName()); -- cgit v1.2.3 From ea358bd4bf70b6b93b4022ede7a8bcd111f10f9e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Dec 2018 23:59:01 -0300 Subject: shader_decode: Implement FADD32I --- src/video_core/shader/decode/arithmetic_immediate.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 0e4cbccab..996b2537a 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -31,6 +31,18 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FADD32I: { + UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, + "Condition codes generation in FADD32I is not implemented"); + const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, + instr.fadd32i.negate_a); + const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, + instr.fadd32i.negate_b); + + const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", opcode->get().GetName()); -- cgit v1.2.3 From e3f1233ce13d82623173d690a6aa7819d68f069e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 00:05:42 -0300 Subject: shader_decode: Implement LD_A --- src/video_core/shader/decode/memory.cpp | 40 ++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index d6086004b..30e2b33a3 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -9,14 +9,52 @@ namespace VideoCommon::Shader { +using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Register; u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + switch (opcode->get().GetId()) { + case OpCode::Id::LD_A: { + // Note: Shouldn't this be interp mode flat? As in no interpolation made. + UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, + "Indirect attribute loads are not supported"); + UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, + "Unaligned attribute loads are not supported"); + + Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, + Tegra::Shader::IpaSampleMode::Default}; + + u64 next_element = instr.attribute.fmt20.element; + auto next_index = static_cast(instr.attribute.fmt20.index.Value()); + + const auto LoadNextElement = [&](u32 reg_offset) { + const Node buffer = GetRegister(instr.gpr39); + const Node attribute = GetInputAttribute(static_cast(next_index), + next_element, input_mode, buffer); + + SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); + + // Load the next attribute element into the following register. If the element + // to load goes beyond the vec4 size, load the first element of the next + // attribute. + next_element = (next_element + 1) % 4; + next_index = next_index + (next_element == 0 ? 1 : 0); + }; + + const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; + for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { + LoadNextElement(reg_offset); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From 0c049e0a2106ff1624dfe4dcbfe8703584863c7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 00:06:13 -0300 Subject: shader_decode: Implement ST_A --- src/video_core/shader/decode/memory.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 30e2b33a3..aea1a0675 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -52,6 +52,36 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::ST_A: { + UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, + "Indirect attribute loads are not supported"); + UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, + "Unaligned attribute loads are not supported"); + + u64 next_element = instr.attribute.fmt20.element; + auto next_index = static_cast(instr.attribute.fmt20.index.Value()); + + const auto StoreNextElement = [&](u32 reg_offset) { + const auto dest = GetOutputAttribute(static_cast(next_index), + next_element, GetRegister(instr.gpr39)); + const auto src = GetRegister(instr.gpr0.Value() + reg_offset); + + bb.push_back(Operation(OperationCode::Assign, dest, src)); + + // Load the next attribute element into the following register. If the element + // to load goes beyond the vec4 size, load the first element of the next + // attribute. + next_element = (next_element + 1) % 4; + next_index = next_index + (next_element == 0 ? 1 : 0); + }; + + const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; + for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { + StoreNextElement(reg_offset); + } + + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From cacb934f21d5b2abcbae168f8916bf3e3a21d64b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 00:07:32 -0300 Subject: shader_decode: Implement EXIT --- src/video_core/shader/decode/other.cpp | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d84702a4f..2a5b70b8b 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -16,7 +16,38 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + switch (opcode->get().GetId()) { + case OpCode::Id::EXIT: { + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", + static_cast(cc)); + + switch (instr.flow.cond) { + case Tegra::Shader::FlowCondition::Always: + bb.push_back(Operation(OperationCode::Exit)); + if (instr.pred.pred_index == static_cast(Tegra::Shader::Pred::UnusedIndex)) { + // If this is an unconditional exit then just end processing here, + // otherwise we have to account for the possibility of the condition + // not being met, so continue processing the next instruction. + pc = MAX_PROGRAM_LENGTH - 1; + } + break; + + case Tegra::Shader::FlowCondition::Fcsm_Tr: + // TODO(bunnei): What is this used for? If we assume this conditon is not + // satisifed, dual vertex shaders in Farming Simulator make more sense + UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); + break; + + default: + UNIMPLEMENTED_MSG("Unhandled flow condition: {}", + static_cast(instr.flow.cond.Value())); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From 4f95dc950ee483ac9d19fb98209abeb26556e26f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 00:08:21 -0300 Subject: shader_decode: Implement IPA --- src/video_core/shader/decode/other.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 2a5b70b8b..ffdc77d90 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -45,6 +45,18 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::IPA: { + const auto& attribute = instr.attribute.fmt28; + const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), + instr.ipa.sample_mode.Value()}; + + const Node input_attr = GetInputAttribute(attribute.index, attribute.element, input_mode); + const Node ipa = Operation(OperationCode::Ipa, input_attr); + const Node value = GetSaturatedFloat(ipa, instr.ipa.saturate); + + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 8215ae942c72ec20c2ebebbf8fc5784e3f21bb3c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 00:11:33 -0300 Subject: shader_decode: Partially implement BRA --- src/video_core/shader/decode/other.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index ffdc77d90..3f058324c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -11,6 +11,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::ConditionCode; u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -45,6 +46,17 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::BRA: { + UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, + "BRA with constant buffers are not implemented"); + + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF(cc != Tegra::Shader::ConditionCode::T); + + const u32 target = pc + instr.bra.GetBranchTarget(); + bb.push_back(Operation(OperationCode::Bra, Immediate(target))); + break; + } case OpCode::Id::IPA: { const auto& attribute = instr.attribute.fmt28; const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), -- cgit v1.2.3 From c703f0aee41ba08219f10217169813ac97da06c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 00:12:48 -0300 Subject: shader_decode: Implement FSETP --- .../shader/decode/float_set_predicate.cpp | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 1dbe34353..5dd085fea 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -11,12 +11,44 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, + instr.fsetp.neg_a != 0); + Node op_b = [&]() { + if (instr.is_b_imm) { + return GetImmediate19(instr); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); + + // We can't use the constant predicate as destination. + ASSERT(instr.fsetp.pred3 != static_cast(Pred::UnusedIndex)); + + const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); + const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); + + const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); + const Node value = Operation(combiner, predicate, second_pred); + + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(bb, instr.fsetp.pred3, value); + + if (instr.fsetp.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled + const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); + const Node second_value = Operation(combiner, negated_pred, second_pred); + SetPredicate(bb, instr.fsetp.pred0, second_value); + } return pc; } -- cgit v1.2.3 From 878672f371e71d7d7a5b44aec0dc4918a682732d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 01:27:47 -0300 Subject: shader_decode: Implement TEXS (F32) --- src/video_core/shader/decode/memory.cpp | 199 ++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index aea1a0675..1f458b6d7 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -13,6 +15,24 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::TextureMiscMode; +using Tegra::Shader::TextureProcessMode; +using Tegra::Shader::TextureType; + +static std::size_t GetCoordCount(TextureType texture_type) { + switch (texture_type) { + case TextureType::Texture1D: + return 1; + case TextureType::Texture2D: + return 2; + case TextureType::Texture3D: + case TextureType::TextureCube: + return 3; + default: + UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast(texture_type)); + return 0; + } +} u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -82,6 +102,27 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } + case OpCode::Id::TEXS: { + Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; + const bool is_array{instr.texs.IsArrayTexture()}; + const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.texs.GetTextureProcessMode(); + + if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); + } + + const Node texture = + GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); + + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(bb, instr, texture); + } else { + UNIMPLEMENTED(); + // WriteTexsInstructionHalfFloat(bb, instr, texture); + } + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } @@ -89,4 +130,162 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { return pc; } +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, + bool is_array, bool is_shadow) { + const auto offset = static_cast(sampler.index.Value()); + + // If this sampler has already been used, return the existing mapping. + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == offset; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + +void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + Node texture) { + // TEXS has two destination registers and a swizzle. The first two elements in the swizzle + // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 + + MetaComponents meta; + std::array dest; + + std::size_t written_components = 0; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) { + continue; + } + meta.components_map[written_components] = static_cast(component); + + if (written_components < 2) { + // Write the first two swizzle components to gpr0 and gpr0+1 + dest[written_components] = GetRegister(instr.gpr0.Value() + written_components % 2); + } else { + ASSERT(instr.texs.HasTwoDestinations()); + // Write the rest of the swizzle components to gpr28 and gpr28+1 + dest[written_components] = GetRegister(instr.gpr28.Value() + written_components % 2); + } + + ++written_components; + } + + std::generate(dest.begin() + written_components, dest.end(), [&]() { return GetRegister(RZ); }); + + bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], + dest[3])); +} + +Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array, + std::size_t bias_offset, std::vector&& coords) { + UNIMPLEMENTED_IF_MSG( + (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || + (texture_type == TextureType::TextureCube && is_array && depth_compare), + "This method is not supported."); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + const bool lod_needed = process_mode == TextureProcessMode::LZ || + process_mode == TextureProcessMode::LL || + process_mode == TextureProcessMode::LLA; + + const bool gl_lod_supported = + !((texture_type == TextureType::Texture2D && is_array && depth_compare) || + (texture_type == TextureType::TextureCube && !is_array && depth_compare)); + + const OperationCode read_method = + lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; + + const MetaTexture meta{sampler, static_cast(coords.size())}; + + std::vector params = std::move(coords); + + if (process_mode != TextureProcessMode::None) { + if (process_mode == TextureProcessMode::LZ) { + if (gl_lod_supported) { + params.push_back(Immediate(0)); + } else { + // Lod 0 is emulated by a big negative bias in scenarios that are not supported by + // GLSL + params.push_back(Immediate(-1000)); + } + } else { + // If present, lod or bias are always stored in the register indexed by the gpr20 field + // with an offset depending on the usage of the other registers + params.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); + } + } + + return Operation(read_method, meta, std::move(params)); +} + +Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { + + const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && + process_mode != Tegra::Shader::TextureProcessMode::LZ); + + const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + const u64 last_coord_register = + (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) + ? static_cast(instr.gpr20.Value()) + : coord_register + 1; + + std::vector coords; + for (std::size_t i = 0; i < coord_count; ++i) { + const bool last = (i == (coord_count - 1)) && (coord_count > 1); + coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + } + + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + // or in the next register if lod or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + coords.push_back(GetRegister(depth_register)); + } + if (is_array) { + coords.push_back( + Operation(OperationCode::ICastFloat, NO_PRECISE, GetRegister(array_register))); + } + // Fill ignored coordinates + while (coords.size() < total_coord_count) { + coords.push_back(Immediate(0)); + } + + return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, + (coord_count > 2 ? 1 : 0), std::move(coords)); +} + +std::tuple ShaderIR::ValidateAndGetCoordinateElement( + TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, + std::size_t max_coords, std::size_t max_inputs) { + + const std::size_t coord_count = GetCoordCount(texture_type); + + std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); + const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); + if (total_coord_count > max_coords || total_reg_count > max_inputs) { + UNIMPLEMENTED_MSG("Unsupported Texture operation"); + total_coord_count = std::min(total_coord_count, max_coords); + } + // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. + total_coord_count += + (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; + + return {coord_count, total_coord_count}; +} + } // namespace VideoCommon::Shader \ No newline at end of file -- cgit v1.2.3 From 2b90637f4bb0358525715c113f903d0c069b0eb4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Dec 2018 16:59:28 -0300 Subject: shader_decode: Implement TEX and TXQ --- src/video_core/shader/decode/memory.cpp | 219 ++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 1f458b6d7..220238ce8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "common/assert.h" @@ -102,6 +103,44 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } + case OpCode::Id::TEX: { + Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; + const bool is_array = instr.tex.array != 0; + const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP is not implemented"); + } + + const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array); + + if (depth_compare) { + SetRegister(bb, instr.gpr0, texture); + } else { + MetaComponents meta; + std::array dest; + + std::size_t dest_elem = 0; + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + meta.components_map[dest_elem] = static_cast(elem); + dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); + + ++dest_elem; + } + std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + + bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, + dest[0], dest[1], dest[2], dest[3])); + } + break; + } case OpCode::Id::TEXS: { Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; const bool is_array{instr.texs.IsArrayTexture()}; @@ -123,6 +162,148 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::TLD4: { + ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D); + ASSERT(instr.tld4.array == 0); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), + "NDV is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), + "PTP is not implemented"); + + if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); + } + + const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); + auto texture_type = instr.tld4.texture_type.Value(); + u32 num_coordinates = static_cast(GetCoordCount(texture_type)); + if (depth_compare) + num_coordinates += 1; + + std::vector params; + + switch (num_coordinates) { + case 2: { + params.push_back(GetRegister(instr.gpr8)); + params.push_back(GetRegister(instr.gpr8.Value() + 1)); + break; + } + case 3: { + params.push_back(GetRegister(instr.gpr8)); + params.push_back(GetRegister(instr.gpr8.Value() + 1)); + params.push_back(GetRegister(instr.gpr8.Value() + 2)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled coordinates number {}", static_cast(num_coordinates)); + params.push_back(GetRegister(instr.gpr8)); + params.push_back(GetRegister(instr.gpr8.Value() + 1)); + num_coordinates = 2; + texture_type = Tegra::Shader::TextureType::Texture2D; + } + params.push_back(Immediate(static_cast(instr.tld4.component))); + + const auto& sampler = GetSampler(instr.sampler, texture_type, false, depth_compare); + const MetaTexture meta{sampler, num_coordinates}; + + const Node texture = + Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + + if (depth_compare) { + SetRegister(bb, instr.gpr0, texture); + } else { + MetaComponents meta; + std::array dest; + + std::size_t dest_elem = 0; + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + meta.components_map[dest_elem] = static_cast(elem); + dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); + + ++dest_elem; + } + std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + + bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, + dest[0], dest[1], dest[2], dest[3])); + } + break; + } + case OpCode::Id::TLD4S: { + UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLD4S.NODEP is not implemented"); + } + + const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); + const Node op_a = GetRegister(instr.gpr8); + const Node op_b = GetRegister(instr.gpr20); + + std::vector params; + + // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + if (depth_compare) { + // Note: TLD4S coordinate encoding works just like TEXS's + const Node op_y = GetRegister(instr.gpr8.Value() + 1); + params.push_back(op_a); + params.push_back(op_y); + params.push_back(op_b); + } else { + params.push_back(op_a); + params.push_back(op_b); + } + const auto num_coords = static_cast(params.size()); + params.push_back(Immediate(static_cast(instr.tld4s.component))); + + const auto& sampler = + GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + const MetaTexture meta{sampler, num_coords}; + + WriteTexsInstructionFloat( + bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params))); + break; + } + case OpCode::Id::TXQ: { + if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TXQ.NODEP is not implemented"); + } + + // TODO: The new commits on the texture refactor, change the way samplers work. + // Sadly, not all texture instructions specify the type of texture their sampler + // uses. This must be fixed at a later instance. + const auto& sampler = + GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + + switch (instr.txq.query_type) { + case Tegra::Shader::TextureQueryType::Dimension: { + const MetaTexture meta_texture{sampler}; + const MetaComponents meta_components{{0, 1, 2, 3}}; + + const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture, + GetRegister(instr.gpr8)); + std::array dest; + for (std::size_t i = 0; i < dest.size(); ++i) { + dest[i] = GetRegister(instr.gpr0.Value() + i); + } + + bb.push_back(Operation(OperationCode::AssignComposite, meta_components, texture, + dest[0], dest[1], dest[2], dest[3])); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled texture query type: {}", + static_cast(instr.txq.query_type.Value())); + } + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } @@ -227,6 +408,44 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, return Operation(read_method, meta, std::move(params)); } +Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { + const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && + process_mode != Tegra::Shader::TextureProcessMode::LZ); + + const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::vector coords; + for (std::size_t i = 0; i < coord_count; ++i) { + coords.push_back(GetRegister(coord_register + i)); + } + // 1D.DC in opengl the 2nd component is ignored. + if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { + coords.push_back(Immediate(0.0f)); + } + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + // or in the next register if lod or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + coords.push_back(GetRegister(depth_register)); + } + if (is_array) { + coords.push_back(GetRegister(array_register)); + } + // Fill ignored coordinates + while (coords.size() < total_coord_count) { + coords.push_back(Immediate(0)); + } + + return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0, + std::move(coords)); +} + Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array) { -- cgit v1.2.3 From 802c23b8a8f1a14100fb9b291482ae197ba53293 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Dec 2018 18:35:07 -0300 Subject: shader_decode: Implement TMML --- src/video_core/shader/decode/memory.cpp | 48 ++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 220238ce8..d8265d3fd 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -112,7 +112,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { "AOFFI is not implemented"); if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP is not implemented"); + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); } const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array); @@ -240,7 +240,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { "AOFFI is not implemented"); if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP is not implemented"); + LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); } const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); @@ -273,7 +273,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { } case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP is not implemented"); + LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); } // TODO: The new commits on the texture refactor, change the way samplers work. @@ -304,6 +304,48 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::TMML: { + UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), + "NDV is not implemented"); + + if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); + } + + auto texture_type = instr.tmml.texture_type.Value(); + const bool is_array = instr.tmml.array != 0; + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + std::vector coords; + + // TODO: Add coordinates for different samplers once other texture types are implemented. + switch (texture_type) { + case TextureType::Texture1D: + coords.push_back(GetRegister(instr.gpr8)); + break; + case TextureType::Texture2D: + coords.push_back(GetRegister(instr.gpr8.Value() + 0)); + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + break; + default: + UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast(texture_type)); + + // Fallback to interpreting as a 2D texture for now + coords.push_back(GetRegister(instr.gpr8.Value() + 0)); + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + texture_type = TextureType::Texture2D; + } + + const MetaTexture meta_texture{sampler, static_cast(coords.size())}; + const Node texture = + Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords)); + + const MetaComponents meta_composite{{0, 1, 2, 3}}; + bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture, + GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), + GetRegister(RZ), GetRegister(RZ))); + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From abdbafbc203b6dbb8e690d9dda02fc423608401f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 Dec 2018 02:07:46 -0300 Subject: shader_decode: Implement PSETP --- .../shader/decode/predicate_set_predicate.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 1ad853fda..24352170d 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -11,12 +11,32 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); + const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); + + // We can't use the constant predicate as destination. + ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); + + const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); + + const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); + const Node predicate = Operation(combiner, op_a, op_b); + + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); + + if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled + SetPredicate( + bb, instr.psetp.pred0, + Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), second_pred)); + } return pc; } -- cgit v1.2.3 From 97f33f00cf4076e49f763c8139d388d47a41c84d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 Dec 2018 03:18:25 -0300 Subject: shader_decode: Implement SSY and SYNC --- src/video_core/shader/decode/other.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 3f058324c..4c2d24202 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -57,6 +57,25 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { bb.push_back(Operation(OperationCode::Bra, Immediate(target))); break; } + case OpCode::Id::SSY: { + UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, + "Constant buffer flow is not supported"); + + // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the + // target of the jump that the SYNC instruction will make. The SSY opcode has a similar + // structure to the BRA opcode. + bb.push_back(Operation(OperationCode::Ssy, Immediate(pc + instr.bra.GetBranchTarget()))); + break; + } + case OpCode::Id::SYNC: { + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", + static_cast(cc)); + + // The SYNC opcode jumps to the address previously set by the SSY opcode + bb.push_back(Operation(OperationCode::Sync)); + break; + } case OpCode::Id::IPA: { const auto& attribute = instr.attribute.fmt28; const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), -- cgit v1.2.3 From 9118deb9904f5bb4012d32c8ed63262b3f6e74a3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 Dec 2018 17:16:14 -0300 Subject: shader_decode: Stub DEPBAR --- src/video_core/shader/decode/other.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 4c2d24202..ef0598d4f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -88,6 +88,10 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::DEPBAR: { + LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); + break; + } default: UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From c849b5b3201e8fda40727c3926b6389f609feafc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 Dec 2018 17:32:51 -0300 Subject: shader_decode: Implement F2F --- src/video_core/shader/decode/conversion.cpp | 38 ++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index c6eb2952c..465c63a9e 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -11,12 +11,48 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Register; u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + switch (opcode->get().GetId()) { + case OpCode::Id::F2F_R: { + UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in F2F is not implemented"); + + Node value = GetRegister(instr.gpr20); + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); + + value = [&]() { + switch (instr.conversion.f2f.rounding) { + case Tegra::Shader::F2fRoundingOp::None: + return value; + case Tegra::Shader::F2fRoundingOp::Round: + return Operation(OperationCode::FRoundEven, PRECISE, value); + case Tegra::Shader::F2fRoundingOp::Floor: + return Operation(OperationCode::FFloor, PRECISE, value); + case Tegra::Shader::F2fRoundingOp::Ceil: + return Operation(OperationCode::FCeil, PRECISE, value); + case Tegra::Shader::F2fRoundingOp::Trunc: + return Operation(OperationCode::FTrunc, PRECISE, value); + default: + UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", + static_cast(instr.conversion.f2f.rounding.Value())); + break; + } + }(); + value = GetSaturatedFloat(value, instr.alu.saturate_d); + + SetRegister(bb, instr.gpr0, value); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From 8abe5ba2c8a5eb839849b6554782dfd455e85699 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 01:53:05 -0300 Subject: shader_decode: Implement I2F --- src/video_core/shader/decode/conversion.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 465c63a9e..7c691982d 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -18,6 +18,29 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { + case OpCode::Id::I2F_R: + case OpCode::Id::I2F_C: { + UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.selector); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in I2F is not implemented"); + + Node value = [&]() { + if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + const bool input_signed = instr.conversion.is_input_signed; + value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); + value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); + value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); + value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); + + SetRegister(bb, instr.gpr0, value); + break; + } case OpCode::Id::F2F_R: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); -- cgit v1.2.3 From 3052eae25e9a35bdffdd72c2598929e6a9c72607 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 01:57:13 -0300 Subject: shader_decode: Implement F2I --- src/video_core/shader/decode/conversion.cpp | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 7c691982d..82fe5e21a 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -73,6 +73,43 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::F2I_R: + case OpCode::Id::F2I_C: { + UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in F2I is not implemented"); + Node value = [&]() { + if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); + + value = [&]() { + switch (instr.conversion.f2i.rounding) { + case Tegra::Shader::F2iRoundingOp::None: + return value; + case Tegra::Shader::F2iRoundingOp::Floor: + return Operation(OperationCode::FFloor, PRECISE, value); + case Tegra::Shader::F2iRoundingOp::Ceil: + return Operation(OperationCode::FCeil, PRECISE, value); + case Tegra::Shader::F2iRoundingOp::Trunc: + return Operation(OperationCode::FTrunc, PRECISE, value); + default: + UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", + static_cast(instr.conversion.f2i.rounding.Value())); + } + }(); + const bool is_signed = instr.conversion.is_output_signed; + value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); + value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed); + + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From e444a6553faa019b20a81a3b7380ecccfd7d3725 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 Dec 2018 03:57:10 -0300 Subject: shader_decode: Implement FSET --- src/video_core/shader/decode/float_set.cpp | 37 +++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 17d47c17a..355fabc09 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -16,7 +16,42 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, + instr.fset.neg_a != 0); + + Node op_b = [&]() { + if (instr.is_b_imm) { + return GetImmediate19(instr); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); + + // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the + // condition is true, and to 0 otherwise. + const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); + + const OperationCode combiner = GetPredicateCombiner(instr.fset.op); + const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); + + const Node predicate = Operation(combiner, first_pred, second_pred); + + const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); + const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); + const Node value = + Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); + + SetRegister(bb, instr.gpr0, value); + + if (instr.generates_cc.Value() != 0) { + const Node is_zero = Operation(OperationCode::LogicalFEqual, predicate, Immediate(0.0f)); + SetInternalFlag(bb, InternalFlag::Zero, is_zero); + LOG_WARNING(HW_GPU, "FSET condition code is incomplete"); + } return pc; } -- cgit v1.2.3 From 501284a81a60a19713aa0509f3db994617f44659 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 Dec 2018 18:19:17 -0300 Subject: shader_decode: Implement BFE --- src/video_core/shader/decode/bfe.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index ffd904c54..6532a3bce 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -16,7 +16,31 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF(instr.bfe.negate_b); + + Node op_a = GetRegister(instr.gpr8); + op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); + + switch (opcode->get().GetId()) { + case OpCode::Id::BFE_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in BFE is not implemented"); + + const Node inner_shift_imm = Immediate(static_cast(instr.bfe.GetLeftShiftValue())); + const Node outer_shift_imm = + Immediate(static_cast(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); + + const Node inner_shift = + Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); + const Node outer_shift = + Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); + + SetRegister(bb, instr.gpr0, outer_shift); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From 39f1c6246a3c5140f4c2b9a2ba3cbcaecf9521dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:02:15 -0300 Subject: shader_decode: Implement LOP32I --- .../shader/decode/arithmetic_integer_immediate.cpp | 68 +++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 46f340235..ee5754161 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -10,15 +10,81 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; +using Tegra::Shader::LogicOperation; using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; +using Tegra::Shader::PredicateResultMode; +using Tegra::Shader::Register; u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + Node op_a = GetRegister(instr.gpr8); + Node op_b = Immediate(static_cast(instr.alu.imm20_32)); + + switch (opcode->get().GetId()) { + case OpCode::Id::LOP32I: { + UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, + "Condition codes generation in LOP32I is not implemented"); + + if (instr.alu.lop32i.invert_a) + op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); + + if (instr.alu.lop32i.invert_b) + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); + + WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, + Tegra::Shader::PredicateResultMode::None, + Tegra::Shader::Pred::UnusedIndex); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", + opcode->get().GetName()); + } return pc; } +void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, + Node op_a, Node op_b, PredicateResultMode predicate_mode, + Pred predicate) { + const Node result = [&]() { + switch (logic_op) { + case LogicOperation::And: + return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); + case LogicOperation::Or: + return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); + case LogicOperation::Xor: + return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); + case LogicOperation::PassB: + return op_b; + default: + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast(logic_op)); + } + }(); + + if (dest != Register::ZeroIndex) { + SetRegister(bb, dest, result); + } + + using Tegra::Shader::PredicateResultMode; + // Write the predicate value depending on the predicate mode. + switch (predicate_mode) { + case PredicateResultMode::None: + // Do nothing. + return; + case PredicateResultMode::NotZero: { + // Set the predicate to true if the result is not zero. + const Node compare = Operation(OperationCode::LogicalIEqual, result, Immediate(0)); + SetPredicate(bb, static_cast(predicate), compare); + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", + static_cast(predicate_mode)); + } +} + } // namespace VideoCommon::Shader \ No newline at end of file -- cgit v1.2.3 From a2819c204f1a72a63ee5e8cc9a9830cd27fb6853 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:05:52 -0300 Subject: shader_decode: Implement SHR --- src/video_core/shader/decode/shift.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 41f5b8cb0..76938fa05 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -16,7 +16,32 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + const Node op_a = GetRegister(instr.gpr8); + const Node op_b = [&]() { + if (instr.is_b_imm) { + return Immediate(instr.alu.GetSignedImm20_20()); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + switch (opcode->get().GetId()) { + case OpCode::Id::SHR_C: + case OpCode::Id::SHR_R: + case OpCode::Id::SHR_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in SHR is not implemented"); + + const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, + instr.shift.is_signed, PRECISE, op_a, op_b); + SetRegister(bb, instr.gpr0, value); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From d79c462af067f78eb6fd84b0f02c385464412017 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:05:59 -0300 Subject: shader_decode: Implement SHL --- src/video_core/shader/decode/shift.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 76938fa05..3ba039d21 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -39,6 +39,14 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::SHL_C: + case OpCode::Id::SHL_R: + case OpCode::Id::SHL_IMM: + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in SHL is not implemented"); + SetRegister(bb, instr.gpr0, + Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b)); + break; default: UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From acdbbb88854b4c1dc75353018fcf2e5480cea858 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:08:52 -0300 Subject: shader_decode: Implement LD_C --- src/video_core/shader/decode/memory.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index d8265d3fd..6219f8ee6 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -73,6 +73,37 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::LD_C: { + UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); + + Node index = GetRegister(instr.gpr8); + + const Node op_a = + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index); + + switch (instr.ld_c.type.Value()) { + case Tegra::Shader::UniformType::Single: + SetRegister(bb, instr.gpr0, op_a); + break; + + case Tegra::Shader::UniformType::Double: { + const Node op_b = + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); + + const Node composite = + Operation(OperationCode::Composite, op_a, op_b, GetRegister(RZ), GetRegister(RZ)); + + MetaComponents meta{{0, 1, 2, 3}}; + bb.push_back(Operation(OperationCode::AssignComposite, meta, composite, + GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), + GetRegister(RZ), GetRegister(RZ))); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast(instr.ld_c.type.Value())); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); -- cgit v1.2.3 From 078ba28e13b4ecd7fe51e361a577a178faa74b3f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:13:00 -0300 Subject: shader_decode: Implement ISET --- src/video_core/shader/decode/integer_set.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 316a7d8ad..eba1c5123 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -16,7 +16,33 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + const Node op_a = GetRegister(instr.gpr8); + const Node op_b = [&]() { + if (instr.is_b_imm) { + return Immediate(instr.alu.GetSignedImm20_20()); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition + // is true, and to 0 otherwise. + const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); + const Node first_pred = + GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); + + const OperationCode combiner = GetPredicateCombiner(instr.iset.op); + + const Node predicate = Operation(combiner, first_pred, second_pred); + + const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); + const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); + const Node value = + Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); + + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From 80183de8846ccf62631d48451535f9a6a4cb8284 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 17:09:23 -0300 Subject: shader_decode: Implement BFI --- src/video_core/shader/decode/bfi.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index b94d46ce6..6a851b22e 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -16,7 +16,28 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF(instr.generates_cc); + + const auto [base, packed_shift] = [&]() -> std::tuple { + switch (opcode->get().GetId()) { + case OpCode::Id::BFI_IMM_R: + return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; + default: + UNREACHABLE(); + } + }(); + const Node insert = GetRegister(instr.gpr8); + + const Node offset = + Operation(OperationCode::UBitwiseAnd, NO_PRECISE, packed_shift, Immediate(0xff)); + + Node bits = + Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, packed_shift, Immediate(8)); + bits = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, bits, Immediate(0xff)); + + const Node value = + Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From faadae5814683d8d9252bdb6cafbdb07ea8619e4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 17:30:33 -0300 Subject: shader_decode: Implement ISETP --- .../shader/decode/integer_set_predicate.cpp | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index 10975c394..d76b8018c 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -11,12 +11,41 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + const Node op_a = GetRegister(instr.gpr8); + + const Node op_b = [&]() { + if (instr.is_b_imm) { + return Immediate(instr.alu.GetSignedImm20_20()); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + // We can't use the constant predicate as destination. + ASSERT(instr.isetp.pred3 != static_cast(Pred::UnusedIndex)); + + const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); + const Node predicate = + GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); + + // Set the primary predicate to the result of Predicate OP SecondPredicate + const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); + const Node value = Operation(combiner, predicate, second_pred); + SetPredicate(bb, instr.isetp.pred3, value); + + if (instr.isetp.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled + const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); + SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); + } return pc; } -- cgit v1.2.3 From ccb71bece9e6e6c9ceabc0826624f645c5140c53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:22:18 -0300 Subject: shader_decode: Implement IADD --- .../shader/decode/arithmetic_integer.cpp | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 12c64e97a..47b27ac5b 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -16,7 +16,34 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + Node op_a = GetRegister(instr.gpr8); + Node op_b = [&]() { + if (instr.is_b_imm) { + return Immediate(instr.alu.GetSignedImm20_20()); + } else if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + + switch (opcode->get().GetId()) { + case OpCode::Id::IADD_C: + case OpCode::Id::IADD_R: + case OpCode::Id::IADD_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in IADD is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); + + op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); + op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); + + SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); + } return pc; } -- cgit v1.2.3 From 8486e7f8c8367a7cc225da9fbac262a116744108 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:22:44 -0300 Subject: shader_decode: Implement SEL --- src/video_core/shader/decode/arithmetic_integer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 47b27ac5b..429b86813 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -41,6 +41,14 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); break; } + case OpCode::Id::SEL_C: + case OpCode::Id::SEL_R: + case OpCode::Id::SEL_IMM: { + const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); + const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 8f37531f8ef94e9a43d33232f4c2da980ce7bf80 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:22:57 -0300 Subject: shader_decode: Implement LOP --- src/video_core/shader/decode/arithmetic_integer.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 429b86813..d01336e0e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -49,6 +49,21 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::LOP_C: + case OpCode::Id::LOP_R: + case OpCode::Id::LOP_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in LOP is not implemented"); + + if (instr.alu.lop.invert_a) + op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); + if (instr.alu.lop.invert_b) + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); + + WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); + break; + } default: UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From becfdb863845d9ea81c1844c8ee3c681d03fd9ea Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 17:44:20 -0300 Subject: shader_decode: Implement PBK and BRK --- src/video_core/shader/decode/other.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index ef0598d4f..0416d7eaa 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -64,7 +64,19 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the // target of the jump that the SYNC instruction will make. The SSY opcode has a similar // structure to the BRA opcode. - bb.push_back(Operation(OperationCode::Ssy, Immediate(pc + instr.bra.GetBranchTarget()))); + const u32 target = pc + instr.bra.GetBranchTarget(); + bb.push_back(Operation(OperationCode::Ssy, Immediate(target))); + break; + } + case OpCode::Id::PBK: { + UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, + "Constant buffer PBK is not supported"); + + // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but + // using SYNC on a PBK address will kill the shader execution. We don't emulate this because + // it's very unlikely a driver will emit such invalid shader. + const u32 target = pc + instr.bra.GetBranchTarget(); + bb.push_back(Operation(OperationCode::Pbk, Immediate(target))); break; } case OpCode::Id::SYNC: { @@ -76,6 +88,15 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { bb.push_back(Operation(OperationCode::Sync)); break; } + case OpCode::Id::BRK: { + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", + static_cast(cc)); + + // The BRK opcode jumps to the address previously set by the PBK opcode + bb.push_back(Operation(OperationCode::Brk)); + break; + } case OpCode::Id::IPA: { const auto& attribute = instr.attribute.fmt28; const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), -- cgit v1.2.3 From b0e79208385ca3183fd1abdd4c6628268840e9ef Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 18:09:40 -0300 Subject: shader_decode: Implement XMAD --- src/video_core/shader/decode/xmad.cpp | 86 ++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 27a2fc05d..fcab1fb80 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -16,7 +16,91 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF(instr.xmad.sign_a); + UNIMPLEMENTED_IF(instr.xmad.sign_b); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in XMAD is not implemented"); + + Node op_a = GetRegister(instr.gpr8); // instr.xmad.sign_a + + // TODO(bunnei): Needs to be fixed once op_a or op_b is signed + UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); + const bool is_signed_a = instr.xmad.sign_a == 1; + const bool is_signed_b = instr.xmad.sign_b == 1; + const bool is_signed_c = is_signed_a; + + auto [is_merge, op_b, op_c] = [&]() -> std::tuple { + switch (opcode->get().GetId()) { + case OpCode::Id::XMAD_CR: + return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + GetRegister(instr.gpr39)}; + case OpCode::Id::XMAD_RR: + return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; + case OpCode::Id::XMAD_RC: + return {false, GetRegister(instr.gpr39), + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + case OpCode::Id::XMAD_IMM: + return {instr.xmad.merge_37, Immediate(static_cast(instr.xmad.imm20_16)), + GetRegister(instr.gpr39)}; + default: + UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); + } + }(); + + if (instr.xmad.high_a) { + op_a = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_a, NO_PRECISE, op_a, + Immediate(16)); + } else { + op_a = SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, NO_PRECISE, op_a, + Immediate(0xffff)); + } + + const Node original_b = op_b; + if (instr.xmad.high_b) { + op_b = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_b, NO_PRECISE, op_a, + Immediate(16)); + } else { + op_b = SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, NO_PRECISE, op_b, + Immediate(0xffff)); + } + + // TODO(Rodrigo): Use an appropiate sign for this operation + Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); + if (instr.xmad.product_shift_left) { + product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, Immediate(16)); + } + + op_c = [&]() { + switch (instr.xmad.mode) { + case Tegra::Shader::XmadMode::None: + return op_c; + case Tegra::Shader::XmadMode::CLo: + return SignedOperation(OperationCode::IBitwiseAnd, is_signed_c, NO_PRECISE, op_c, + Immediate(0xffff)); + case Tegra::Shader::XmadMode::CHi: + return SignedOperation(OperationCode::ILogicalShiftRight, is_signed_c, NO_PRECISE, op_c, + Immediate(16)); + case Tegra::Shader::XmadMode::CBcc: { + const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, + NO_PRECISE, original_b, Immediate(16)); + return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, op_c, shifted_b); + } + default: { + UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast(instr.xmad.mode.Value())); + } + } + }(); + + // TODO(Rodrigo): Use an appropiate sign for this operation + Node sum = Operation(OperationCode::IAdd, product, op_c); + if (is_merge) { + const Node a = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, sum, Immediate(0xffff)); + const Node b = + Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(0xffff)); + sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); + } + + SetRegister(bb, instr.gpr0, sum); return pc; } -- cgit v1.2.3 From 210620ff314c774cd0da5a6b50501dec45914751 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 18:49:48 -0300 Subject: shader_decode: Implement ISCADD --- src/video_core/shader/decode/arithmetic_integer.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index d01336e0e..d494af736 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -41,6 +41,21 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); break; } + case OpCode::Id::ISCADD_C: + case OpCode::Id::ISCADD_R: + case OpCode::Id::ISCADD_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in ISCADD is not implemented"); + + op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); + op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); + + const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); + const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); + const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); + SetRegister(bb, instr.gpr0, value); + break; + } case OpCode::Id::SEL_C: case OpCode::Id::SEL_R: case OpCode::Id::SEL_IMM: { -- cgit v1.2.3 From 6ca31f544a4559eca547b45b5158d210932cc428 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 19:03:53 -0300 Subject: shader_decode: Implement BRA internal flag --- src/video_core/shader/decode/other.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 0416d7eaa..5b3f9aa30 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -50,11 +50,15 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "BRA with constant buffers are not implemented"); - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF(cc != Tegra::Shader::ConditionCode::T); - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::Bra, Immediate(target))); + const Node branch = Operation(OperationCode::Bra, Immediate(target)); + + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + if (cc != Tegra::Shader::ConditionCode::T) { + bb.push_back(Conditional(GetConditionCode(cc), {branch})); + } else { + bb.push_back(branch); + } break; } case OpCode::Id::SSY: { -- cgit v1.2.3 From e8235c0215d51236c5b968de971435b7cf74dc81 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 21:14:25 -0300 Subject: shader_decode: Implement I2I --- src/video_core/shader/decode/conversion.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 82fe5e21a..b823b6119 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -18,6 +18,32 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { + case OpCode::Id::I2I_R: { + UNIMPLEMENTED_IF(instr.conversion.selector); + + const bool input_signed = instr.conversion.is_input_signed; + const bool output_signed = instr.conversion.is_output_signed; + + Node value = GetRegister(instr.gpr20); + value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); + + value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, + input_signed); + if (input_signed != output_signed) { + value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); + } + + SetRegister(bb, instr.gpr0, value); + + if (instr.generates_cc) { + const Node zero_condition = + SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0)); + SetInternalFlag(bb, InternalFlag::Zero, zero_condition); + LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete."); + } + + break; + } case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); -- cgit v1.2.3 From 07944a23455379dcc590735f67d764304c457bd7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 21:42:59 -0300 Subject: shader_decode: Implement F2F_C --- src/video_core/shader/decode/conversion.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index b823b6119..ef46ab7a5 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -67,13 +67,21 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } - case OpCode::Id::F2F_R: { + case OpCode::Id::F2F_R: + case OpCode::Id::F2F_C: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); - Node value = GetRegister(instr.gpr20); + Node value = [&]() { + if (instr.is_b_gpr) { + return GetRegister(instr.gpr20); + } else { + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + } + }(); + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { -- cgit v1.2.3 From 518a2bd2060a5c1e6b9acb987439e0009d74fb43 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 22:01:23 -0300 Subject: shader_decode: Implement IMNMX --- src/video_core/shader/decode/arithmetic_integer.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index d494af736..dbdcebbb4 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -79,6 +79,22 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); break; } + case OpCode::Id::IMNMX_C: + case OpCode::Id::IMNMX_R: + case OpCode::Id::IMNMX_IMM: { + UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in IMNMX is not implemented"); + + const bool is_signed = instr.imnmx.is_signed; + + const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); + const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); + const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); + const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); + SetRegister(bb, instr.gpr0, value); + break; + } default: UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 376a8375118937c577063c5d92ad33cfdc33439b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 Dec 2018 22:18:46 -0300 Subject: shader_decode: Implement MOV_SYS --- src/video_core/shader/decode/other.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 5b3f9aa30..9200b5da9 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -46,6 +46,33 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::KIL: { + UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); + + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}", + static_cast(cc)); + + bb.push_back(Operation(OperationCode::Kil)); + break; + } + case OpCode::Id::MOV_SYS: { + switch (instr.sys20) { + case Tegra::Shader::SystemVariable::InvocationInfo: { + LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); + SetRegister(bb, instr.gpr0, Immediate(0u)); + break; + } + case Tegra::Shader::SystemVariable::Ydirection: { + // Config pack's third value is Y_NEGATE's state. + SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast(instr.sys20.Value())); + } + break; + } case OpCode::Id::BRA: { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "BRA with constant buffers are not implemented"); -- cgit v1.2.3 From cf4a08d95098370868fb631a0436f2a4968df9af Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 Dec 2018 03:16:09 -0300 Subject: shader_decode: Implement HADD2_IMM and HMUL2_IMM --- .../shader/decode/arithmetic_half_immediate.cpp | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 8d8a2dad9..5c280a1a6 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -16,7 +16,34 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { + UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); + } else { + UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); + } + UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, + "Half float immediate saturation not implemented"); + + Node op_a = GetRegister(instr.gpr8); + op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); + + const Node op_b = UnpackHalfImmediate(instr, true); + + Node value = [&]() { + MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; + switch (opcode->get().GetId()) { + case OpCode::Id::HADD2_IMM: + return Operation(OperationCode::HAdd, meta, op_a, op_b); + case OpCode::Id::HMUL2_IMM: + return Operation(OperationCode::HMul, meta, op_a, op_b); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); + + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From 68c99d2597717e5717e725efcfdb2bd53146d08c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:31:46 -0300 Subject: shader_decode: Implement HADD2 and HMUL2 --- src/video_core/shader/decode/arithmetic_half.cpp | 49 +++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 3b189b0d1..a6c6f3174 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -16,7 +16,54 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + if (opcode->get().GetId() == OpCode::Id::HADD2_C || + opcode->get().GetId() == OpCode::Id::HADD2_R) { + UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); + } + UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, + "Half float saturation not implemented"); + + const bool negate_a = + opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; + const bool negate_b = + opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; + + const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); + + // instr.alu_half.type_a + + Node op_b = [&]() { + switch (opcode->get().GetId()) { + case OpCode::Id::HADD2_C: + case OpCode::Id::HMUL2_C: + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + case OpCode::Id::HADD2_R: + case OpCode::Id::HMUL2_R: + return GetRegister(instr.gpr20); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); + op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); + + Node value = [&]() { + MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; + switch (opcode->get().GetId()) { + case OpCode::Id::HADD2_C: + case OpCode::Id::HADD2_R: + return Operation(OperationCode::HAdd, meta, op_a, op_b); + case OpCode::Id::HMUL2_C: + case OpCode::Id::HMUL2_R: + return Operation(OperationCode::HMul, meta, op_a, op_b); + default: + UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); + return Immediate(0); + } + }(); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); + + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From 21f9e9da092ddd96fe9f149660a5bf4f676c8413 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 Dec 2018 19:54:12 -0300 Subject: shader_decode: Implement HSETP2 --- .../shader/decode/half_set_predicate.cpp | 38 +++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 5fe123ea5..d7d63d50a 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -11,12 +11,48 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); + + Node op_a = GetRegister(instr.gpr8); + op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); + + const Node op_b = [&]() { + switch (opcode->get().GetId()) { + case OpCode::Id::HSETP2_R: + return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, + instr.hsetp2.negate_b); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); + + // We can't use the constant predicate as destination. + ASSERT(instr.hsetp2.pred3 != static_cast(Pred::UnusedIndex)); + + const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); + + const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); + + MetaHalfArithmetic meta = { + false, {instr.hsetp2.type_a, instr.hsetp2.type_b}, instr.hsetp2.h_and != 0}; + const Node first_pred = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); + + // Set the primary predicate to the result of Predicate OP SecondPredicate + const Node value = Operation(combiner, first_pred, second_pred); + SetPredicate(bb, instr.hsetp2.pred3, value); + + if (instr.hsetp2.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled + const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); + SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); + } return pc; } -- cgit v1.2.3 From 8d42feb09b25825dad786cf311c9e7721c0f6c7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:33:15 -0300 Subject: shader_decode: Implement LD_L --- src/video_core/shader/decode/memory.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 6219f8ee6..49b9a9eab 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -104,6 +104,24 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { } break; } + case OpCode::Id::LD_L: { + UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", + static_cast(instr.ld_l.unknown.Value())); + + const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8), + Immediate(static_cast(instr.smem_imm))); + const Node lmem = GetLocalMemory(index); + + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bytes32: + SetRegister(bb, instr.gpr0, lmem); + break; + default: + UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", + static_cast(instr.ldst_sl.type.Value())); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); -- cgit v1.2.3 From b184ca9089a49646d074ef898c151089207ccd76 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 02:33:31 -0300 Subject: shader_decode: Implement ST_L --- src/video_core/shader/decode/memory.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 49b9a9eab..c70e2ff02 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -152,6 +152,23 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } + case OpCode::Id::ST_L: { + // UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", + // static_cast(instr.st_l.unknown.Value())); + + const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), + Immediate(static_cast(instr.smem_imm))); + + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bytes32: + SetLocalMemory(bb, index, GetRegister(instr.gpr0)); + break; + default: + UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", + static_cast(instr.ldst_sl.type.Value())); + } + break; + } case OpCode::Id::TEX: { Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; -- cgit v1.2.3 From a40fd075164a5f86367dfa7bea4d7815148e63b7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 Dec 2018 22:18:44 -0300 Subject: shader_decode: Implement LOP3 --- .../shader/decode/arithmetic_integer.cpp | 60 ++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index dbdcebbb4..145bbcfc8 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -11,6 +11,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Register; u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -79,6 +80,24 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); break; } + case OpCode::Id::LOP3_C: + case OpCode::Id::LOP3_R: + case OpCode::Id::LOP3_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in LOP3 is not implemented"); + + const Node op_c = GetRegister(instr.gpr39); + const Node lut = [&]() { + if (opcode->get().GetId() == OpCode::Id::LOP3_R) { + return Immediate(instr.alu.lop3.GetImmLut28()); + } else { + return Immediate(instr.alu.lop3.GetImmLut48()); + } + }(); + + WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut); + break; + } case OpCode::Id::IMNMX_C: case OpCode::Id::IMNMX_R: case OpCode::Id::IMNMX_IMM: { @@ -102,4 +121,45 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { return pc; } +void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, + Node imm_lut) { + constexpr u32 lop_iterations = 32; + const Node one = Immediate(1); + const Node two = Immediate(2); + + Node value{}; + for (u32 i = 0; i < lop_iterations; ++i) { + const Node shift_amount = Immediate(i); + + const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); + const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); + + const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); + const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); + const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); + + const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); + const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); + const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); + + const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); + const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); + + const Node shifted_bit = + Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); + const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); + + const Node right = + Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); + + if (i > 0) { + value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); + } else { + value = right; + } + } + + SetRegister(bb, dest, value); +} + } // namespace VideoCommon::Shader \ No newline at end of file -- cgit v1.2.3 From 4fd06efeb94ff5fc5af4c5e4b9e8a4fa95d3b383 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 Dec 2018 00:31:58 -0300 Subject: shader_decode: Implement IADD3 --- .../shader/decode/arithmetic_integer.cpp | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 145bbcfc8..3b9b9d6d9 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -9,6 +9,7 @@ namespace VideoCommon::Shader { +using Tegra::Shader::IAdd3Height; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; @@ -42,6 +43,66 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); break; } + case OpCode::Id::IADD3_C: + case OpCode::Id::IADD3_R: + case OpCode::Id::IADD3_IMM: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in IADD3 is not implemented"); + + Node op_c = GetRegister(instr.gpr39); + + const auto ApplyHeight = [&](IAdd3Height height, Node value) { + switch (height) { + case IAdd3Height::None: + return value; + case IAdd3Height::LowerHalfWord: + return Operation(OperationCode::IBitwiseAnd, NO_PRECISE, value, Immediate(0xffff)); + case IAdd3Height::UpperHalfWord: + return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, value, + Immediate(16)); + default: + UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast(height)); + return Immediate(0); + } + }; + + if (opcode->get().GetId() == OpCode::Id::IADD3_R) { + op_a = ApplyHeight(instr.iadd3.height_a, op_a); + op_b = ApplyHeight(instr.iadd3.height_b, op_b); + op_c = ApplyHeight(instr.iadd3.height_c, op_c); + } + + op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); + op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); + op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); + + const Node value = [&]() { + const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); + if (opcode->get().GetId() != OpCode::Id::IADD3_R) { + return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); + } + const Node shifted = [&]() { + switch (instr.iadd3.mode) { + case Tegra::Shader::IAdd3Mode::RightShift: + // TODO(tech4me): According to + // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 + // The addition between op_a and op_b should be done in uint33, more + // investigation required + return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, + Immediate(16)); + case Tegra::Shader::IAdd3Mode::LeftShift: + return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, + Immediate(16)); + default: + return add_ab; + } + }(); + return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); + }(); + + SetRegister(bb, instr.gpr0, value); + break; + } case OpCode::Id::ISCADD_C: case OpCode::Id::ISCADD_R: case OpCode::Id::ISCADD_IMM: { -- cgit v1.2.3 From c9cf899d1852da73e90ead3d5c0eeee58de6152d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 Dec 2018 00:43:23 -0300 Subject: shader_decode: Implement LEA --- .../shader/decode/arithmetic_integer.cpp | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 3b9b9d6d9..b12dc5ba8 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -12,6 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::IAdd3Height; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; using Tegra::Shader::Register; u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { @@ -175,6 +176,60 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::LEA_R2: + case OpCode::Id::LEA_R1: + case OpCode::Id::LEA_IMM: + case OpCode::Id::LEA_RZ: + case OpCode::Id::LEA_HI: { + const auto [op_a, op_b, op_c] = [&]() -> std::tuple { + switch (opcode->get().GetId()) { + case OpCode::Id::LEA_R2: { + return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), + Immediate(static_cast(instr.lea.r2.entry_a))}; + } + + case OpCode::Id::LEA_R1: { + const bool neg = instr.lea.r1.neg != 0; + return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + GetRegister(instr.gpr20), + Immediate(static_cast(instr.lea.r1.entry_a))}; + } + + case OpCode::Id::LEA_IMM: { + const bool neg = instr.lea.imm.neg != 0; + return {Immediate(static_cast(instr.lea.imm.entry_a)), + GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + Immediate(static_cast(instr.lea.imm.entry_b))}; + } + + case OpCode::Id::LEA_RZ: { + const bool neg = instr.lea.rz.neg != 0; + return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), + GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + Immediate(static_cast(instr.lea.rz.entry_a))}; + } + + case OpCode::Id::LEA_HI: + default: + UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); + + return {Immediate(static_cast(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), + Immediate(static_cast(instr.lea.imm.entry_b))}; + } + }(); + + UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast(Pred::UnusedIndex), + "Unhandled LEA Predicate"); + + const Node shifted_c = + Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); + const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); + const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); + + SetRegister(bb, instr.gpr0, value); + + break; + } default: UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); } -- cgit v1.2.3 From 946c86f0bb9fd7a2e1331d27c059bcc6e7cb3c99 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 03:12:16 -0300 Subject: shader_decode: Fixup clang-format --- src/video_core/shader/decode/arithmetic_half.cpp | 3 +-- src/video_core/shader/decode/xmad.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index a6c6f3174..9547eae5d 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -20,8 +20,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) { opcode->get().GetId() == OpCode::Id::HADD2_R) { UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); } - UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, - "Half float saturation not implemented"); + UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); const bool negate_a = opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index fcab1fb80..596f0ddc8 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in XMAD is not implemented"); - Node op_a = GetRegister(instr.gpr8); // instr.xmad.sign_a + Node op_a = GetRegister(instr.gpr8); // TODO(bunnei): Needs to be fixed once op_a or op_b is signed UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); -- cgit v1.2.3 From 59b34b1d76371bc1bf70ca263a1ac63293a8409e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 03:18:54 -0300 Subject: shader_ir: Fixup file inclusions and clang-format --- src/video_core/shader/decode/other.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 9200b5da9..9630ef831 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -9,9 +9,9 @@ namespace VideoCommon::Shader { +using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -using Tegra::Shader::ConditionCode; u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; -- cgit v1.2.3 From 148a6418ede720681f464eca928c7c445f37db79 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 17:25:49 -0300 Subject: shader_decode: Implement FFMA --- src/video_core/shader/decode/ffma.cpp | 37 ++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 2044113f0..0adc85476 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -16,7 +16,42 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); + UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", + instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO + UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", + instr.ffma.tab5980_1.Value()); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in FFMA is not implemented"); + + const Node op_a = GetRegister(instr.gpr8); + + auto [op_b, op_c] = [&]() -> std::tuple { + switch (opcode->get().GetId()) { + case OpCode::Id::FFMA_CR: { + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + GetRegister(instr.gpr39)}; + } + case OpCode::Id::FFMA_RR: + return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; + case OpCode::Id::FFMA_RC: { + return {GetRegister(instr.gpr39), + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + } + case OpCode::Id::FFMA_IMM: + return {GetImmediate19(instr), GetRegister(instr.gpr39)}; + default: + UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); + } + }(); + + op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); + op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); + + Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); + value = GetSaturatedFloat(value, instr.alu.saturate_d); + + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From fc46ecddb3bca4861babbf610cd64ab9fdc1bb08 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 18:47:22 -0300 Subject: video_core: Return safe values after an assert hits --- src/video_core/shader/decode/arithmetic.cpp | 1 + src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 1 + src/video_core/shader/decode/bfi.cpp | 1 + src/video_core/shader/decode/conversion.cpp | 8 ++++---- src/video_core/shader/decode/ffma.cpp | 1 + src/video_core/shader/decode/xmad.cpp | 8 ++++---- 6 files changed, 12 insertions(+), 8 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 9f8c27b3e..ef846bd9a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -115,6 +115,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { default: UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", static_cast(instr.sub_op.Value())); + return Immediate(0); } }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index ee5754161..57d9f54f7 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -62,6 +62,7 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation return op_b; default: UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast(logic_op)); + return Immediate(0); } }(); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 6a851b22e..a750aca30 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -24,6 +24,7 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; default: UNREACHABLE(); + return {Immediate(0), Immediate(0)}; } }(); const Node insert = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index ef46ab7a5..791f03fe0 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -96,11 +96,10 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { return Operation(OperationCode::FCeil, PRECISE, value); case Tegra::Shader::F2fRoundingOp::Trunc: return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - static_cast(instr.conversion.f2f.rounding.Value())); - break; } + UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", + static_cast(instr.conversion.f2f.rounding.Value())); + return Immediate(0); }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); @@ -135,6 +134,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { default: UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", static_cast(instr.conversion.f2i.rounding.Value())); + return Immediate(0); } }(); const bool is_signed = instr.conversion.is_output_signed; diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 0adc85476..a17ebd6db 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -42,6 +42,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { return {GetImmediate19(instr), GetRegister(instr.gpr39)}; default: UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); + return {Immediate(0), Immediate(0)}; } }(); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 596f0ddc8..0466069ae 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -42,9 +42,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { case OpCode::Id::XMAD_IMM: return {instr.xmad.merge_37, Immediate(static_cast(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); } + UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); + return {false, Immediate(0), Immediate(0)}; }(); if (instr.xmad.high_a) { @@ -85,9 +85,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { NO_PRECISE, original_b, Immediate(16)); return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, op_c, shifted_b); } - default: { + default: UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast(instr.xmad.mode.Value())); - } + return Immediate(0); } }(); -- cgit v1.2.3 From af5c6e4ccb0cedc764fff0a62b46cd969c21b006 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 19:11:18 -0300 Subject: shader_decode: Implement IADD32I --- src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 57d9f54f7..a158d345a 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -24,6 +24,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { Node op_b = Immediate(static_cast(instr.alu.imm20_32)); switch (opcode->get().GetId()) { + case OpCode::Id::IADD32I: { + UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, + "Condition codes generation in IADD32I is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); + + op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); + + const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + SetRegister(bb, instr.gpr0, value); + break; + } case OpCode::Id::LOP32I: { UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, "Condition codes generation in LOP32I is not implemented"); -- cgit v1.2.3 From 2d9136cec60e8feaa4af258b977962b887d675df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 Dec 2018 19:31:55 -0300 Subject: shader_decode: Fixup FSET --- src/video_core/shader/decode/float_set.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 355fabc09..b69d94c2e 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -47,8 +47,8 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); - if (instr.generates_cc.Value() != 0) { - const Node is_zero = Operation(OperationCode::LogicalFEqual, predicate, Immediate(0.0f)); + if (instr.generates_cc) { + const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); SetInternalFlag(bb, InternalFlag::Zero, is_zero); LOG_WARNING(HW_GPU, "FSET condition code is incomplete"); } -- cgit v1.2.3 From 03e088a4f44af1212da0c7c23f95293a6e129a35 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 22 Dec 2018 01:20:57 -0300 Subject: shader_ir: Fixup TEX and TEXS and partially fix TLD4 decompiling --- src/video_core/shader/decode/memory.cpp | 99 ++++++++++++++++----------------- 1 file changed, 49 insertions(+), 50 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c70e2ff02..500a32af5 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -183,28 +183,24 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array); - if (depth_compare) { - SetRegister(bb, instr.gpr0, texture); - } else { - MetaComponents meta; - std::array dest; - - std::size_t dest_elem = 0; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - meta.components_map[dest_elem] = static_cast(elem); - dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); - - ++dest_elem; + MetaComponents meta; + std::array dest; + + std::size_t dest_elem = 0; + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; } - std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + meta.components_map[dest_elem] = static_cast(elem); + dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); - bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, - dest[0], dest[1], dest[2], dest[3])); + ++dest_elem; } + std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + + bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0], + dest[1], dest[2], dest[3])); break; } case OpCode::Id::TEXS: { @@ -272,7 +268,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { params.push_back(Immediate(static_cast(instr.tld4.component))); const auto& sampler = GetSampler(instr.sampler, texture_type, false, depth_compare); - const MetaTexture meta{sampler, num_coordinates}; + MetaTexture meta{sampler, num_coordinates}; const Node texture = Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); @@ -331,7 +327,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const auto& sampler = GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); - const MetaTexture meta{sampler, num_coords}; + MetaTexture meta{sampler, num_coords}; WriteTexsInstructionFloat( bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params))); @@ -350,7 +346,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { - const MetaTexture meta_texture{sampler}; + MetaTexture meta_texture{sampler}; const MetaComponents meta_components{{0, 1, 2, 3}}; const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture, @@ -402,7 +398,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { texture_type = TextureType::Texture2D; } - const MetaTexture meta_texture{sampler, static_cast(coords.size())}; + MetaTexture meta_texture{sampler, static_cast(coords.size())}; const Node texture = Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords)); @@ -474,7 +470,8 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruct Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, - std::size_t bias_offset, std::vector&& coords) { + std::size_t array_offset, std::size_t bias_offset, + std::vector&& coords) { UNIMPLEMENTED_IF_MSG( (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || (texture_type == TextureType::TextureCube && is_array && depth_compare), @@ -486,26 +483,26 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; + // LOD selection (either via bias or explicit textureLod) not supported in GL for + // sampler2DArrayShadow and samplerCubeArrayShadow. const bool gl_lod_supported = - !((texture_type == TextureType::Texture2D && is_array && depth_compare) || - (texture_type == TextureType::TextureCube && !is_array && depth_compare)); + !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); const OperationCode read_method = lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; - const MetaTexture meta{sampler, static_cast(coords.size())}; + UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); + std::optional array_offset_value; + if (is_array) + array_offset_value = static_cast(array_offset); + MetaTexture meta{sampler, static_cast(coords.size()), array_offset_value}; std::vector params = std::move(coords); - if (process_mode != TextureProcessMode::None) { + if (process_mode != TextureProcessMode::None && gl_lod_supported) { if (process_mode == TextureProcessMode::LZ) { - if (gl_lod_supported) { - params.push_back(Immediate(0)); - } else { - // Lod 0 is emulated by a big negative bias in scenarios that are not supported by - // GLSL - params.push_back(Immediate(-1000)); - } + params.push_back(Immediate(0.0f)); } else { // If present, lod or bias are always stored in the register indexed by the gpr20 field // with an offset depending on the usage of the other registers @@ -518,8 +515,8 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && - process_mode != Tegra::Shader::TextureProcessMode::LZ); + const bool lod_bias_enabled = + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); @@ -536,29 +533,30 @@ Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { coords.push_back(Immediate(0.0f)); } + std::size_t array_offset{}; + if (is_array) { + array_offset = coords.size(); + coords.push_back(GetRegister(array_register)); + } if (depth_compare) { // Depth is always stored in the register signaled by gpr20 // or in the next register if lod or bias are used const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); coords.push_back(GetRegister(depth_register)); } - if (is_array) { - coords.push_back(GetRegister(array_register)); - } // Fill ignored coordinates while (coords.size() < total_coord_count) { coords.push_back(Immediate(0)); } - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0, - std::move(coords)); + return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, + 0, std::move(coords)); } Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array) { - - const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && - process_mode != Tegra::Shader::TextureProcessMode::LZ); + const bool lod_bias_enabled = + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); @@ -577,22 +575,23 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); } + std::size_t array_offset{}; + if (is_array) { + array_offset = coords.size(); + coords.push_back(GetRegister(array_register)); + } if (depth_compare) { // Depth is always stored in the register signaled by gpr20 // or in the next register if lod or bias are used const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); coords.push_back(GetRegister(depth_register)); } - if (is_array) { - coords.push_back( - Operation(OperationCode::ICastFloat, NO_PRECISE, GetRegister(array_register))); - } // Fill ignored coordinates while (coords.size() < total_coord_count) { coords.push_back(Immediate(0)); } - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, + return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, (coord_count > 2 ? 1 : 0), std::move(coords)); } -- cgit v1.2.3 From ec98e4d842d5ba04b329c866f5c9b1e7314069f2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 00:38:01 -0300 Subject: shader_decode: Update TLD4 reflecting #1862 changes --- src/video_core/shader/decode/memory.cpp | 101 ++++++++++++++++---------------- 1 file changed, 49 insertions(+), 52 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 500a32af5..cfdb92807 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -225,7 +225,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } case OpCode::Id::TLD4: { - ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D); ASSERT(instr.tld4.array == 0); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); @@ -238,63 +237,29 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); } + const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); - auto texture_type = instr.tld4.texture_type.Value(); - u32 num_coordinates = static_cast(GetCoordCount(texture_type)); - if (depth_compare) - num_coordinates += 1; + const bool is_array = instr.tld4.array != 0; + const Node texture = GetTld4Code(instr, texture_type, depth_compare, is_array); - std::vector params; - - switch (num_coordinates) { - case 2: { - params.push_back(GetRegister(instr.gpr8)); - params.push_back(GetRegister(instr.gpr8.Value() + 1)); - break; - } - case 3: { - params.push_back(GetRegister(instr.gpr8)); - params.push_back(GetRegister(instr.gpr8.Value() + 1)); - params.push_back(GetRegister(instr.gpr8.Value() + 2)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled coordinates number {}", static_cast(num_coordinates)); - params.push_back(GetRegister(instr.gpr8)); - params.push_back(GetRegister(instr.gpr8.Value() + 1)); - num_coordinates = 2; - texture_type = Tegra::Shader::TextureType::Texture2D; - } - params.push_back(Immediate(static_cast(instr.tld4.component))); - - const auto& sampler = GetSampler(instr.sampler, texture_type, false, depth_compare); - MetaTexture meta{sampler, num_coordinates}; - - const Node texture = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); - - if (depth_compare) { - SetRegister(bb, instr.gpr0, texture); - } else { - MetaComponents meta; - std::array dest; - - std::size_t dest_elem = 0; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - meta.components_map[dest_elem] = static_cast(elem); - dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); + MetaComponents meta_components; + std::array dest; - ++dest_elem; + std::size_t dest_elem = 0; + for (std::size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; } - std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + meta_components.components_map[dest_elem] = static_cast(elem); + dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); - bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, - dest[0], dest[1], dest[2], dest[3])); + ++dest_elem; } + std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + + bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture, + dest[0], dest[1], dest[2], dest[3])); break; } case OpCode::Id::TLD4S: { @@ -595,6 +560,38 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, (coord_count > 2 ? 1 : 0), std::move(coords)); } +Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, + bool is_array) { + const std::size_t coord_count = GetCoordCount(texture_type); + const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); + const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::vector params; + + for (size_t i = 0; i < coord_count; ++i) { + params.push_back(GetRegister(coord_register + i)); + } + std::size_t array_offset{}; + if (is_array) { + array_offset = params.size(); + params.push_back(GetRegister(array_register)); + } + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + std::optional array_offset_value; + if (is_array) + array_offset_value = static_cast(array_offset); + MetaTexture meta{sampler, static_cast(params.size()), array_offset_value}; + + return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); +} + std::tuple ShaderIR::ValidateAndGetCoordinateElement( TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs) { -- cgit v1.2.3 From 55e6786254d33e3501002bf7fbdd52552a0df32a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 01:18:33 -0300 Subject: shader_decode: Implement TLDS (untested) --- src/video_core/shader/decode/memory.cpp | 69 +++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 8 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index cfdb92807..ce3445512 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -204,7 +204,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } case OpCode::Id::TEXS: { - Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; + const TextureType texture_type{instr.texs.GetTextureType()}; const bool is_array{instr.texs.IsArrayTexture()}; const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); @@ -373,6 +373,22 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { GetRegister(RZ), GetRegister(RZ))); break; } + case OpCode::Id::TLDS: { + const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; + const bool is_array{instr.tlds.IsArrayTexture()}; + + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); + + if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); + } + + const Node texture = GetTldsCode(instr, texture_type, is_array); + WriteTexsInstructionFloat(bb, instr, texture); + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } @@ -576,22 +592,59 @@ Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool dep for (size_t i = 0; i < coord_count; ++i) { params.push_back(GetRegister(coord_register + i)); } - std::size_t array_offset{}; + std::optional array_offset; if (is_array) { - array_offset = params.size(); + array_offset = static_cast(params.size()); params.push_back(GetRegister(array_register)); } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); - - std::optional array_offset_value; - if (is_array) - array_offset_value = static_cast(array_offset); - MetaTexture meta{sampler, static_cast(params.size()), array_offset_value}; + MetaTexture meta{sampler, static_cast(params.size()), array_offset}; return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); } +Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { + const std::size_t type_coord_count = GetCoordCount(texture_type); + const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); + const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // if is array gpr20 is used + const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); + + const u64 last_coord_register = + ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array + ? static_cast(instr.gpr20.Value()) + : coord_register + 1; + + std::vector params; + + for (std::size_t i = 0; i < type_coord_count; ++i) { + const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); + params.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + } + std::optional array_offset; + if (is_array) { + array_offset = static_cast(params.size()); + params.push_back(GetRegister(array_register)); + } + const auto coords_count = static_cast(params.size()); + + if (lod_enabled) { + // When lod is used always is in grp20 + params.push_back(GetRegister(instr.gpr20)); + } else { + params.push_back(Immediate(0)); + } + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + MetaTexture meta{sampler, coords_count, array_offset}; + + return Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); +} + std::tuple ShaderIR::ValidateAndGetCoordinateElement( TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs) { -- cgit v1.2.3 From 027f443e699652fc30a849efaf8c12725a7b5729 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 01:33:47 -0300 Subject: shader_decode: Implement POPC --- src/video_core/shader/decode/arithmetic_integer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index b12dc5ba8..271ce205b 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -119,6 +119,16 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::POPC_C: + case OpCode::Id::POPC_R: + case OpCode::Id::POPC_IMM: { + if (instr.popc.invert) { + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); + } + const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); + SetRegister(bb, instr.gpr0, value); + break; + } case OpCode::Id::SEL_C: case OpCode::Id::SEL_R: case OpCode::Id::SEL_IMM: { -- cgit v1.2.3 From dd91650aaf217196a2b1ced17df24bd74349843d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 02:26:35 -0300 Subject: shader_decode: Implement HFMA2 --- src/video_core/shader/decode/hfma2.cpp | 54 +++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 5ce08481e..bf7491804 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -9,6 +11,8 @@ namespace VideoCommon::Shader { +using Tegra::Shader::HalfPrecision; +using Tegra::Shader::HalfType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { + UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + } else { + UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + } + + constexpr auto identity = HalfType::H0_H1; + + const HalfType type_a = instr.hfma2.type_a; + const Node op_a = GetRegister(instr.gpr8); + + bool neg_b{}, neg_c{}; + auto [saturate, type_b, op_b, type_c, + op_c] = [&]() -> std::tuple { + switch (opcode->get().GetId()) { + case OpCode::Id::HFMA2_CR: + neg_b = instr.hfma2.negate_b; + neg_c = instr.hfma2.negate_c; + return {instr.hfma2.saturate, instr.hfma2.type_b, + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39, + GetRegister(instr.gpr39)}; + case OpCode::Id::HFMA2_RC: + neg_b = instr.hfma2.negate_b; + neg_c = instr.hfma2.negate_c; + return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), + instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + case OpCode::Id::HFMA2_RR: + neg_b = instr.hfma2.rr.negate_b; + neg_c = instr.hfma2.rr.negate_c; + return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), + instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; + case OpCode::Id::HFMA2_IMM_R: + neg_c = instr.hfma2.negate_c; + return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), + instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; + default: + return {false, identity, Immediate(0), identity, Immediate(0)}; + } + }(); + UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); + + op_b = GetOperandAbsNegHalf(op_b, false, neg_b); + op_c = GetOperandAbsNegHalf(op_c, false, neg_c); + + MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; + Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); + + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From 7e13e8bfcb4d3bb3c9d7eafb81e790e244cdfdd7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 17:07:49 -0300 Subject: shader_decode: Implement PSET --- src/video_core/shader/decode/predicate_set_register.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 67a06b5b4..04ddd9f9e 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -16,7 +16,22 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in PSET is not implemented"); + + const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); + const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); + const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); + + const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); + + const OperationCode combiner = GetPredicateCombiner(instr.pset.op); + const Node result = Operation(combiner, first_pred, second_pred); + + const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); + const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); + const Node value = Operation(OperationCode::Select, PRECISE, true_value, false_value); + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From 3f1136ac6f39e3d0e0f2c250905a79c9b47aa28c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 17:14:43 -0300 Subject: shader_decode: Implement CSETP --- .../shader/decode/predicate_set_predicate.cpp | 51 ++++++++++++++++------ 1 file changed, 37 insertions(+), 14 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 24352170d..6ea6daceb 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -17,25 +17,48 @@ u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); + switch (opcode->get().GetId()) { + case OpCode::Id::PSETP: { + const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); + const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); + // We can't use the constant predicate as destination. + ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); + const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); + const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); + const Node predicate = Operation(combiner, op_a, op_b); - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - SetPredicate( - bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), second_pred)); + if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if + // enabled + SetPredicate(bb, instr.psetp.pred0, + Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), + second_pred)); + } + break; + } + case OpCode::Id::CSETP: { + const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); + const Node condition_code = GetConditionCode(instr.csetp.cc); + + const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); + + if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { + SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); + } + if (instr.csetp.pred0 != static_cast(Pred::UnusedIndex)) { + const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); + SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); } return pc; -- cgit v1.2.3 From 8332482c24136091c3fa2c95d7efdd3dd1fa9adf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 17:24:18 -0300 Subject: shader_decode: Implement R2P --- .../shader/decode/register_set_predicate.cpp | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 29a348cf5..796039cd9 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -16,7 +16,34 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); + + const Node apply_mask = [&]() { + switch (opcode->get().GetId()) { + case OpCode::Id::R2P_IMM: + return Immediate(static_cast(instr.r2p.immediate_mask)); + default: + UNREACHABLE(); + return Immediate(static_cast(instr.r2p.immediate_mask)); + } + }(); + const Node mask = + Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, GetRegister(instr.gpr8), + Immediate(static_cast(instr.r2p.byte))); + + constexpr u32 programmable_preds = 7; + for (u64 pred = 0; pred < programmable_preds; ++pred) { + const Node shift = Immediate(1u << static_cast(pred)); + + const Node apply_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift); + const Node condition = Operation(OperationCode::LogicalUEqual, apply_compare, Immediate(0)); + + const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift); + const Node value = Operation(OperationCode::LogicalUEqual, value_compare, Immediate(0)); + + const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); + bb.push_back(Conditional(condition, {code})); + } return pc; } -- cgit v1.2.3 From 2df55985b691d659073dce2d857d46bc152b4842 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Dec 2018 20:59:49 -0300 Subject: shader_decode: Rework HSETP2 --- src/video_core/shader/decode/half_set_predicate.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d7d63d50a..72cc3d5c8 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -39,10 +39,12 @@ u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) { const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); + const OperationCode pair_combiner = + instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - MetaHalfArithmetic meta = { - false, {instr.hsetp2.type_a, instr.hsetp2.type_b}, instr.hsetp2.h_and != 0}; - const Node first_pred = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); + MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; + const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); + const Node first_pred = Operation(pair_combiner, comparison); // Set the primary predicate to the result of Predicate OP SecondPredicate const Node value = Operation(combiner, first_pred, second_pred); -- cgit v1.2.3 From b11e0b94c7ce0d965a6149c98c48cda967ec3c04 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Dec 2018 00:51:52 -0300 Subject: shader_decode: Implement HSET2 --- src/video_core/shader/decode/half_set.cpp | 44 ++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index af363d5d2..b4ac06144 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -16,7 +18,47 @@ u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + UNIMPLEMENTED_IF(instr.hset2.ftz != 0); + + // instr.hset2.type_a + // instr.hset2.type_b + Node op_a = GetRegister(instr.gpr8); + Node op_b = [&]() { + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_R: + return GetRegister(instr.gpr20); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); + + op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); + op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); + + const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + + MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; + const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); + + const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); + + // HSET2 operates on each half float in the pack. + std::array values; + for (u32 i = 0; i < 2; ++i) { + const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; + const Node true_value = Immediate(raw_value << (i * 16)); + const Node false_value = Immediate(0); + + const Node comparison = + Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); + const Node predicate = Operation(combiner, comparison, second_pred); + + values[i] = Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); + } + + const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); + SetRegister(bb, instr.gpr0, value); return pc; } -- cgit v1.2.3 From a1b845b6514e135a5810b12c20261ec646216c28 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Dec 2018 01:23:00 -0300 Subject: shader_decode: Implement VMAD and VSETP --- src/video_core/shader/decode/video.cpp | 120 +++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 src/video_core/shader/decode/video.cpp (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp new file mode 100644 index 000000000..9510896e4 --- /dev/null +++ b/src/video_core/shader/decode/video.cpp @@ -0,0 +1,120 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; +using Tegra::Shader::VideoType; +using Tegra::Shader::VmadShr; + +u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + const Node op_a = + GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, + instr.video.type_a, instr.video.byte_height_a); + const Node op_b = [&]() { + if (instr.video.use_register_b) { + return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, + instr.video.signed_b, instr.video.type_b, + instr.video.byte_height_b); + } + if (instr.video.signed_b) { + const auto imm = static_cast(instr.alu.GetImm20_16()); + return Immediate(static_cast(imm)); + } else { + return Immediate(instr.alu.GetImm20_16()); + } + }(); + + switch (opcode->get().GetId()) { + case OpCode::Id::VMAD: { + UNIMPLEMENTED_IF_MSG(instr.generates_cc, + "Condition codes generation in VMAD is not implemented"); + + const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; + const Node op_c = GetRegister(instr.gpr39); + + Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); + value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); + + if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { + const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); + value = + SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); + } + + SetRegister(bb, instr.gpr0, value); + + break; + } + case OpCode::Id::VSETP: { + // We can't use the constant predicate as destination. + ASSERT(instr.vsetp.pred3 != static_cast(Pred::UnusedIndex)); + + const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; + const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); + const Node second_pred = GetPredicate(instr.vsetp.pred39, false); + + const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); + + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); + + if (instr.vsetp.pred0 != static_cast(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled + const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); + SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, + Tegra::Shader::VideoType type, u64 byte_height) { + if (!is_chunk) { + const auto offset = static_cast(byte_height * 8); + const Node shift = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, + op, Immediate(offset)); + return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, shift, + Immediate(0xff)); + } + const Node zero = Immediate(0); + + switch (type) { + case Tegra::Shader::VideoType::Size16_Low: + return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, op, + Immediate(0xffff)); + case Tegra::Shader::VideoType::Size16_High: + return SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, op, + Immediate(16)); + case Tegra::Shader::VideoType::Size32: + // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used + // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. + UNIMPLEMENTED(); + return zero; + case Tegra::Shader::VideoType::Invalid: + UNREACHABLE_MSG("Invalid instruction encoding"); + return zero; + default: + UNREACHABLE(); + return zero; + } +} + +} // namespace VideoCommon::Shader \ No newline at end of file -- cgit v1.2.3 From e1fea1e0c594cc7c5a404e7006a4b4b2f29200ae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Dec 2018 02:24:38 -0300 Subject: video_core: Implement IR based geometry shaders --- src/video_core/shader/decode/other.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 9630ef831..1918762b8 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -12,6 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::Register; u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -140,6 +141,30 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::OUT_R: { + UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, + "Stream buffer is not supported"); + + if (instr.out.emit) { + // gpr0 is used to store the next address and gpr8 contains the address to emit. + // Hardware uses pointers here but we just ignore it + bb.push_back(Operation(OperationCode::EmitVertex)); + SetRegister(bb, instr.gpr0, Immediate(0)); + } + if (instr.out.cut) { + bb.push_back(Operation(OperationCode::EndPrimitive)); + } + break; + } + case OpCode::Id::ISBERD: { + UNIMPLEMENTED_IF(instr.isberd.o != 0); + UNIMPLEMENTED_IF(instr.isberd.skew != 0); + UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); + UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); + LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); + SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); + break; + } case OpCode::Id::DEPBAR: { LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); break; -- cgit v1.2.3 From a2e22b435947fd5fb835572c02369af83ceeafce Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Dec 2018 02:26:40 -0300 Subject: shader_decode: Fixup clang-format --- src/video_core/shader/decode/half_set.cpp | 3 ++- src/video_core/shader/decode/register_set_predicate.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index b4ac06144..e34deeff4 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -54,7 +54,8 @@ u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) { Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); const Node predicate = Operation(combiner, comparison, second_pred); - values[i] = Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); + values[i] = + Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); } const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 796039cd9..bbfe2ce05 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -35,7 +35,8 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { for (u64 pred = 0; pred < programmable_preds; ++pred) { const Node shift = Immediate(1u << static_cast(pred)); - const Node apply_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift); + const Node apply_compare = + Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift); const Node condition = Operation(OperationCode::LogicalUEqual, apply_compare, Immediate(0)); const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift); -- cgit v1.2.3 From 55a10d02e571532bba7a2a7af605a4cda2743d6d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Dec 2018 02:36:47 -0300 Subject: shader_decode: Fixup PSET --- src/video_core/shader/decode/predicate_set_register.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 04ddd9f9e..6c58496c2 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -26,11 +26,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node result = Operation(combiner, first_pred, second_pred); + const Node predicate = Operation(combiner, first_pred, second_pred); const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = Operation(OperationCode::Select, PRECISE, true_value, false_value); + const Node value = + Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); SetRegister(bb, instr.gpr0, value); return pc; -- cgit v1.2.3 From ea78c78253c6183938da6fc87bc763ed93957499 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Dec 2018 18:13:50 -0300 Subject: shader_decode: Fixup WriteLogicOperation zero comparison --- src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index a158d345a..3b8a60c6b 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -89,7 +89,7 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation return; case PredicateResultMode::NotZero: { // Set the predicate to true if the result is not zero. - const Node compare = Operation(OperationCode::LogicalIEqual, result, Immediate(0)); + const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); SetPredicate(bb, static_cast(predicate), compare); break; } -- cgit v1.2.3 From c68c13e1aaef63674474861fd7be528a49b72206 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 Dec 2018 03:46:14 -0300 Subject: shader_decode: Fixup R2P --- src/video_core/shader/decode/register_set_predicate.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index bbfe2ce05..06a3c7539 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -37,10 +37,11 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { const Node apply_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift); - const Node condition = Operation(OperationCode::LogicalUEqual, apply_compare, Immediate(0)); + const Node condition = + Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift); - const Node value = Operation(OperationCode::LogicalUEqual, value_compare, Immediate(0)); + const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); bb.push_back(Conditional(condition, {code})); -- cgit v1.2.3 From 5af82a8ed4e2e0b7abc9c7da9f7bb5fa1c83de29 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 01:33:56 -0300 Subject: shader_decode: Implement TEXS.F16 --- src/video_core/shader/decode/memory.cpp | 38 ++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ce3445512..679e7f01b 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -219,8 +219,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { if (instr.texs.fp32_flag) { WriteTexsInstructionFloat(bb, instr, texture); } else { - UNIMPLEMENTED(); - // WriteTexsInstructionHalfFloat(bb, instr, texture); + WriteTexsInstructionHalfFloat(bb, instr, texture); } break; } @@ -416,39 +415,52 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, - Node texture) { +void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 MetaComponents meta; std::array dest; - - std::size_t written_components = 0; for (u32 component = 0; component < 4; ++component) { if (!instr.texs.IsComponentEnabled(component)) { continue; } - meta.components_map[written_components] = static_cast(component); + meta.components_map[meta.count] = component; - if (written_components < 2) { + if (meta.count < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - dest[written_components] = GetRegister(instr.gpr0.Value() + written_components % 2); + dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - dest[written_components] = GetRegister(instr.gpr28.Value() + written_components % 2); + dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2); } - - ++written_components; + ++meta.count; } - std::generate(dest.begin() + written_components, dest.end(), [&]() { return GetRegister(RZ); }); + std::generate(dest.begin() + meta.count, dest.end(), [&]() { return GetRegister(RZ); }); bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], dest[3])); } +void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) { + // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half + // float instruction). + + MetaComponents meta; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + meta.components_map[meta.count++] = component; + } + if (meta.count == 0) + return; + + bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture, + GetRegister(instr.gpr0), GetRegister(instr.gpr28))); +} + Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, std::size_t array_offset, std::size_t bias_offset, -- cgit v1.2.3 From d9118d324a7f40ad9227e15408be528273743bee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 01:49:32 -0300 Subject: shader_ir: Remove RZ and use Register::ZeroIndex instead --- src/video_core/shader/decode/memory.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 679e7f01b..60bdd9b73 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -91,12 +91,14 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); const Node composite = - Operation(OperationCode::Composite, op_a, op_b, GetRegister(RZ), GetRegister(RZ)); + Operation(OperationCode::Composite, op_a, op_b, GetRegister(Register::ZeroIndex), + GetRegister(Register::ZeroIndex)); MetaComponents meta{{0, 1, 2, 3}}; bb.push_back(Operation(OperationCode::AssignComposite, meta, composite, GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), - GetRegister(RZ), GetRegister(RZ))); + GetRegister(Register::ZeroIndex), + GetRegister(Register::ZeroIndex))); break; } default: @@ -197,7 +199,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { ++dest_elem; } - std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + std::generate(dest.begin() + dest_elem, dest.end(), + [&]() { return GetRegister(Register::ZeroIndex); }); bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0], dest[1], dest[2], dest[3])); @@ -255,7 +258,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { ++dest_elem; } - std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); }); + std::generate(dest.begin() + dest_elem, dest.end(), + [&]() { return GetRegister(Register::ZeroIndex); }); bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture, dest[0], dest[1], dest[2], dest[3])); @@ -369,7 +373,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const MetaComponents meta_composite{{0, 1, 2, 3}}; bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture, GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), - GetRegister(RZ), GetRegister(RZ))); + GetRegister(Register::ZeroIndex), GetRegister(Register::ZeroIndex))); break; } case OpCode::Id::TLDS: { @@ -438,7 +442,8 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node ++meta.count; } - std::generate(dest.begin() + meta.count, dest.end(), [&]() { return GetRegister(RZ); }); + std::generate(dest.begin() + meta.count, dest.end(), + [&]() { return GetRegister(Register::ZeroIndex); }); bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], dest[3])); -- cgit v1.2.3 From 52223313b10af4c76b516d6ead247a1a201a71d8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 02:17:56 -0300 Subject: shader_ir: Remove Ipa primitive --- src/video_core/shader/decode/other.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 1918762b8..386433d8e 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -134,9 +134,8 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), instr.ipa.sample_mode.Value()}; - const Node input_attr = GetInputAttribute(attribute.index, attribute.element, input_mode); - const Node ipa = Operation(OperationCode::Ipa, input_attr); - const Node value = GetSaturatedFloat(ipa, instr.ipa.saturate); + const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); + const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); SetRegister(bb, instr.gpr0, value); break; -- cgit v1.2.3 From 2faad9bf23dbcedc80dca7ed9ad4b81c0416dd5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 02:58:47 -0300 Subject: shader_decode: Use BitfieldExtract instead of shift + and --- .../shader/decode/arithmetic_integer.cpp | 5 ++--- src/video_core/shader/decode/bfi.cpp | 9 ++------ .../shader/decode/register_set_predicate.cpp | 12 +++++----- src/video_core/shader/decode/video.cpp | 12 +++------- src/video_core/shader/decode/xmad.cpp | 26 +++++----------------- 5 files changed, 18 insertions(+), 46 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 271ce205b..931e0fa1d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -57,10 +57,9 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { case IAdd3Height::None: return value; case IAdd3Height::LowerHalfWord: - return Operation(OperationCode::IBitwiseAnd, NO_PRECISE, value, Immediate(0xffff)); + return BitfieldExtract(value, 0, 16); case IAdd3Height::UpperHalfWord: - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, value, - Immediate(16)); + return BitfieldExtract(value, 16, 16); default: UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast(height)); return Immediate(0); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index a750aca30..b0d8d9eba 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -28,13 +28,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { } }(); const Node insert = GetRegister(instr.gpr8); - - const Node offset = - Operation(OperationCode::UBitwiseAnd, NO_PRECISE, packed_shift, Immediate(0xff)); - - Node bits = - Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, packed_shift, Immediate(8)); - bits = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, bits, Immediate(0xff)); + const Node offset = BitfieldExtract(packed_shift, 0, 8); + const Node bits = BitfieldExtract(packed_shift, 8, 8); const Node value = Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 06a3c7539..14bce9fa4 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -27,20 +27,18 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { return Immediate(static_cast(instr.r2p.immediate_mask)); } }(); - const Node mask = - Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, GetRegister(instr.gpr8), - Immediate(static_cast(instr.r2p.byte))); + const Node mask = GetRegister(instr.gpr8); + const auto offset = static_cast(instr.r2p.byte) * 8; constexpr u32 programmable_preds = 7; for (u64 pred = 0; pred < programmable_preds; ++pred) { - const Node shift = Immediate(1u << static_cast(pred)); + const auto shift = static_cast(pred); - const Node apply_compare = - Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift); + const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); const Node condition = Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); - const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift); + const Node value_compare = BitfieldExtract(mask, offset + shift, 1); const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 9510896e4..b491fbadb 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -88,21 +88,15 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height) { if (!is_chunk) { - const auto offset = static_cast(byte_height * 8); - const Node shift = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, - op, Immediate(offset)); - return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, shift, - Immediate(0xff)); + return BitfieldExtract(op, static_cast(byte_height * 8), 8); } const Node zero = Immediate(0); switch (type) { case Tegra::Shader::VideoType::Size16_Low: - return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, op, - Immediate(0xffff)); + return BitfieldExtract(op, 0, 16); case Tegra::Shader::VideoType::Size16_High: - return SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, op, - Immediate(16)); + return BitfieldExtract(op, 16, 16); case Tegra::Shader::VideoType::Size32: // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 0466069ae..3e37aee4a 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -47,22 +47,10 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { return {false, Immediate(0), Immediate(0)}; }(); - if (instr.xmad.high_a) { - op_a = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_a, NO_PRECISE, op_a, - Immediate(16)); - } else { - op_a = SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, NO_PRECISE, op_a, - Immediate(0xffff)); - } + op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); const Node original_b = op_b; - if (instr.xmad.high_b) { - op_b = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_b, NO_PRECISE, op_a, - Immediate(16)); - } else { - op_b = SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, NO_PRECISE, op_b, - Immediate(0xffff)); - } + op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16); // TODO(Rodrigo): Use an appropiate sign for this operation Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); @@ -75,11 +63,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { case Tegra::Shader::XmadMode::None: return op_c; case Tegra::Shader::XmadMode::CLo: - return SignedOperation(OperationCode::IBitwiseAnd, is_signed_c, NO_PRECISE, op_c, - Immediate(0xffff)); + return BitfieldExtract(op_c, 0, 16); case Tegra::Shader::XmadMode::CHi: - return SignedOperation(OperationCode::ILogicalShiftRight, is_signed_c, NO_PRECISE, op_c, - Immediate(16)); + return BitfieldExtract(op_c, 16, 16); case Tegra::Shader::XmadMode::CBcc: { const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, NO_PRECISE, original_b, Immediate(16)); @@ -94,9 +80,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { // TODO(Rodrigo): Use an appropiate sign for this operation Node sum = Operation(OperationCode::IAdd, product, op_c); if (is_merge) { - const Node a = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, sum, Immediate(0xffff)); + const Node a = BitfieldExtract(sum, 0, 16); const Node b = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(0xffff)); + Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); } -- cgit v1.2.3 From 50195b1704bcdf22d379d31b143172a32ebdfaec Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 03:18:11 -0300 Subject: shader_decode: Use proper primitive names --- src/video_core/shader/decode/memory.cpp | 4 ++-- src/video_core/shader/decode/other.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 60bdd9b73..f3f78a662 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -155,8 +155,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: { - // UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", - // static_cast(instr.st_l.unknown.Value())); + UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", + static_cast(instr.st_l.unknown.Value())); const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), Immediate(static_cast(instr.smem_imm))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 386433d8e..6e6795ba7 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -54,7 +54,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}", static_cast(cc)); - bb.push_back(Operation(OperationCode::Kil)); + bb.push_back(Operation(OperationCode::Discard)); break; } case OpCode::Id::MOV_SYS: { @@ -79,7 +79,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { "BRA with constant buffers are not implemented"); const u32 target = pc + instr.bra.GetBranchTarget(); - const Node branch = Operation(OperationCode::Bra, Immediate(target)); + const Node branch = Operation(OperationCode::Branch, Immediate(target)); const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; if (cc != Tegra::Shader::ConditionCode::T) { @@ -97,7 +97,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { // target of the jump that the SYNC instruction will make. The SSY opcode has a similar // structure to the BRA opcode. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::Ssy, Immediate(target))); + bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); break; } case OpCode::Id::PBK: { @@ -108,7 +108,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { // using SYNC on a PBK address will kill the shader execution. We don't emulate this because // it's very unlikely a driver will emit such invalid shader. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::Pbk, Immediate(target))); + bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); break; } case OpCode::Id::SYNC: { @@ -117,7 +117,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { static_cast(cc)); // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::Sync)); + bb.push_back(Operation(OperationCode::PopFlowStack)); break; } case OpCode::Id::BRK: { @@ -126,7 +126,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { static_cast(cc)); // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::Brk)); + bb.push_back(Operation(OperationCode::PopFlowStack)); break; } case OpCode::Id::IPA: { -- cgit v1.2.3 From d911740e5d474ae459f9e05d82a7dba9c7e06340 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 Dec 2018 01:50:22 -0300 Subject: shader_ir: Remove composite primitives and use temporals instead --- src/video_core/shader/decode/memory.cpp | 294 ++++++++++++++++---------------- 1 file changed, 149 insertions(+), 145 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index f3f78a662..5ae3f344d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -90,15 +90,10 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const Node op_b = GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); - const Node composite = - Operation(OperationCode::Composite, op_a, op_b, GetRegister(Register::ZeroIndex), - GetRegister(Register::ZeroIndex)); - - MetaComponents meta{{0, 1, 2, 3}}; - bb.push_back(Operation(OperationCode::AssignComposite, meta, composite, - GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), - GetRegister(Register::ZeroIndex), - GetRegister(Register::ZeroIndex))); + SetTemporal(bb, 0, op_a); + SetTemporal(bb, 1, op_b); + SetRegister(bb, instr.gpr0, GetTemporal(0)); + SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); break; } default: @@ -172,10 +167,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { break; } case OpCode::Id::TEX: { - Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); @@ -183,27 +174,12 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); } - const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array); - - MetaComponents meta; - std::array dest; - - std::size_t dest_elem = 0; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - meta.components_map[dest_elem] = static_cast(elem); - dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); - - ++dest_elem; - } - std::generate(dest.begin() + dest_elem, dest.end(), - [&]() { return GetRegister(Register::ZeroIndex); }); - - bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0], - dest[1], dest[2], dest[3])); + const TextureType texture_type{instr.tex.texture_type}; + const bool is_array = instr.tex.array != 0; + const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); + WriteTexInstructionFloat( + bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); break; } case OpCode::Id::TEXS: { @@ -216,13 +192,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); } - const Node texture = + const Node4 components = GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, texture); + WriteTexsInstructionFloat(bb, instr, components); } else { - WriteTexsInstructionHalfFloat(bb, instr, texture); + WriteTexsInstructionHalfFloat(bb, instr, components); } break; } @@ -242,27 +218,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - const Node texture = GetTld4Code(instr, texture_type, depth_compare, is_array); - - MetaComponents meta_components; - std::array dest; - - std::size_t dest_elem = 0; - for (std::size_t elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - meta_components.components_map[dest_elem] = static_cast(elem); - dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); - - ++dest_elem; - } - std::generate(dest.begin() + dest_elem, dest.end(), - [&]() { return GetRegister(Register::ZeroIndex); }); - - bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture, - dest[0], dest[1], dest[2], dest[3])); + WriteTexInstructionFloat(bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array)); break; } case OpCode::Id::TLD4S: { @@ -277,28 +234,34 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { const Node op_a = GetRegister(instr.gpr8); const Node op_b = GetRegister(instr.gpr20); - std::vector params; + std::vector coords; // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. if (depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const Node op_y = GetRegister(instr.gpr8.Value() + 1); - params.push_back(op_a); - params.push_back(op_y); - params.push_back(op_b); + coords.push_back(op_a); + coords.push_back(op_y); + coords.push_back(op_b); } else { - params.push_back(op_a); - params.push_back(op_b); + coords.push_back(op_a); + coords.push_back(op_b); } - const auto num_coords = static_cast(params.size()); - params.push_back(Immediate(static_cast(instr.tld4s.component))); + const auto num_coords = static_cast(coords.size()); + coords.push_back(Immediate(static_cast(instr.tld4s.component))); const auto& sampler = GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); - MetaTexture meta{sampler, num_coords}; - WriteTexsInstructionFloat( - bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params))); + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto params = coords; + MetaTexture meta{sampler, element, num_coords}; + values[element] = + Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + } + + WriteTexsInstructionFloat(bb, instr, values); break; } case OpCode::Id::TXQ: { @@ -314,18 +277,15 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { - MetaTexture meta_texture{sampler}; - const MetaComponents meta_components{{0, 1, 2, 3}}; - - const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture, - GetRegister(instr.gpr8)); - std::array dest; - for (std::size_t i = 0; i < dest.size(); ++i) { - dest[i] = GetRegister(instr.gpr0.Value() + i); + for (u32 element = 0; element < 4; ++element) { + MetaTexture meta{sampler, element}; + const Node value = Operation(OperationCode::F4TextureQueryDimensions, + std::move(meta), GetRegister(instr.gpr8)); + SetTemporal(bb, element, value); + } + for (u32 i = 0; i < 4; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } - - bb.push_back(Operation(OperationCode::AssignComposite, meta_components, texture, - dest[0], dest[1], dest[2], dest[3])); break; } default: @@ -366,14 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { texture_type = TextureType::Texture2D; } - MetaTexture meta_texture{sampler, static_cast(coords.size())}; - const Node texture = - Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords)); + for (u32 element = 0; element < 2; ++element) { + auto params = coords; + MetaTexture meta_texture{sampler, element, static_cast(coords.size())}; + const Node value = + Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); + SetTemporal(bb, element, value); + } + for (u32 element = 0; element < 2; ++element) { + SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + } - const MetaComponents meta_composite{{0, 1, 2, 3}}; - bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture, - GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), - GetRegister(Register::ZeroIndex), GetRegister(Register::ZeroIndex))); break; } case OpCode::Id::TLDS: { @@ -388,8 +351,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); } - const Node texture = GetTldsCode(instr, texture_type, is_array); - WriteTexsInstructionFloat(bb, instr, texture); + WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); break; } default: @@ -419,57 +381,80 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) { +void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, + const Node4& components) { + u32 dest_elem = 0; + for (u32 elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + SetTemporal(bb, dest_elem++, components[elem]); + } + // After writing values in temporals, move them to the real registers + for (u32 i = 0; i < dest_elem; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + } +} + +void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, + const Node4& components) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - MetaComponents meta; - std::array dest; + u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) { + if (!instr.texs.IsComponentEnabled(component)) continue; - } - meta.components_map[meta.count] = component; + SetTemporal(bb, dest_elem++, components[component]); + } - if (meta.count < 2) { + for (u32 i = 0; i < dest_elem; ++i) { + if (i < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2); + SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2); + SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); } - ++meta.count; } - - std::generate(dest.begin() + meta.count, dest.end(), - [&]() { return GetRegister(Register::ZeroIndex); }); - - bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], - dest[3])); } -void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) { +void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, + const Node4& components) { // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half // float instruction). - MetaComponents meta; + Node4 values; + u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { if (!instr.texs.IsComponentEnabled(component)) continue; - meta.components_map[meta.count++] = component; + values[dest_elem++] = components[component]; } - if (meta.count == 0) + if (dest_elem == 0) return; - bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture, - GetRegister(instr.gpr0), GetRegister(instr.gpr28))); + std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); + + const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); + if (dest_elem <= 2) { + SetRegister(bb, instr.gpr0, first_value); + return; + } + + SetTemporal(bb, 0, first_value); + SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); + + SetRegister(bb, instr.gpr0, GetTemporal(0)); + SetRegister(bb, instr.gpr28, GetTemporal(1)); } -Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - std::size_t array_offset, std::size_t bias_offset, - std::vector&& coords) { +Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array, + std::size_t array_offset, std::size_t bias_offset, + std::vector&& coords) { UNIMPLEMENTED_IF_MSG( (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || (texture_type == TextureType::TextureCube && is_array && depth_compare), @@ -495,24 +480,31 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, std::optional array_offset_value; if (is_array) array_offset_value = static_cast(array_offset); - MetaTexture meta{sampler, static_cast(coords.size()), array_offset_value}; - std::vector params = std::move(coords); + + const auto coords_count = static_cast(coords.size()); if (process_mode != TextureProcessMode::None && gl_lod_supported) { if (process_mode == TextureProcessMode::LZ) { - params.push_back(Immediate(0.0f)); + coords.push_back(Immediate(0.0f)); } else { - // If present, lod or bias are always stored in the register indexed by the gpr20 field - // with an offset depending on the usage of the other registers - params.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); + // If present, lod or bias are always stored in the register indexed by the gpr20 + // field with an offset depending on the usage of the other registers + coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); } } - return Operation(read_method, meta, std::move(params)); + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto params = coords; + MetaTexture meta{sampler, element, coords_count, array_offset_value}; + values[element] = Operation(read_method, std::move(meta), std::move(params)); + } + + return values; } -Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { +Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { const bool lod_bias_enabled = (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); @@ -551,8 +543,8 @@ Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, 0, std::move(coords)); } -Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { +Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { const bool lod_bias_enabled = (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); @@ -593,8 +585,8 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, (coord_count > 2 ? 1 : 0), std::move(coords)); } -Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array) { +Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, + bool is_array) { const std::size_t coord_count = GetCoordCount(texture_type); const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); @@ -604,24 +596,31 @@ Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool dep // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used const u64 coord_register = array_register + (is_array ? 1 : 0); - std::vector params; + std::vector coords; for (size_t i = 0; i < coord_count; ++i) { - params.push_back(GetRegister(coord_register + i)); + coords.push_back(GetRegister(coord_register + i)); } std::optional array_offset; if (is_array) { - array_offset = static_cast(params.size()); - params.push_back(GetRegister(array_register)); + array_offset = static_cast(coords.size()); + coords.push_back(GetRegister(array_register)); } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); - MetaTexture meta{sampler, static_cast(params.size()), array_offset}; - return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto params = coords; + MetaTexture meta{sampler, element, static_cast(coords.size()), array_offset}; + values[element] = + Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); + } + + return values; } -Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { +Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { const std::size_t type_coord_count = GetCoordCount(texture_type); const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; @@ -636,36 +635,41 @@ Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_ ? static_cast(instr.gpr20.Value()) : coord_register + 1; - std::vector params; + std::vector coords; for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - params.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); } std::optional array_offset; if (is_array) { - array_offset = static_cast(params.size()); - params.push_back(GetRegister(array_register)); + array_offset = static_cast(coords.size()); + coords.push_back(GetRegister(array_register)); } - const auto coords_count = static_cast(params.size()); + const auto coords_count = static_cast(coords.size()); if (lod_enabled) { // When lod is used always is in grp20 - params.push_back(GetRegister(instr.gpr20)); + coords.push_back(GetRegister(instr.gpr20)); } else { - params.push_back(Immediate(0)); + coords.push_back(Immediate(0)); } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); - MetaTexture meta{sampler, coords_count, array_offset}; - return Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto params = coords; + MetaTexture meta{sampler, element, coords_count, array_offset}; + values[element] = + Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); + } + return values; } std::tuple ShaderIR::ValidateAndGetCoordinateElement( TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); -- cgit v1.2.3 From 2d6c064e66bac4cb871aa26a12066441a8852008 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 Dec 2018 16:50:36 -0300 Subject: shader_decode: Improve zero flag implementation --- src/video_core/shader/decode/arithmetic.cpp | 15 ++++------- .../shader/decode/arithmetic_immediate.cpp | 6 ++--- .../shader/decode/arithmetic_integer.cpp | 31 ++++++++++------------ .../shader/decode/arithmetic_integer_immediate.cpp | 18 +++++-------- src/video_core/shader/decode/bfe.cpp | 1 + src/video_core/shader/decode/bfi.cpp | 4 +-- src/video_core/shader/decode/conversion.cpp | 11 +++----- src/video_core/shader/decode/ffma.cpp | 3 +-- src/video_core/shader/decode/float_set.cpp | 11 ++++---- .../shader/decode/predicate_set_register.cpp | 6 +++++ src/video_core/shader/decode/shift.cpp | 14 +++++----- src/video_core/shader/decode/video.cpp | 5 +--- src/video_core/shader/decode/xmad.cpp | 1 + 13 files changed, 53 insertions(+), 73 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index ef846bd9a..926abcc8e 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -45,8 +45,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG( instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMUL is not implemented"); op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); @@ -75,21 +73,20 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FADD is not implemented"); - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -126,9 +123,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMNMX is not implemented"); - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); @@ -136,9 +130,10 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); + const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - SetRegister(bb, instr.gpr0, - Operation(OperationCode::Select, NO_PRECISE, condition, min, max)); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::RRO_C: diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 996b2537a..1c6da94b4 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -22,24 +22,22 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { break; } case OpCode::Id::FMUL32_IMM: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FMUL32 is not implemented"); Node value = Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); value = GetSaturatedFloat(value, instr.fmul32.saturate); + SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FADD32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FADD32I is not implemented"); const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, instr.fadd32i.negate_a); const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, instr.fadd32i.negate_b); const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); + SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); SetRegister(bb, instr.gpr0, value); break; } diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 931e0fa1d..edd1695f4 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -34,22 +34,20 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD is not implemented"); UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); + const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::IADD3_C: case OpCode::Id::IADD3_R: case OpCode::Id::IADD3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD3 is not implemented"); - Node op_c = GetRegister(instr.gpr39); const auto ApplyHeight = [&](IAdd3Height height, Node value) { @@ -100,6 +98,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); }(); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -115,6 +114,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); + + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -139,24 +140,19 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP is not implemented"); - if (instr.alu.lop.invert_a) op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); if (instr.alu.lop.invert_b) op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, + instr.generates_cc); break; } case OpCode::Id::LOP3_C: case OpCode::Id::LOP3_R: case OpCode::Id::LOP3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP3 is not implemented"); - const Node op_c = GetRegister(instr.gpr39); const Node lut = [&]() { if (opcode->get().GetId() == OpCode::Id::LOP3_R) { @@ -166,15 +162,13 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { } }(); - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut); + WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); break; } case OpCode::Id::IMNMX_C: case OpCode::Id::IMNMX_R: case OpCode::Id::IMNMX_IMM: { UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IMNMX is not implemented"); const bool is_signed = instr.imnmx.is_signed; @@ -182,6 +176,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); + + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -247,7 +243,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { } void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut) { + Node imm_lut, bool sets_cc) { constexpr u32 lop_iterations = 32; const Node one = Immediate(1); const Node two = Immediate(2); @@ -284,6 +280,7 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No } } + SetInternalFlagsFromInteger(bb, value, sets_cc); SetRegister(bb, dest, value); } diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 3b8a60c6b..3cbaeeaf5 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -25,20 +25,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in IADD32I is not implemented"); UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::LOP32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in LOP32I is not implemented"); - if (instr.alu.lop32i.invert_a) op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); @@ -46,8 +43,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, - Tegra::Shader::PredicateResultMode::None, - Tegra::Shader::Pred::UnusedIndex); + PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); break; } default: @@ -60,7 +56,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, Node op_a, Node op_b, PredicateResultMode predicate_mode, - Pred predicate) { + Pred predicate, bool sets_cc) { const Node result = [&]() { switch (logic_op) { case LogicOperation::And: @@ -77,11 +73,9 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation } }(); - if (dest != Register::ZeroIndex) { - SetRegister(bb, dest, result); - } + SetInternalFlagsFromInteger(bb, result, sets_cc); + SetRegister(bb, dest, result); - using Tegra::Shader::PredicateResultMode; // Write the predicate value depending on the predicate mode. switch (predicate_mode) { case PredicateResultMode::None: diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 6532a3bce..d3244fd40 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -35,6 +35,7 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) { const Node outer_shift = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); + SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); SetRegister(bb, instr.gpr0, outer_shift); break; } diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index b0d8d9eba..ddb1872c6 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -16,8 +16,6 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.generates_cc); - const auto [base, packed_shift] = [&]() -> std::tuple { switch (opcode->get().GetId()) { case OpCode::Id::BFI_IMM_R: @@ -33,6 +31,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { const Node value = Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); + + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); return pc; diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 791f03fe0..d5c75e8eb 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -33,15 +33,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); } + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); - - if (instr.generates_cc) { - const Node zero_condition = - SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, zero_condition); - LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete."); - } - break; } case OpCode::Id::I2F_R: @@ -64,6 +57,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -103,6 +97,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a17ebd6db..f3ab3d2e8 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -21,8 +21,6 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FFMA is not implemented"); const Node op_a = GetRegister(instr.gpr8); @@ -52,6 +50,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); return pc; diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index b69d94c2e..8e266cc4e 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -45,13 +45,12 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { const Node value = Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - SetRegister(bb, instr.gpr0, value); - - if (instr.generates_cc) { - const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, is_zero); - LOG_WARNING(HW_GPU, "FSET condition code is incomplete"); + if (instr.fset.bf) { + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + } else { + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); } + SetRegister(bb, instr.gpr0, value); return pc; } diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 6c58496c2..58d20ceb5 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -32,6 +32,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); const Node value = Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); + + if (instr.pset.bf) { + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + } else { + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); + } SetRegister(bb, instr.gpr0, value); return pc; diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 3ba039d21..e8ffdb818 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -31,22 +31,20 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in SHR is not implemented"); - const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, PRECISE, op_a, op_b); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in SHL is not implemented"); - SetRegister(bb, instr.gpr0, - Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b)); + case OpCode::Id::SHL_IMM: { + const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); + SetRegister(bb, instr.gpr0, value); break; + } default: UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); } diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index b491fbadb..609b3a257 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -38,9 +38,6 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::VMAD: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in VMAD is not implemented"); - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; const Node op_c = GetRegister(instr.gpr39); @@ -53,8 +50,8 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); } + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); - break; } case OpCode::Id::VSETP: { diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 3e37aee4a..88f1be27d 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -86,6 +86,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); } + SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); SetRegister(bb, instr.gpr0, sum); return pc; -- cgit v1.2.3 From 170c8212bbb10129dfbaed8eb7ab67138c932af2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 Dec 2018 20:00:36 -0300 Subject: shader_ir: Pass to decoder functions basic block's code --- src/video_core/shader/decode/arithmetic.cpp | 2 +- src/video_core/shader/decode/arithmetic_half.cpp | 2 +- src/video_core/shader/decode/arithmetic_half_immediate.cpp | 2 +- src/video_core/shader/decode/arithmetic_immediate.cpp | 2 +- src/video_core/shader/decode/arithmetic_integer.cpp | 2 +- src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 2 +- src/video_core/shader/decode/bfe.cpp | 2 +- src/video_core/shader/decode/bfi.cpp | 2 +- src/video_core/shader/decode/conversion.cpp | 2 +- src/video_core/shader/decode/ffma.cpp | 2 +- src/video_core/shader/decode/float_set.cpp | 2 +- src/video_core/shader/decode/float_set_predicate.cpp | 2 +- src/video_core/shader/decode/half_set.cpp | 2 +- src/video_core/shader/decode/half_set_predicate.cpp | 2 +- src/video_core/shader/decode/hfma2.cpp | 2 +- src/video_core/shader/decode/integer_set.cpp | 2 +- src/video_core/shader/decode/integer_set_predicate.cpp | 2 +- src/video_core/shader/decode/memory.cpp | 2 +- src/video_core/shader/decode/other.cpp | 2 +- src/video_core/shader/decode/predicate_set_predicate.cpp | 2 +- src/video_core/shader/decode/predicate_set_register.cpp | 2 +- src/video_core/shader/decode/register_set_predicate.cpp | 2 +- src/video_core/shader/decode/shift.cpp | 2 +- src/video_core/shader/decode/video.cpp | 2 +- src/video_core/shader/decode/xmad.cpp | 2 +- 25 files changed, 25 insertions(+), 25 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 926abcc8e..e7847f614 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::SubOp; -u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 9547eae5d..a237dcb92 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 5c280a1a6..7b4f7d284 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 1c6da94b4..4fd3db54e 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index edd1695f4..4a8cc1a1c 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Pred; using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 3cbaeeaf5..b26a6e473 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::Pred; using Tegra::Shader::PredicateResultMode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index d3244fd40..0734141b0 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index ddb1872c6..942d6729d 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index d5c75e8eb..ee18d3a99 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index f3ab3d2e8..be8dc2230 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 8e266cc4e..ba846f1bd 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 5dd085fea..e88b04d18 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index e34deeff4..dfd7cb98f 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -14,7 +14,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 72cc3d5c8..53c44ae5a 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index bf7491804..4a6b945f9 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::HalfType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index eba1c5123..85e67b03b 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index d76b8018c..c8b105a08 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 5ae3f344d..ae71672d6 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -35,7 +35,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { } } -u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 6e6795ba7..c1e5f4efb 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -14,7 +14,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 6ea6daceb..1717f0653 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 58d20ceb5..8bd15fb00 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 14bce9fa4..bdb4424a6 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index e8ffdb818..85026bb37 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 609b3a257..c3432356d 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::Pred; using Tegra::Shader::VideoType; using Tegra::Shader::VmadShr; -u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 88f1be27d..3ceabecb5 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); -- cgit v1.2.3 From 1c9c4eefeb1d40a9c0ca29c528e71ee1e918a967 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 Dec 2018 01:05:14 -0300 Subject: shader_decode: Fixup XMAD --- src/video_core/shader/decode/xmad.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 3ceabecb5..9f2d636b8 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -55,7 +55,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { // TODO(Rodrigo): Use an appropiate sign for this operation Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); if (instr.xmad.product_shift_left) { - product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, Immediate(16)); + product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); } op_c = [&]() { -- cgit v1.2.3 From a63d7c49fc928d0ad440213a5a409a2f1f05afed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jan 2019 21:06:05 -0300 Subject: shader_ir: Fixup clang build --- src/video_core/shader/decode/xmad.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/video_core/shader/decode') diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 9f2d636b8..0cd9cd1cc 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -58,18 +58,20 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); } + const Node original_c = op_c; op_c = [&]() { switch (instr.xmad.mode) { case Tegra::Shader::XmadMode::None: - return op_c; + return original_c; case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(op_c, 0, 16); + return BitfieldExtract(original_c, 0, 16); case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(op_c, 16, 16); + return BitfieldExtract(original_c, 16, 16); case Tegra::Shader::XmadMode::CBcc: { const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, NO_PRECISE, original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, op_c, shifted_b); + return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c, + shifted_b); } default: UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast(instr.xmad.mode.Value())); -- cgit v1.2.3