diff options
Diffstat (limited to 'src/video_core/shader/decode')
19 files changed, 573 insertions, 142 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - UNIMPLEMENTED_IF_MSG( - instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default + if (instr.fmul.tab5cb8_2 != 0) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + } + if (instr.fmul.tab5c68_0 != 1) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); + } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } else { - UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..32facd6ba 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -14,6 +14,12 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +constexpr OperationCode GetFloatSelector(u64 selector) { + return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; +} +} // Anonymous namespace + u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2I_R: case OpCode::Id::I2I_C: case OpCode::Id::I2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.selector); + UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.alu.saturate_d); @@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.selector); + UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); @@ -82,14 +88,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2F_R: case OpCode::Id::F2F_C: case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); @@ -107,6 +118,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, + std::move(value)); + } else { + ASSERT(instr.conversion.float_src.selector == 0); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { @@ -124,19 +142,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast<u32>(instr.conversion.f2f.rounding.Value())); - return Immediate(0); + return value; } }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2I_R: case OpCode::Id::F2I_C: case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2I is not implemented"); Node value = [&]() { @@ -153,6 +176,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, + std::move(value)); + } else { + ASSERT(instr.conversion.float_src.selector == 0); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp deleted file mode 100644 index e69de29bb..000000000 --- a/src/video_core/shader/decode/decode_integer_set.cpp +++ /dev/null diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + if (instr.ffma.tab5980_0 != 1) { + LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + } + if (instr.ffma.tab5980_1 != 0) { + LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index f5013e44a..5614e8a0d 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, instr.fset.neg_a != 0); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 2323052b0..200c2c983 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -16,10 +16,9 @@ using Tegra::Shader::Pred; u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); + Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, + instr.fsetp.neg_a != 0); Node op_b = [&]() { if (instr.is_b_imm) { return GetImmediate19(instr); @@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); - op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); + op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); // We can't use the constant predicate as destination. ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); + const Node predicate = + GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..840694527 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -18,43 +18,55 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); + DEBUG_ASSERT(instr.hsetp2.ftz == 0); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - Node op_b = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_R: - return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, - instr.hsetp2.negate_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); - - // We can't use the constant predicate as destination. - ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); + Tegra::Shader::PredCondition cond{}; + bool h_and{}; + Node op_b{}; + switch (opcode->get().GetId()) { + case OpCode::Id::HSETP2_C: + cond = instr.hsetp2.cbuf_and_imm.cond; + h_and = instr.hsetp2.cbuf_and_imm.h_and; + op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), + instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); + break; + case OpCode::Id::HSETP2_IMM: + cond = instr.hsetp2.cbuf_and_imm.cond; + h_and = instr.hsetp2.cbuf_and_imm.h_and; + op_b = UnpackHalfImmediate(instr, true); + break; + case OpCode::Id::HSETP2_R: + cond = instr.hsetp2.reg.cond; + h_and = instr.hsetp2.reg.h_and; + op_b = + GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), + instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); + break; + default: + UNREACHABLE(); + op_b = Immediate(0); + } const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const OperationCode pair_combiner = - instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - - const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); - const Node first_pred = Operation(pair_combiner, comparison); + const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - // Set the primary predicate to the result of Predicate OP SecondPredicate - const Node value = Operation(combiner, first_pred, second_pred); - SetPredicate(bb, instr.hsetp2.pred3, value); + const auto Write = [&](u64 dest, Node src) { + SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); + }; - if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); + const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); + const u64 first = instr.hsetp2.pred3; + const u64 second = instr.hsetp2.pred0; + if (h_and) { + Node joined = Operation(OperationCode::LogicalAnd2, comparison); + Write(first, joined); + Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); + } else { + Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); + Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); } return pc; diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); } else { - UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); } constexpr auto identity = HalfType::H0_H1; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp new file mode 100644 index 000000000..d54fb88c9 --- /dev/null +++ b/src/video_core/shader/decode/image.cpp @@ -0,0 +1,164 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <vector> +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +namespace { +std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { + switch (image_type) { + case Tegra::Shader::ImageType::Texture1D: + case Tegra::Shader::ImageType::TextureBuffer: + return 1; + case Tegra::Shader::ImageType::Texture1DArray: + case Tegra::Shader::ImageType::Texture2D: + return 2; + case Tegra::Shader::ImageType::Texture2DArray: + case Tegra::Shader::ImageType::Texture3D: + return 3; + } + UNREACHABLE(); + return 1; +} +} // Anonymous namespace + +u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::SUST: { + UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); + UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); + UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store + + std::vector<Node> values; + constexpr std::size_t hardcoded_size{4}; + for (std::size_t i = 0; i < hardcoded_size; ++i) { + values.push_back(GetRegister(instr.gpr0.Value() + i)); + } + + std::vector<Node> coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + const auto type{instr.sust.image_type}; + auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; + image.MarkWrite(); + + MetaImage meta{image, values}; + bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); + break; + } + case OpCode::Id::SUATOM: { + UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); + + Node value = GetRegister(instr.gpr0); + + std::vector<Node> coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + const OperationCode operation_code = [instr] { + switch (instr.suatom_d.operation) { + case Tegra::Shader::ImageAtomicOperation::Add: + return OperationCode::AtomicImageAdd; + case Tegra::Shader::ImageAtomicOperation::Min: + return OperationCode::AtomicImageMin; + case Tegra::Shader::ImageAtomicOperation::Max: + return OperationCode::AtomicImageMax; + case Tegra::Shader::ImageAtomicOperation::And: + return OperationCode::AtomicImageAnd; + case Tegra::Shader::ImageAtomicOperation::Or: + return OperationCode::AtomicImageOr; + case Tegra::Shader::ImageAtomicOperation::Xor: + return OperationCode::AtomicImageXor; + case Tegra::Shader::ImageAtomicOperation::Exch: + return OperationCode::AtomicImageExchange; + default: + UNIMPLEMENTED_MSG("Unimplemented operation={}", + static_cast<u32>(instr.suatom_d.operation.Value())); + return OperationCode::AtomicImageAdd; + } + }(); + + const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; + MetaImage meta{image, {std::move(value)}}; + SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + const auto offset{static_cast<std::size_t>(image.index.Value())}; + if (const auto image = TryUseExistingImage(offset, type, size)) { + return *image; + } + + const std::size_t next_index{used_images.size()}; + return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; +} + +Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + const Node image_register{GetRegister(reg)}; + const auto [base_image, cbuf_index, cbuf_offset]{ + TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; + + if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { + return *image; + } + + const std::size_t next_index{used_images.size()}; + return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) + .first->second; +} + +Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + auto it = used_images.find(offset); + if (it == used_images.end()) { + return nullptr; + } + auto& image = it->second; + ASSERT(image.GetType() == type); + + if (size) { + // We know the size, if it's known it has to be the same as before, otherwise we can set it. + if (image.IsSizeKnown()) { + ASSERT(image.GetSize() == size); + } else { + image.SetSize(*size); + } + } + return ℑ +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 46e3d5905..59809bcd8 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -14,7 +14,6 @@ using Tegra::Shader::OpCode; u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); const Node op_a = GetRegister(instr.gpr8); const Node op_b = [&]() { diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index dd20775d7..25e48fef8 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -16,7 +16,6 @@ using Tegra::Shader::Pred; u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 80fc0ccfc..7923d4d69 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -35,7 +35,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { return 1; } } -} // namespace +} // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node op_b = GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - SetTemporal(bb, 0, op_a); - SetTemporal(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporal(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); + SetTemporary(bb, 0, op_a); + SetTemporary(bb, 1, op_b); + SetRegister(bb, instr.gpr0, GetTemporary(0)); + SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); break; } default: @@ -106,16 +106,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } - case OpCode::Id::LD_L: { - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", - static_cast<u64>(instr.ld_l.unknown.Value())); - - const auto GetLmem = [&](s32 offset) { + case OpCode::Id::LD_L: + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); + [[fallthrough]]; + case OpCode::Id::LD_S: { + const auto GetMemory = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate_offset); - return GetLocalMemory(address); + return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) + : GetLocalMemory(address); }; switch (instr.ldst_sl.type.Value()) { @@ -135,14 +136,16 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return 0; } }(); - for (u32 i = 0; i < count; ++i) - SetTemporal(bb, i, GetLmem(i * 4)); - for (u32 i = 0; i < count; ++i) - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + for (u32 i = 0; i < count; ++i) { + SetTemporary(bb, i, GetMemory(i * 4)); + } + for (u32 i = 0; i < count; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } break; } default: - UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", + UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); } break; @@ -172,10 +175,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - SetTemporal(bb, i, gmem); + SetTemporary(bb, i, gmem); } for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } @@ -209,27 +212,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } - case OpCode::Id::ST_L: { + case OpCode::Id::ST_L: LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", static_cast<u64>(instr.st_l.cache_management.Value())); - - const auto GetLmemAddr = [&](s32 offset) { + [[fallthrough]]; + case OpCode::Id::ST_S: { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; + const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L + ? &ShaderIR::SetLocalMemory + : &ShaderIR::SetSharedMemory; + switch (instr.ldst_sl.type.Value()) { case Tegra::Shader::StoreType::Bits128: - SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3)); - SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2)); + (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); + (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); + [[fallthrough]]; case Tegra::Shader::StoreType::Bits64: - SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1)); + (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); + [[fallthrough]]; case Tegra::Shader::StoreType::Bits32: - SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0)); + (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; default: - UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", + UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); } break; @@ -253,11 +263,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { TrackAndGetGlobalMemory(bb, instr, true); // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporal(bb, 0, real_address_base); + SetTemporary(bb, 0, real_address_base); const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { - SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); } for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); @@ -265,7 +275,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); } break; } @@ -297,18 +307,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; - const Node base_address{ - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; - const auto cbuf = std::get_if<CbufNode>(&*base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); + const auto [base_address, index, offset] = + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); + ASSERT(base_address != nullptr); - bb.push_back( - Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const GlobalMemoryBase descriptor{index, offset}; const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); auto& usage = entry->second; if (is_write) { diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 6fc07f213..d46e0f823 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { + case OpCode::Id::NOP: { + UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); + UNIMPLEMENTED_IF(instr.nop.trigger != 0); + // With the previous preconditions, this instruction is a no-operation. + break; + } case OpCode::Id::EXIT: { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", @@ -68,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); return Immediate(0u); + case SystemVariable::Tid: { + Node value = Immediate(0); + value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); + value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9); + value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5); + return value; + } case SystemVariable::TidX: return Operation(OperationCode::LocalInvocationIdX); case SystemVariable::TidY: @@ -91,11 +104,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::BRA: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "BRA with constant buffers are not implemented"); + Node branch; + if (instr.bra.constant_buffer == 0) { + const u32 target = pc + instr.bra.GetBranchTarget(); + branch = Operation(OperationCode::Branch, Immediate(target)); + } else { + const u32 target = pc + 1; + const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, + PRECISE, op_a, Immediate(3)); + const Node operand = + Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + branch = Operation(OperationCode::BranchIndirect, operand); + } - const u32 target = pc + instr.bra.GetBranchTarget(); - const Node branch = Operation(OperationCode::Branch, Immediate(target)); + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + if (cc != Tegra::Shader::ConditionCode::T) { + bb.push_back(Conditional(GetConditionCode(cc), {branch})); + } else { + bb.push_back(branch); + } + break; + } + case OpCode::Id::BRX: { + Node operand; + if (instr.brx.constant_buffer != 0) { + const s32 target = pc + 1; + const Node index = GetRegister(instr.gpr8); + const Node op_a = + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, + PRECISE, op_a, Immediate(3)); + operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + } else { + const s32 target = pc + instr.brx.GetBranchExtend(); + const Node op_a = GetRegister(instr.gpr8); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, + PRECISE, op_a, Immediate(3)); + operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + } + const Node branch = Operation(OperationCode::BranchIndirect, operand); const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; if (cc != Tegra::Shader::ConditionCode::T) { @@ -109,22 +157,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer flow is not supported"); - // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the - // target of the jump that the SYNC instruction will make. The SSY opcode has a similar - // structure to the BRA opcode. + if (disable_flow_stack) { + break; + } + + // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); + bb.push_back( + Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); break; } case OpCode::Id::PBK: { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer PBK is not supported"); - // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but - // using SYNC on a PBK address will kill the shader execution. We don't emulate this because - // it's very unlikely a driver will emit such invalid shader. + if (disable_flow_stack) { + break; + } + + // PBK pushes to a stack the address where BRK will jump to. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); + bb.push_back( + Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); break; } case OpCode::Id::SYNC: { @@ -132,17 +186,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", static_cast<u32>(cc)); + if (disable_flow_stack) { + break; + } + // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack)); + bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); break; } case OpCode::Id::BRK: { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", static_cast<u32>(cc)); + if (disable_flow_stack) { + break; + } // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack)); + bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); break; } case OpCode::Id::IPA: { diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index febbfeb50..84dbc50fe 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in PSET is not implemented"); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 2ac16eeb0..f6ee68a54 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { + Node op_a = GetRegister(instr.gpr8); + Node op_b = [&]() { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { @@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, - instr.shift.is_signed, PRECISE, op_a, op_b); + if (instr.shr.wrap) { + op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f)); + } else { + op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); + op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); + } + + Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, + std::move(op_a), std::move(op_b)); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: { - const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); + const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4a356dbd4..0b934a069 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporal(bb, indexer++, value); + SetTemporary(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } @@ -238,13 +238,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto params = coords; MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, indexer++, value); + SetTemporary(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } + case OpCode::Id::TLD: { + UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); + + if (instr.tld.nodep_flag) { + LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); + } + + WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); + break; + } case OpCode::Id::TLDS: { const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; const bool is_array{instr.tlds.IsArrayTexture()}; @@ -257,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); } - WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); + const Node4 components = GetTldsCode(instr, texture_type, is_array); + + if (instr.tlds.fp32_flag) { + WriteTexsInstructionFloat(bb, instr, components); + } else { + WriteTexsInstructionHalfFloat(bb, instr, components); + } break; } default: @@ -290,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, bool is_array, bool is_shadow) { const Node sampler_register = GetRegister(reg); - const Node base_sampler = + const auto [base_sampler, cbuf_index, cbuf_offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(&*base_sampler); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - const auto cbuf_index = cbuf->GetIndex(); + ASSERT(base_sampler != nullptr); const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); // If this sampler has already been used, return the existing mapping. @@ -322,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const // Skip disabled components continue; } - SetTemporal(bb, dest_elem++, components[elem]); + SetTemporary(bb, dest_elem++, components[elem]); } // After writing values in temporals, move them to the real registers for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } } @@ -339,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, for (u32 component = 0; component < 4; ++component) { if (!instr.texs.IsComponentEnabled(component)) continue; - SetTemporal(bb, dest_elem++, components[component]); + SetTemporary(bb, dest_elem++, components[component]); } for (u32 i = 0; i < dest_elem; ++i) { if (i < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); + SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); + SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); } } } @@ -377,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, return; } - SetTemporal(bb, 0, first_value); - SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); + SetTemporary(bb, 0, first_value); + SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - SetRegister(bb, instr.gpr0, GetTemporal(0)); - SetRegister(bb, instr.gpr28, GetTemporal(1)); + SetRegister(bb, instr.gpr0, GetTemporary(0)); + SetRegister(bb, instr.gpr28, GetTemporary(1)); } Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, @@ -575,6 +589,39 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de return values; } +Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { + const auto texture_type{instr.tld.texture_type}; + const bool is_array{instr.tld.is_array}; + const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; + const std::size_t coord_count{GetCoordCount(texture_type)}; + + u64 gpr8_cursor{instr.gpr8.Value()}; + const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; + + std::vector<Node> coords; + coords.reserve(coord_count); + for (std::size_t i = 0; i < coord_count; ++i) { + coords.push_back(GetRegister(gpr8_cursor++)); + } + + u64 gpr20_cursor{instr.gpr20.Value()}; + // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; + const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; + // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; + // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element}; + values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); + } + + return values; +} + Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp new file mode 100644 index 000000000..a8e481b3c --- /dev/null +++ b/src/video_core/shader/decode/warp.cpp @@ -0,0 +1,102 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; +using Tegra::Shader::ShuffleOperation; +using Tegra::Shader::VoteOperation; + +namespace { +OperationCode GetOperationCode(VoteOperation vote_op) { + switch (vote_op) { + case VoteOperation::All: + return OperationCode::VoteAll; + case VoteOperation::Any: + return OperationCode::VoteAny; + case VoteOperation::Eq: + return OperationCode::VoteEqual; + default: + UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op)); + return OperationCode::VoteAll; + } +} +} // Anonymous namespace + +u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::VOTE: { + const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); + const Node active = Operation(OperationCode::BallotThread, value); + const Node vote = Operation(GetOperationCode(instr.vote.operation), value); + SetRegister(bb, instr.gpr0, active); + SetPredicate(bb, instr.vote.dest_pred, vote); + break; + } + case OpCode::Id::SHFL: { + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node width = [&] { + // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has + // been done reversing Nvidia's math. It won't work on all cases due to SHFL having + // different parameters that don't properly map to GLSL's interface, but it should work + // for cases emitted by Nvidia's compiler. + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation( + OperationCode::ILogicalShiftRight, + Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), + Immediate(8)); + } else { + return Operation(OperationCode::ILogicalShiftRight, + Operation(OperationCode::IAdd, Immediate(0x201F), + Operation(OperationCode::INegate, std::move(mask))), + Immediate(8)); + } + }(); + + const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + switch (instr.shfl.operation) { + case ShuffleOperation::Idx: + return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; + case ShuffleOperation::Up: + return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + case ShuffleOperation::Down: + return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + case ShuffleOperation::Bfly: + return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + } + UNREACHABLE_MSG("Invalid SHFL operation: {}", + static_cast<u64>(instr.shfl.operation.Value())); + return {}; + }(); + + // Setting the predicate before the register is intentional to avoid overwriting. + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + SetRegister( + bb, instr.gpr0, + Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); + break; + } + + return pc; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 93dee77d1..206961909 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { if (is_psl) { product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); } - SetTemporal(bb, 0, product); - product = GetTemporal(0); + SetTemporary(bb, 0, product); + product = GetTemporary(0); const Node original_c = op_c; const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error @@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { } }(); - SetTemporal(bb, 1, op_c); - op_c = GetTemporal(1); + SetTemporary(bb, 1, op_c); + op_c = GetTemporary(1); // TODO(Rodrigo): Use an appropiate sign for this operation Node sum = Operation(OperationCode::IAdd, product, op_c); - SetTemporal(bb, 2, sum); - sum = GetTemporal(2); + SetTemporary(bb, 2, sum); + sum = GetTemporary(2); if (is_merge) { const Node a = BitfieldExtract(sum, 0, 16); const Node b = |