diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 47 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.h | 30 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 5 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic.cpp | 13 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_half_immediate.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 30 | ||||
-rw-r--r-- | src/video_core/shader/decode/ffma.cpp | 10 | ||||
-rw-r--r-- | src/video_core/shader/decode/half_set_predicate.cpp | 10 | ||||
-rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/decode/warp.cpp | 55 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 30 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 1 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 4 |
14 files changed, 184 insertions, 65 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index fdcc970ff..ec3a76690 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -15,7 +15,7 @@ #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { - +namespace { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -29,8 +29,7 @@ struct Query { struct BlockStack { BlockStack() = default; - BlockStack(const BlockStack& b) = default; - BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} std::stack<u32> ssy_stack{}; std::stack<u32> pbk_stack{}; }; @@ -58,7 +57,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, const u32 start) - : program_code{program_code}, program_size{program_size}, start{start} {} + : start{start}, program_code{program_code}, program_size{program_size} {} u32 start{}; std::vector<BlockInfo> block_info{}; @@ -85,7 +84,7 @@ std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) return {BlockCollision::Inside, index}; } } - return {BlockCollision::None, -1}; + return {BlockCollision::None, 0xFFFFFFFF}; } struct ParseInfo { @@ -365,27 +364,29 @@ bool TryQuery(CFGRebuildState& state) { const auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { cc.push(gather_start->second); - gather_start++; + ++gather_start; } }; if (state.queries.empty()) { return false; } + Query& q = state.queries.front(); const u32 block_index = state.registered[q.address]; BlockInfo& block = state.block_info[block_index]; - // If the block is visted, check if the stacks match, else gather the ssy/pbk + // If the block is visited, check if the stacks match, else gather the ssy/pbk // labels into the current stack and look if the branch at the end of the block // consumes a label. Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); + const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && + (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); state.queries.pop_front(); return all_okay; } block.visited = true; - state.stacks[q.address] = BlockStack{q}; + state.stacks.insert_or_assign(q.address, BlockStack{q}); + Query q2(q); state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); @@ -394,6 +395,7 @@ bool TryQuery(CFGRebuildState& state) { q2.address = block.end + 1; state.queries.push_back(q2); } + Query conditional_query{q2}; if (block.branch.is_sync) { if (block.branch.address == unassigned_branch) { @@ -408,13 +410,15 @@ bool TryQuery(CFGRebuildState& state) { conditional_query.pbk_stack.pop(); } conditional_query.address = block.branch.address; - state.queries.push_back(conditional_query); + state.queries.push_back(std::move(conditional_query)); return true; } +} // Anonymous namespace -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address) { +std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address) { CFGRebuildState state{program_code, program_size, start_address}; + // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); @@ -424,10 +428,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u return {}; } } + // Decompile Stacks - Query start_query{}; - start_query.address = state.start; - state.queries.push_back(start_query); + state.queries.push_back(Query{state.start, {}, {}}); bool decompiled = true; while (!state.queries.empty()) { if (!TryQuery(state)) { @@ -435,14 +438,15 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u break; } } + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); ShaderCharacteristics result_out{}; result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; - for (auto& block : state.block_info) { + for (const auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; @@ -457,8 +461,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u } if (result_out.decompilable) { result_out.labels = std::move(state.labels); - return {result_out}; + return {std::move(result_out)}; } + // If it's not decompilable, merge the unlabelled blocks together auto back = result_out.blocks.begin(); auto next = std::next(back); @@ -469,8 +474,8 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u continue; } back = next; - next++; + ++next; } - return {result_out}; + return {std::move(result_out)}; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5e8ea3271..b0a5e4f8c 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -4,7 +4,6 @@ #pragma once -#include <cstring> #include <list> #include <optional> #include <unordered_set> @@ -26,27 +25,44 @@ struct Condition { bool IsUnconditional() const { return predicate == Pred::UnusedIndex && cc == ConditionCode::T; } + bool operator==(const Condition& other) const { return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); } + + bool operator!=(const Condition& other) const { + return !operator==(other); + } }; struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; struct Branch { Condition cond{}; bool kills{}; s32 address{}; + bool operator==(const Branch& b) const { return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); } - } branch{}; + + bool operator!=(const Branch& b) const { + return !operator==(b); + } + }; + + u32 start{}; + u32 end{}; + bool ignore_branch{}; + Branch branch{}; + bool operator==(const ShaderBlock& sb) const { return std::tie(start, end, ignore_branch, branch) == std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); } + + bool operator!=(const ShaderBlock& sb) const { + return !operator==(sb); + } }; struct ShaderCharacteristics { @@ -57,7 +73,7 @@ struct ShaderCharacteristics { std::unordered_set<u32> labels{}; }; -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address); +std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index afffd157f..47a9fd961 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -47,14 +47,14 @@ void ShaderIR::Decode() { if (shader_info.decompilable) { disable_flow_stack = true; const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == exit_branch) { + if (label == static_cast<u32>(exit_branch)) { return; } basic_blocks.insert({label, nodes}); }; const auto& blocks = shader_info.blocks; NodeBlock current_block; - u32 current_label = exit_branch; + u32 current_label = static_cast<u32>(exit_branch); for (auto& block : blocks) { if (shader_info.labels.count(block.start) != 0) { insert_block(current_block, current_label); @@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, + {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, {OpCode::Type::Image, &ShaderIR::DecodeImage}, diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - UNIMPLEMENTED_IF_MSG( - instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default + if (instr.fmul.tab5cb8_2 != 0) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + } + if (instr.fmul.tab5c68_0 != 1) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); + } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } else { - UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..8973fbefa 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); @@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2F_R: case OpCode::Id::F2F_C: case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); @@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + // TODO: figure where extract is sey in the encoding + value = Operation(OperationCode::FCastHalf0, PRECISE, value); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { @@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast<u32>(instr.conversion.f2f.rounding.Value())); - return Immediate(0); + return value; } }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2I_R: case OpCode::Id::F2I_C: case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2I is not implemented"); Node value = [&]() { @@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + // TODO: figure where extract is sey in the encoding + value = Operation(OperationCode::FCastHalf0, PRECISE, value); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + if (instr.ffma.tab5980_0 != 1) { + LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + } + if (instr.ffma.tab5980_1 != 0) { + LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index ad180d6df..afea33e5f 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); + DEBUG_ASSERT(instr.hsetp2.ftz == 0); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); @@ -30,7 +30,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { case OpCode::Id::HSETP2_C: cond = instr.hsetp2.cbuf_and_imm.cond; h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); break; case OpCode::Id::HSETP2_IMM: @@ -52,15 +52,15 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { } const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); + const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred); const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39)); + SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); }; const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); const u64 first = instr.hsetp2.pred0; - const u64 second = instr.hsetp2.pred3; + const u64 second = instr.hsetp2.pred39; if (h_and) { const Node joined = Operation(OperationCode::LogicalAnd2, comparison); Write(first, joined); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); } else { - UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); } constexpr auto identity = HalfType::H0_H1; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 856e1b3d8..d46e0f823 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { + case OpCode::Id::NOP: { + UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); + UNIMPLEMENTED_IF(instr.nop.trigger != 0); + // With the previous preconditions, this instruction is a no-operation. + break; + } case OpCode::Id::EXIT: { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp new file mode 100644 index 000000000..04ca74f46 --- /dev/null +++ b/src/video_core/shader/decode/warp.cpp @@ -0,0 +1,55 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Pred; +using Tegra::Shader::VoteOperation; + +namespace { +OperationCode GetOperationCode(VoteOperation vote_op) { + switch (vote_op) { + case VoteOperation::All: + return OperationCode::VoteAll; + case VoteOperation::Any: + return OperationCode::VoteAny; + case VoteOperation::Eq: + return OperationCode::VoteEqual; + default: + UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op)); + return OperationCode::VoteAll; + } +} +} // Anonymous namespace + +u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::VOTE: { + const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); + const Node active = Operation(OperationCode::BallotThread, value); + const Node vote = Operation(GetOperationCode(instr.vote.operation), value); + SetRegister(bb, instr.gpr0, active); + SetPredicate(bb, instr.vote.dest_pred, vote); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); + break; + } + + return pc; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 715184d67..5db9313c4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -30,6 +30,8 @@ enum class OperationCode { FNegate, /// (MetaArithmetic, float a) -> float FAbsolute, /// (MetaArithmetic, float a) -> float FClamp, /// (MetaArithmetic, float value, float min, float max) -> float + FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float + FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float FMin, /// (MetaArithmetic, float a, float b) -> float FMax, /// (MetaArithmetic, float a, float b) -> float FCos, /// (MetaArithmetic, float a) -> float @@ -83,17 +85,18 @@ enum class OperationCode { UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAbsolute, /// (f16vec2 a) -> f16vec2 + HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 + HMergeF32, /// (f16vec2 src) -> float + HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HPack2, /// (float a, float b) -> f16vec2 LogicalAssign, /// (bool& dst, bool src) -> void LogicalAnd, /// (bool a, bool b) -> bool @@ -165,6 +168,11 @@ enum class OperationCode { WorkGroupIdY, /// () -> uint WorkGroupIdZ, /// () -> uint + BallotThread, /// (bool) -> uint + VoteAll, /// (bool) -> bool + VoteAny, /// (bool) -> bool + VoteEqual, /// (bool) -> bool + Amount, }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0509a5f88..bcc9b79b6 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -167,6 +167,7 @@ private: u32 DecodeFfma(NodeBlock& bb, u32 pc); u32 DecodeHfma2(NodeBlock& bb, u32 pc); u32 DecodeConversion(NodeBlock& bb, u32 pc); + u32 DecodeWarp(NodeBlock& bb, u32 pc); u32 DecodeMemory(NodeBlock& bb, u32 pc); u32 DecodeTexture(NodeBlock& bb, u32 pc); u32 DecodeImage(NodeBlock& bb, u32 pc); diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index a53e02253..55f5949e4 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -59,8 +59,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co return TrackCbuf(source, code, new_cursor); } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { - if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { // Cbuf found in operand. return found; } |