14 files changed, 184 insertions, 65 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index fdcc970ff..ec3a76690 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -15,7 +15,7 @@
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
-
+namespace {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
@@ -29,8 +29,7 @@ struct Query {
 
 struct BlockStack {
     BlockStack() = default;
-    BlockStack(const BlockStack& b) = default;
-    BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
+    explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
     std::stack<u32> ssy_stack{};
     std::stack<u32> pbk_stack{};
 };
@@ -58,7 +57,7 @@ struct BlockInfo {
 struct CFGRebuildState {
     explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
                              const u32 start)
-        : program_code{program_code}, program_size{program_size}, start{start} {}
+        : start{start}, program_code{program_code}, program_size{program_size} {}
 
     u32 start{};
     std::vector<BlockInfo> block_info{};
@@ -85,7 +84,7 @@ std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address)
             return {BlockCollision::Inside, index};
         }
     }
-    return {BlockCollision::None, -1};
+    return {BlockCollision::None, 0xFFFFFFFF};
 }
 
 struct ParseInfo {
@@ -365,27 +364,29 @@ bool TryQuery(CFGRebuildState& state) {
         const auto gather_end = labels.upper_bound(block.end);
         while (gather_start != gather_end) {
             cc.push(gather_start->second);
-            gather_start++;
+            ++gather_start;
         }
     };
     if (state.queries.empty()) {
         return false;
     }
+
     Query& q = state.queries.front();
     const u32 block_index = state.registered[q.address];
     BlockInfo& block = state.block_info[block_index];
-    // If the block is visted, check if the stacks match, else gather the ssy/pbk
+    // If the block is visited, check if the stacks match, else gather the ssy/pbk
     // labels into the current stack and look if the branch at the end of the block
     // consumes a label. Schedule new queries accordingly
     if (block.visited) {
         BlockStack& stack = state.stacks[q.address];
-        const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) &&
-                              (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack);
+        const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
+                              (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
         state.queries.pop_front();
         return all_okay;
     }
     block.visited = true;
-    state.stacks[q.address] = BlockStack{q};
+    state.stacks.insert_or_assign(q.address, BlockStack{q});
+
     Query q2(q);
     state.queries.pop_front();
     gather_labels(q2.ssy_stack, state.ssy_labels, block);
@@ -394,6 +395,7 @@ bool TryQuery(CFGRebuildState& state) {
         q2.address = block.end + 1;
         state.queries.push_back(q2);
     }
+
     Query conditional_query{q2};
     if (block.branch.is_sync) {
         if (block.branch.address == unassigned_branch) {
@@ -408,13 +410,15 @@ bool TryQuery(CFGRebuildState& state) {
         conditional_query.pbk_stack.pop();
     }
     conditional_query.address = block.branch.address;
-    state.queries.push_back(conditional_query);
+    state.queries.push_back(std::move(conditional_query));
     return true;
 }
+} // Anonymous namespace
 
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
-                                              u32 start_address) {
+std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
+                                              std::size_t program_size, u32 start_address) {
     CFGRebuildState state{program_code, program_size, start_address};
+
     // Inspect Code and generate blocks
     state.labels.clear();
     state.labels.emplace(start_address);
@@ -424,10 +428,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
             return {};
         }
     }
+
     // Decompile Stacks
-    Query start_query{};
-    start_query.address = state.start;
-    state.queries.push_back(start_query);
+    state.queries.push_back(Query{state.start, {}, {}});
     bool decompiled = true;
     while (!state.queries.empty()) {
         if (!TryQuery(state)) {
@@ -435,14 +438,15 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
             break;
         }
     }
+
     // Sort and organize results
     std::sort(state.block_info.begin(), state.block_info.end(),
-              [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
+              [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
     ShaderCharacteristics result_out{};
     result_out.decompilable = decompiled;
     result_out.start = start_address;
     result_out.end = start_address;
-    for (auto& block : state.block_info) {
+    for (const auto& block : state.block_info) {
         ShaderBlock new_block{};
         new_block.start = block.start;
         new_block.end = block.end;
@@ -457,8 +461,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
     }
     if (result_out.decompilable) {
         result_out.labels = std::move(state.labels);
-        return {result_out};
+        return {std::move(result_out)};
     }
+
     // If it's not decompilable, merge the unlabelled blocks together
     auto back = result_out.blocks.begin();
     auto next = std::next(back);
@@ -469,8 +474,8 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u
             continue;
         }
         back = next;
-        next++;
+        ++next;
     }
-    return {result_out};
+    return {std::move(result_out)};
 }
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5e8ea3271..b0a5e4f8c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include <cstring>
 #include <list>
 #include <optional>
 #include <unordered_set>
@@ -26,27 +25,44 @@ struct Condition {
     bool IsUnconditional() const {
         return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
     }
+
     bool operator==(const Condition& other) const {
         return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
     }
+
+    bool operator!=(const Condition& other) const {
+        return !operator==(other);
+    }
 };
 
 struct ShaderBlock {
-    u32 start{};
-    u32 end{};
-    bool ignore_branch{};
     struct Branch {
         Condition cond{};
         bool kills{};
         s32 address{};
+
         bool operator==(const Branch& b) const {
             return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
         }
-    } branch{};
+
+        bool operator!=(const Branch& b) const {
+            return !operator==(b);
+        }
+    };
+
+    u32 start{};
+    u32 end{};
+    bool ignore_branch{};
+    Branch branch{};
+
     bool operator==(const ShaderBlock& sb) const {
         return std::tie(start, end, ignore_branch, branch) ==
                std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
     }
+
+    bool operator!=(const ShaderBlock& sb) const {
+        return !operator==(sb);
+    }
 };
 
 struct ShaderCharacteristics {
@@ -57,7 +73,7 @@ struct ShaderCharacteristics {
     std::unordered_set<u32> labels{};
 };
 
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
-                                              u32 start_address);
+std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
+                                              std::size_t program_size, u32 start_address);
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index afffd157f..47a9fd961 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -47,14 +47,14 @@ void ShaderIR::Decode() {
         if (shader_info.decompilable) {
             disable_flow_stack = true;
             const auto insert_block = [this](NodeBlock& nodes, u32 label) {
-                if (label == exit_branch) {
+                if (label == static_cast<u32>(exit_branch)) {
                     return;
                 }
                 basic_blocks.insert({label, nodes});
             };
             const auto& blocks = shader_info.blocks;
             NodeBlock current_block;
-            u32 current_label = exit_branch;
+            u32 current_label = static_cast<u32>(exit_branch);
             for (auto& block : blocks) {
                 if (shader_info.labels.count(block.start) != 0) {
                     insert_block(current_block, current_label);
@@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
         {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
         {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
         {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
+        {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
         {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
         {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
         {OpCode::Type::Image, &ShaderIR::DecodeImage},
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 87d8fecaa..1473c282a 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::FMUL_R:
     case OpCode::Id::FMUL_IMM: {
         // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
-        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
-                             instr.fmul.tab5cb8_2.Value());
-        UNIMPLEMENTED_IF_MSG(
-            instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
-            instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
+        if (instr.fmul.tab5cb8_2 != 0) {
+            LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+                        instr.fmul.tab5cb8_2.Value());
+        }
+        if (instr.fmul.tab5c68_0 != 1) {
+            LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+                        instr.fmul.tab5c68_0.Value());
+        }
 
         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
 
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7bcf38f23..6466fc011 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
         }
     } else {
-        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
+        if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
+            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        }
     }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 4221f0c58..8973fbefa 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
     case OpCode::Id::I2F_R:
     case OpCode::Id::I2F_C:
     case OpCode::Id::I2F_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
         UNIMPLEMENTED_IF(instr.conversion.selector);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in I2F is not implemented");
@@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
         value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
 
         SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+
+        if (instr.conversion.dst_size == Register::Size::Short) {
+            value = Operation(OperationCode::HCastFloat, PRECISE, value);
+        }
+
         SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::F2F_R:
     case OpCode::Id::F2F_C:
     case OpCode::Id::F2F_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word);
-        UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
+        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in F2F is not implemented");
 
@@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
             }
         }();
 
+        if (instr.conversion.src_size == Register::Size::Short) {
+            // TODO: figure where extract is sey in the encoding
+            value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+        }
+
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
         value = [&]() {
@@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                                   static_cast<u32>(instr.conversion.f2f.rounding.Value()));
-                return Immediate(0);
+                return value;
             }
         }();
         value = GetSaturatedFloat(value, instr.alu.saturate_d);
 
         SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+
+        if (instr.conversion.dst_size == Register::Size::Short) {
+            value = Operation(OperationCode::HCastFloat, PRECISE, value);
+        }
+
         SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::F2I_R:
     case OpCode::Id::F2I_C:
     case OpCode::Id::F2I_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in F2I is not implemented");
         Node value = [&]() {
@@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
             }
         }();
 
+        if (instr.conversion.src_size == Register::Size::Short) {
+            // TODO: figure where extract is sey in the encoding
+            value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+        }
+
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
         value = [&]() {
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 29be25ca3..ca2f39e8d 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
-    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
-                         instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
-    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
-                         instr.ffma.tab5980_1.Value());
+    if (instr.ffma.tab5980_0 != 1) {
+        LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+    }
+    if (instr.ffma.tab5980_1 != 0) {
+        LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+    }
 
     const Node op_a = GetRegister(instr.gpr8);
 
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index ad180d6df..afea33e5f 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
+    DEBUG_ASSERT(instr.hsetp2.ftz == 0);
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
     op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
@@ -30,7 +30,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     case OpCode::Id::HSETP2_C:
         cond = instr.hsetp2.cbuf_and_imm.cond;
         h_and = instr.hsetp2.cbuf_and_imm.h_and;
-        op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+        op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                                     instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
         break;
     case OpCode::Id::HSETP2_IMM:
@@ -52,15 +52,15 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     }
 
     const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
-    const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
+    const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred);
 
     const auto Write = [&](u64 dest, Node src) {
-        SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
+        SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
     };
 
     const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
     const u64 first = instr.hsetp2.pred0;
-    const u64 second = instr.hsetp2.pred3;
+    const u64 second = instr.hsetp2.pred39;
     if (h_and) {
         const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
         Write(first, joined);
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index c3bcf1ae9..5b44cb79c 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
-        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
+        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
     } else {
-        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
+        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
     }
 
     constexpr auto identity = HalfType::H0_H1;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 856e1b3d8..d46e0f823 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     switch (opcode->get().GetId()) {
+    case OpCode::Id::NOP: {
+        UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
+        UNIMPLEMENTED_IF(instr.nop.trigger != 0);
+        // With the previous preconditions, this instruction is a no-operation.
+        break;
+    }
     case OpCode::Id::EXIT: {
         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
new file mode 100644
index 000000000..04ca74f46
--- /dev/null
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,55 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::VoteOperation;
+
+namespace {
+OperationCode GetOperationCode(VoteOperation vote_op) {
+    switch (vote_op) {
+    case VoteOperation::All:
+        return OperationCode::VoteAll;
+    case VoteOperation::Any:
+        return OperationCode::VoteAny;
+    case VoteOperation::Eq:
+        return OperationCode::VoteEqual;
+    default:
+        UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
+        return OperationCode::VoteAll;
+    }
+}
+} // Anonymous namespace
+
+u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::VOTE: {
+        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
+        const Node active = Operation(OperationCode::BallotThread, value);
+        const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
+        SetRegister(bb, instr.gpr0, active);
+        SetPredicate(bb, instr.vote.dest_pred, vote);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
+        break;
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 715184d67..5db9313c4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -30,6 +30,8 @@ enum class OperationCode {
     FNegate,       /// (MetaArithmetic, float a) -> float
     FAbsolute,     /// (MetaArithmetic, float a) -> float
     FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
+    FCastHalf0,    /// (MetaArithmetic, f16vec2 a) -> float
+    FCastHalf1,    /// (MetaArithmetic, f16vec2 a) -> float
     FMin,          /// (MetaArithmetic, float a, float b) -> float
     FMax,          /// (MetaArithmetic, float a, float b) -> float
     FCos,          /// (MetaArithmetic, float a) -> float
@@ -83,17 +85,18 @@ enum class OperationCode {
     UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
     UBitCount,        /// (MetaArithmetic, uint) -> uint
 
-    HAdd,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HMul,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HFma,      /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
-    HAbsolute, /// (f16vec2 a) -> f16vec2
-    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
-    HClamp,    /// (f16vec2 src, float min, float max) -> f16vec2
-    HUnpack,   /// (Tegra::Shader::HalfType, T value) -> f16vec2
-    HMergeF32, /// (f16vec2 src) -> float
-    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
-    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
-    HPack2,    /// (float a, float b) -> f16vec2
+    HAdd,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HMul,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,       /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+    HAbsolute,  /// (f16vec2 a) -> f16vec2
+    HNegate,    /// (f16vec2 a, bool first, bool second) -> f16vec2
+    HClamp,     /// (f16vec2 src, float min, float max) -> f16vec2
+    HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
+    HUnpack,    /// (Tegra::Shader::HalfType, T value) -> f16vec2
+    HMergeF32,  /// (f16vec2 src) -> float
+    HMergeH0,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
+    HMergeH1,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
+    HPack2,     /// (float a, float b) -> f16vec2
 
     LogicalAssign, /// (bool& dst, bool src) -> void
     LogicalAnd,    /// (bool a, bool b) -> bool
@@ -165,6 +168,11 @@ enum class OperationCode {
     WorkGroupIdY,       /// () -> uint
     WorkGroupIdZ,       /// () -> uint
 
+    BallotThread, /// (bool) -> uint
+    VoteAll,      /// (bool) -> bool
+    VoteAny,      /// (bool) -> bool
+    VoteEqual,    /// (bool) -> bool
+
     Amount,
 };
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0509a5f88..bcc9b79b6 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -167,6 +167,7 @@ private:
     u32 DecodeFfma(NodeBlock& bb, u32 pc);
     u32 DecodeHfma2(NodeBlock& bb, u32 pc);
     u32 DecodeConversion(NodeBlock& bb, u32 pc);
+    u32 DecodeWarp(NodeBlock& bb, u32 pc);
     u32 DecodeMemory(NodeBlock& bb, u32 pc);
     u32 DecodeTexture(NodeBlock& bb, u32 pc);
     u32 DecodeImage(NodeBlock& bb, u32 pc);
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index a53e02253..55f5949e4 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -59,8 +59,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
         return TrackCbuf(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
-        for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
-            if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
+        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
+            if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
                 // Cbuf found in operand.
                 return found;
             }