3 files changed, 576 insertions, 18 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d7c3152f..2a3ff234a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 
     regs.reg_array[method] = value;
 
-#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
-
     switch (method) {
     case MAXWELL3D_REG_INDEX(code_address.code_address_high):
     case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
@@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         break;
     }
 
-#undef MAXWELL3D_REG_INDEX
+    VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method);
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
@@ -165,6 +163,7 @@ void Maxwell3D::ProcessQueryGet() {
 void Maxwell3D::DrawArrays() {
     LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
               regs.vertex_buffer.count);
+    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
 
@@ -176,7 +175,8 @@ void Maxwell3D::DrawArrays() {
         debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
     }
 
-    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
+    const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
+    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed);
 }
 
 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
@@ -218,10 +218,12 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     Texture::TICEntry tic_entry;
     Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
 
-    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
-               "TIC versions other than BlockLinear are unimplemented");
+    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
+                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
+               "TIC versions other than BlockLinear or Pitch are unimplemented");
 
-    ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
+    ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
+                   (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
                "Texture types other than Texture2D are unimplemented");
 
     auto r_type = tic_entry.r_type.Value();
@@ -301,5 +303,26 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
     return regs.reg_array[method];
 }
 
+/// Returns true for the Vertex stage, otherwise the enable bit of the stage's shader_config.
+bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
+    // Reads the enable bit of the shader_config slot that belongs to a shader program.
+    const auto program_enabled = [this](Regs::ShaderProgram program) {
+        return regs.shader_config[static_cast<size_t>(program)].enable != 0;
+    };
+    switch (stage) {
+    case Regs::ShaderStage::Vertex:
+        return true; // The Vertex stage is always enabled.
+    case Regs::ShaderStage::TesselationControl:
+        return program_enabled(Regs::ShaderProgram::TesselationControl);
+    case Regs::ShaderStage::TesselationEval:
+        return program_enabled(Regs::ShaderProgram::TesselationEval);
+    case Regs::ShaderStage::Geometry:
+        return program_enabled(Regs::ShaderProgram::Geometry);
+    case Regs::ShaderStage::Fragment:
+        return program_enabled(Regs::ShaderProgram::Fragment);
+    }
+    UNREACHABLE();
+}
+
 } // namespace Engines
 } // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 98b39b2ff..d4fcedace 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -20,6 +20,9 @@
 namespace Tegra {
 namespace Engines {
 
+#define MAXWELL3D_REG_INDEX(field_name)                                                            \
+    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
+
 class Maxwell3D final {
 public:
     explicit Maxwell3D(MemoryManager& memory_manager);
@@ -248,6 +251,52 @@ public:
             Patches = 0xe,
         };
 
+        enum class IndexFormat : u32 { // Element type selected by index_array.format.
+            UnsignedByte = 0x0,  // 1 byte per index (see index_array.FormatSizeInBytes)
+            UnsignedShort = 0x1, // 2 bytes per index
+            UnsignedInt = 0x2,   // 4 bytes per index
+        };
+
+        struct Blend { // Blend state registers: seven consecutive 32-bit words.
+            enum class Equation : u32 { // Values stored in equation_rgb / equation_a.
+                Add = 1,
+                Subtract = 2,
+                ReverseSubtract = 3,
+                Min = 4,
+                Max = 5,
+            };
+
+            enum class Factor : u32 { // Values stored in the factor_* fields.
+                Zero = 0x1,
+                One = 0x2,
+                SourceColor = 0x3,
+                OneMinusSourceColor = 0x4,
+                SourceAlpha = 0x5,
+                OneMinusSourceAlpha = 0x6,
+                DestAlpha = 0x7,
+                OneMinusDestAlpha = 0x8,
+                DestColor = 0x9,
+                OneMinusDestColor = 0xa,
+                SourceAlphaSaturate = 0xb,
+                Source1Color = 0x10,
+                OneMinusSource1Color = 0x11,
+                Source1Alpha = 0x12,
+                OneMinusSource1Alpha = 0x13,
+                ConstantColor = 0x61,
+                OneMinusConstantColor = 0x62,
+                ConstantAlpha = 0x63,
+                OneMinusConstantAlpha = 0x64,
+            };
+
+            u32 separate_alpha; // presumably nonzero makes alpha use the *_a fields - confirm
+            Equation equation_rgb;
+            Factor factor_source_rgb;
+            Factor factor_dest_rgb;
+            Equation equation_a;
+            Factor factor_source_a;
+            Factor factor_dest_a;
+        };
+
         union {
             struct {
                 INSERT_PADDING_WORDS(0x200);
@@ -270,7 +319,15 @@ public:
                     }
                 } rt[NumRenderTargets];
 
-                INSERT_PADDING_WORDS(0x80);
+                struct { // One viewport transform entry: 6 value words + 2 padding words.
+                    f32 scale_x;
+                    f32 scale_y;
+                    f32 scale_z;
+                    u32 translate_x; // NOTE(review): translate_* are u32 while scale_* are f32 -
+                    u32 translate_y; // presumably raw float bits; confirm whether f32 was meant.
+                    u32 translate_z;
+                    INSERT_PADDING_WORDS(2);
+                } viewport_transform[NumViewports]; // Begins at register index 0x280.
 
                 struct {
                     union {
@@ -375,7 +432,42 @@ public:
                     };
                 } draw;
 
-                INSERT_PADDING_WORDS(0x139);
+                INSERT_PADDING_WORDS(0x6B);
+
+                struct {
+                    u32 start_addr_high;
+                    u32 start_addr_low;
+                    u32 end_addr_high;
+                    u32 end_addr_low;
+                    IndexFormat format;
+                    u32 first;
+                    u32 count;
+                    /// Returns the size in bytes of a single index in the current format.
+                    unsigned FormatSizeInBytes() const {
+                        switch (format) {
+                        case IndexFormat::UnsignedByte:
+                            return 1;
+                        case IndexFormat::UnsignedShort:
+                            return 2;
+                        case IndexFormat::UnsignedInt:
+                            return 4;
+                        }
+                        UNREACHABLE();
+                    }
+                    /// Joins the high and low start address words into one GPU virtual address.
+                    GPUVAddr StartAddress() const {
+                        const auto upper_bits = static_cast<GPUVAddr>(start_addr_high) << 32;
+                        return static_cast<GPUVAddr>(upper_bits | start_addr_low);
+                    }
+                    /// Joins the high and low end address words into one GPU virtual address.
+                    GPUVAddr EndAddress() const {
+                        const auto upper_bits = static_cast<GPUVAddr>(end_addr_high) << 32;
+                        return static_cast<GPUVAddr>(upper_bits | end_addr_low);
+                    }
+                } index_array;
+
+                INSERT_PADDING_WORDS(0xC7);
+
                 struct {
                     u32 query_address_high;
                     u32 query_address_low;
@@ -410,7 +502,9 @@ public:
                     }
                 } vertex_array[NumVertexArrays];
 
-                INSERT_PADDING_WORDS(0x40);
+                Blend blend;
+
+                INSERT_PADDING_WORDS(0x39);
 
                 struct {
                     u32 limit_high;
@@ -427,14 +521,11 @@ public:
                         BitField<0, 1, u32> enable;
                         BitField<4, 4, ShaderProgram> program;
                     };
-                    u32 start_id;
-                    INSERT_PADDING_WORDS(1);
-                    u32 gpr_alloc;
-                    ShaderStage type;
-                    INSERT_PADDING_WORDS(9);
+                    u32 offset;
+                    INSERT_PADDING_WORDS(14);
                 } shader_config[MaxShaderProgram];
 
-                INSERT_PADDING_WORDS(0x8C);
+                INSERT_PADDING_WORDS(0x80);
 
                 struct {
                     u32 cb_size;
@@ -507,6 +598,7 @@ public:
     };
 
     State state{};
+    MemoryManager& memory_manager;
 
     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
@@ -520,9 +612,10 @@ public:
     /// Returns a list of enabled textures for the specified shader stage.
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
-private:
-    MemoryManager& memory_manager;
+    /// Returns whether the specified shader stage is enabled or not.
+    bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
 
+private:
     std::unordered_map<u32, std::vector<u32>> uploaded_macros;
 
     /// Macro method that is currently being executed / being fed parameters.
@@ -564,6 +657,7 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(zeta, 0x3F8);
@@ -573,8 +667,10 @@ ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
+ASSERT_REG_POSITION(blend, 0x780);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
 ASSERT_REG_POSITION(shader_config[0], 0x800);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
new file mode 100644
index 000000000..5a006aee5
--- /dev/null
+++ b/src/video_core/engines/shader_bytecode.h
@@ -0,0 +1,442 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <bitset>
+#include <cstring>
+#include <map>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#include <boost/optional.hpp>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Shader {
+
+struct Register {
+    /// Index of the register that is special cased to always be 0.
+    static constexpr size_t ZeroIndex = 255;
+
+    constexpr Register() = default;
+
+    constexpr Register(u64 value) : value{value} {}
+
+    /// Yields the raw register number.
+    constexpr operator u64() const {
+        return value;
+    }
+
+    constexpr u64 operator~() const {
+        return ~value;
+    }
+
+    constexpr u64 operator&(const Register& rhs) const {
+        return value & rhs.value;
+    }
+
+    template <typename Operand>
+    constexpr u64 operator&(const Operand& rhs) const {
+        return value & rhs;
+    }
+    template <typename Operand>
+    constexpr u64 operator-(const Operand& rhs) const {
+        return value - rhs;
+    }
+
+private:
+    u64 value{};
+};
+
+union Attribute { // Attribute operand fields overlaid on one 64-bit word.
+    Attribute() = default;
+
+    constexpr explicit Attribute(u64 value) : value(value) {}
+
+    enum class Index : u64 { // Value type of the fmt20.index / fmt28.index fields.
+        Position = 7,
+        Attribute_0 = 8,
+    };
+    // Field layout whose element/index fields start at bit 22.
+    union {
+        BitField<22, 2, u64> element;
+        BitField<24, 6, Index> index;
+        BitField<47, 3, u64> size;
+    } fmt20;
+    // Same element/index fields placed 8 bits higher (starting at bit 30); no size field.
+    union {
+        BitField<30, 2, u64> element;
+        BitField<32, 6, Index> index;
+    } fmt28;
+
+    BitField<39, 8, u64> reg; // same bit range as Instruction::gpr39
+    u64 value{};              // raw word that all fields above alias
+};
+
+union Sampler { // Sampler operand fields overlaid on one 64-bit word.
+    Sampler() = default;
+
+    constexpr explicit Sampler(u64 value) : value(value) {}
+
+    enum class Index : u64 { // Value type of the index field below.
+        Sampler_0 = 8,
+    };
+
+    BitField<36, 13, Index> index; // 13-bit field starting at bit 36
+    u64 value{};                   // raw word that the field above aliases
+};
+
+union Uniform { // Uniform operand fields; used as Instruction::uniform.
+    BitField<20, 14, u64> offset; // 14-bit field starting at bit 20
+    BitField<34, 5, u64> index;   // 5-bit field starting at bit 34
+};
+
+} // namespace Shader
+} // namespace Tegra
+
+namespace std {
+// TODO(bunnei): Specializing std::make_unsigned for these types is forbidden by the C++
+// standard, but works fine. See #330.
+template <>
+struct make_unsigned<Tegra::Shader::Attribute> {
+    using type = Tegra::Shader::Attribute;
+};
+// NOTE(review): presumably needed so BitField<..., T> accepts these value types - confirm.
+template <>
+struct make_unsigned<Tegra::Shader::Register> {
+    using type = Tegra::Shader::Register;
+};
+
+} // namespace std
+
+namespace Tegra {
+namespace Shader {
+
+enum class Pred : u64 { // Value type of the 4-bit Instruction::pred.full_pred field.
+    UnusedIndex = 0x7,
+    NeverExecute = 0xF,
+};
+
+enum class PredCondition : u64 { // Value type of the 4-bit Instruction::fsetp.cond field.
+    LessThan = 1,
+    Equal = 2,
+    LessEqual = 3,
+    GreaterThan = 4,
+    NotEqual = 5,
+    GreaterEqual = 6,
+    // TODO(Subv): Other condition types
+};
+
+enum class PredOperation : u64 { // Value type of the 2-bit Instruction::fsetp.op field.
+    And = 0,
+    Or = 1,
+    Xor = 2,
+};
+
+enum class SubOp : u64 { // Value type of the 7-bit Instruction::sub_op field.
+    Cos = 0x0,
+    Sin = 0x1,
+    Ex2 = 0x2,
+    Lg2 = 0x3,
+    Rcp = 0x4,
+    Rsq = 0x5,
+    Min = 0x8, // NOTE(review): confirm that encoding 0x8 really is a min operation
+};
+
+union Instruction { // One 64-bit shader instruction word seen through overlapping bit fields.
+    Instruction& operator=(const Instruction& instr) {
+        value = instr.value; // copying the raw word copies every aliased field
+        return *this;
+    }
+
+    constexpr Instruction(u64 value) : value{value} {}
+
+    BitField<0, 8, Register> gpr0;
+    BitField<8, 8, Register> gpr8;
+    union {
+        BitField<16, 4, Pred> full_pred;
+        BitField<16, 3, u64> pred_index;
+    } pred;
+    BitField<19, 1, u64> negate_pred; // also the top bit of pred.full_pred
+    BitField<20, 8, Register> gpr20;
+    BitField<20, 7, SubOp> sub_op;
+    BitField<28, 8, Register> gpr28;
+    BitField<39, 8, Register> gpr39;
+    BitField<48, 16, u64> opcode; // top 16 bits; this is what OpCode::Decode matches on
+
+    union {
+        BitField<20, 19, u64> imm20_19;
+        BitField<20, 32, u64> imm20_32;
+        BitField<45, 1, u64> negate_b;
+        BitField<46, 1, u64> abs_a;
+        BitField<48, 1, u64> negate_a;
+        BitField<49, 1, u64> abs_b;
+        BitField<50, 1, u64> abs_d;
+        BitField<56, 1, u64> negate_imm;
+        /// Builds a float from imm20_19 placed at bits 12-30, with negate_imm as the sign bit.
+        float GetImm20_19() const {
+            float result{};
+            u32 imm{static_cast<u32>(imm20_19)};
+            imm <<= 12;
+            imm |= negate_imm ? 0x80000000 : 0;
+            std::memcpy(&result, &imm, sizeof(imm)); // reinterpret the bits as a float
+            return result;
+        }
+        /// Reinterprets the 32-bit immediate imm20_32 as a float.
+        float GetImm20_32() const {
+            float result{};
+            u32 imm{static_cast<u32>(imm20_32)};
+            std::memcpy(&result, &imm, sizeof(imm));
+            return result;
+        }
+    } alu;
+
+    union {
+        BitField<48, 1, u64> negate_b;
+        BitField<49, 1, u64> negate_c;
+    } ffma;
+
+    union {
+        BitField<0, 3, u64> pred0;
+        BitField<3, 3, u64> pred3;
+        BitField<7, 1, u64> abs_a;
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_b;
+        BitField<45, 2, PredOperation> op;
+        BitField<47, 1, u64> ftz;
+        BitField<48, 4, PredCondition> cond;
+        BitField<56, 1, u64> neg_b;
+    } fsetp;
+
+    BitField<61, 1, u64> is_b_imm;
+    BitField<60, 1, u64> is_b_gpr;
+    BitField<59, 1, u64> is_c_gpr;
+    // Operand views that alias the same word.
+    Attribute attribute;
+    Uniform uniform;
+    Sampler sampler;
+
+    u64 value; // raw instruction word
+};
+static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
+static_assert(std::is_standard_layout<Instruction>::value,
+              "Structure does not have standard layout");
+
+class OpCode {
+public:
+    enum class Id {
+        KIL,
+        LD_A,
+        ST_A,
+        TEXQ, // Texture Query
+        TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
+        TLDS, // Texture Load with scalar/non-vec4 source/destinations
+        EXIT,
+        IPA,
+        FFMA_IMM, // Fused Multiply and Add
+        FFMA_CR,
+        FFMA_RC,
+        FFMA_RR,
+        FADD_C,
+        FADD_R,
+        FADD_IMM,
+        FMUL_C,
+        FMUL_R,
+        FMUL_IMM,
+        FMUL32_IMM,
+        MUFU, // Multi-Function Operator
+        RRO,  // Range Reduction Operator
+        F2F_C,
+        F2F_R,
+        F2F_IMM,
+        F2I_C,
+        F2I_R,
+        F2I_IMM,
+        I2F_C,
+        I2F_R,
+        I2F_IMM,
+        LOP32I,
+        MOV_C,
+        MOV_R,
+        MOV_IMM,
+        MOV32I,
+        SHR_C,
+        SHR_R,
+        SHR_IMM,
+        FSETP_C, // Set Predicate
+        FSETP_R,
+        FSETP_IMM,
+        ISETP_C,
+        ISETP_IMM,
+        ISETP_R,
+    };
+
+    enum class Type {
+        Trivial,
+        Arithmetic,
+        Ffma,
+        Flow,
+        Memory,
+        FloatPredicate,
+        IntegerPredicate,
+        Unknown,
+    };
+
+    class Matcher { // Pairs an opcode bit pattern (mask + expected) with its id, type and name.
+    public:
+        Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
+            : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
+        /// Returns the name pointer passed at construction.
+        const char* GetName() const {
+            return name;
+        }
+        /// Returns the mask of opcode bits that take part in the comparison.
+        u16 GetMask() const {
+            return mask;
+        }
+        /// Returns the opcode id this matcher stands for.
+        Id GetId() const {
+            return id;
+        }
+        /// Returns the type category of the opcode.
+        Type GetType() const {
+            return type;
+        }
+
+        /**
+         * Tests to see if the given instruction is the instruction this matcher represents.
+         * @param instruction The instruction to test
+         * @returns true if the given instruction matches.
+         */
+        bool Matches(u16 instruction) const {
+            return (instruction & mask) == expected;
+        }
+
+    private:
+        const char* name; // stored as passed in; the string itself is not copied
+        u16 mask;         // opcode bits that are compared
+        u16 expected;     // required value of the masked opcode bits
+        Id id;
+        Type type;
+    };
+
+    /// Returns the first decode-table matcher that accepts the opcode of `instr`, if any.
+    static boost::optional<const Matcher&> Decode(Instruction instr) {
+        static const auto table{GetDecodeTable()};
+        const u16 opcode_bits{static_cast<u16>(instr.opcode)};
+        for (const auto& matcher : table) {
+            if (matcher.Matches(opcode_bits))
+                return boost::optional<const Matcher&>(matcher);
+        }
+        return boost::none;
+    }
+
+private:
+    struct Detail {
+    private:
+        static constexpr size_t opcode_bitsize = 16;
+
+        /**
+         * Generates the mask and the expected value after masking from a given bitstring.
+         * '0' / '1' require that bit to be clear / set; any other character is a don't-care.
+         * Scanning stops at the terminator, so bits missing from a short string are don't-care.
+         */
+        static auto GetMaskAndExpect(const char* const bitstring) {
+            u16 mask = 0, expect = 0;
+            for (size_t i = 0; i < opcode_bitsize && bitstring[i] != '\0'; i++) {
+                const size_t bit_position = opcode_bitsize - i - 1; // string is MSB first
+                switch (bitstring[i]) {
+                case '0':
+                    mask |= 1 << bit_position;
+                    break;
+                case '1':
+                    expect |= 1 << bit_position;
+                    mask |= 1 << bit_position;
+                    break;
+                default:
+                    // Ignore
+                    break;
+                }
+            }
+            return std::make_tuple(mask, expect);
+        }
+
+    public:
+        /// Creates a matcher that can match and parse instructions based on bitstring.
+        static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
+                               const char* const name) {
+            const auto mask_expect = GetMaskAndExpect(bitstring);
+            return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type);
+        }
+    };
+
+    static std::vector<Matcher> GetDecodeTable() { // Builds the table searched by Decode().
+        std::vector<Matcher> table = {
+#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
+            INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
+            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+            INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+            INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+            INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
+            INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
+            INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
+            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
+            INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
+            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
+            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
+            INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
+            INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
+            INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
+            INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
+            INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
+            INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
+            INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
+            INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
+            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
+            INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
+            INST("0100110010101---", Id::F2F_C, Type::Arithmetic, "F2F_C"),
+            INST("0101110010101---", Id::F2F_R, Type::Arithmetic, "F2F_R"),
+            INST("0011100-10101---", Id::F2F_IMM, Type::Arithmetic, "F2F_IMM"),
+            INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
+            INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
+            INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
+            INST("0100110010111---", Id::I2F_C, Type::Arithmetic, "I2F_C"),
+            INST("0101110010111---", Id::I2F_R, Type::Arithmetic, "I2F_R"),
+            INST("0011100-10111---", Id::I2F_IMM, Type::Arithmetic, "I2F_IMM"),
+            INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"),
+            INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
+            INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
+            INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
+            INST("000000010000----", Id::MOV32I, Type::Arithmetic, "MOV32I"),
+            INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
+            INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
+            INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
+            INST("010010111011----", Id::FSETP_C, Type::FloatPredicate, "FSETP_C"),
+            INST("010110111011----", Id::FSETP_R, Type::FloatPredicate, "FSETP_R"),
+            INST("0011011-1011----", Id::FSETP_IMM, Type::FloatPredicate, "FSETP_IMM"),
+            INST("010010110110----", Id::ISETP_C, Type::IntegerPredicate, "ISETP_C"),
+            INST("010110110110----", Id::ISETP_R, Type::IntegerPredicate, "ISETP_R"),
+            INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerPredicate, "ISETP_IMM"),
+        };
+#undef INST
+        std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
+            // If a matcher has more bits in its mask it is more specific, so it
+            // should come first.
+            return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
+        });
+        // stable_sort keeps the listed order among matchers with equally many mask bits.
+        return table;
+    }
+};
+
+} // namespace Shader
+} // namespace Tegra