20 files changed, 1112 insertions, 234 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 23e70cd8a..86e9dc998 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -156,16 +156,15 @@ void Maxwell3D::ProcessQueryGet() {
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
                "Units other than CROP are unimplemented");
-    ASSERT_MSG(regs.query.query_get.short_query,
-               "Writing the entire query result structure is unimplemented");
 
     u32 value = Memory::Read32(*address);
-    u32 result = 0;
+    u64 result = 0;
 
     // TODO(Subv): Support the other query variables
     switch (regs.query.query_get.select) {
     case Regs::QuerySelect::Zero:
-        result = 0;
+        // This seems to actually write the query sequence to the query address.
+        result = regs.query.query_sequence;
         break;
     default:
         UNIMPLEMENTED_MSG("Unimplemented query select type {}",
@@ -174,15 +173,31 @@ void Maxwell3D::ProcessQueryGet() {
 
     // TODO(Subv): Research and implement how query sync conditions work.
 
+    struct LongQueryResult {
+        u64_le value;
+        u64_le timestamp;
+    };
+    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
+
     switch (regs.query.query_get.mode) {
     case Regs::QueryMode::Write:
     case Regs::QueryMode::Write2: {
-        // Write the current query sequence to the sequence address.
         u32 sequence = regs.query.query_sequence;
-        Memory::Write32(*address, sequence);
-
-        // TODO(Subv): Write the proper query response structure to the address when not using short
-        // mode.
+        if (regs.query.query_get.short_query) {
+            // Write the current query sequence to the sequence address.
+            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
+            // query.
+            Memory::Write32(*address, sequence);
+        } else {
+            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+            // GPU, this command may actually take a while to complete in real hardware due to GPU
+            // wait queues.
+            LongQueryResult query_result{};
+            query_result.value = result;
+            // TODO(Subv): Generate a real GPU timestamp and write it here instead of 0
+            query_result.timestamp = 0;
+            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+        }
         break;
     }
     default:
@@ -339,6 +354,40 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     return textures;
 }
 
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
+    // Texture handles are read from the stage's const buffer selected by regs.tex_cb_index.
+    const auto& handle_buffer =
+        state.shader_stages[static_cast<size_t>(stage)].const_buffers[regs.tex_cb_index];
+    ASSERT(handle_buffer.enabled && handle_buffer.address != 0);
+
+    // The handle at the requested index must start inside the buffer.
+    const GPUVAddr handle_addr = handle_buffer.address + offset * sizeof(Texture::TextureHandle);
+    ASSERT(handle_addr < handle_buffer.address + handle_buffer.size);
+
+    const boost::optional<VAddr> handle_cpu_addr = memory_manager.GpuToCpuAddress(handle_addr);
+    Texture::TextureHandle handle{Memory::Read32(*handle_cpu_addr)};
+
+    Texture::FullTextureInfo info{};
+    info.index = static_cast<u32>(offset);
+
+    // A non-zero TIC id marks the texture as enabled and selects the TIC entry to load.
+    if (handle.tic_id != 0) {
+        info.enabled = true;
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        const auto tic = GetTICEntry(handle.tic_id);
+        std::memcpy(&info.tic, &tic, sizeof(tic));
+    }
+
+    // A non-zero TSC id selects the TSC entry to load.
+    if (handle.tsc_id != 0) {
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        const auto tsc = GetTSCEntry(handle.tsc_id);
+        std::memcpy(&info.tsc, &tsc, sizeof(tsc));
+    }
+
+    return info;
+}
+
 u32 Maxwell3D::GetRegisterValue(u32 method) const {
     ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
     return regs.reg_array[method];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 5cf62fb01..56b837372 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -354,10 +354,35 @@ public:
                     f32 scale_x;
                     f32 scale_y;
                     f32 scale_z;
-                    u32 translate_x;
-                    u32 translate_y;
-                    u32 translate_z;
+                    f32 translate_x;
+                    f32 translate_y;
+                    f32 translate_z;
                     INSERT_PADDING_WORDS(2);
+
+                    MathUtil::Rectangle<s32> GetRect() const {
+                        // The rectangle is built in {left, top, right, bottom} order.
+                        const s32 left = GetX();
+                        const s32 bottom = GetY();
+                        const s32 right = left + GetWidth();
+                        const s32 top = bottom + GetHeight();
+                        return {left, top, right, bottom};
+                    }
+
+                    s32 GetX() const {
+                        return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x)));
+                    }
+
+                    s32 GetY() const {
+                        return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y)));
+                    }
+
+                    s32 GetWidth() const {
+                        return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX();
+                    }
+
+                    s32 GetHeight() const {
+                        return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY();
+                    }
                 } viewport_transform[NumViewports];
 
                 struct {
@@ -371,15 +396,6 @@ public:
                     };
                     float depth_range_near;
                     float depth_range_far;
-
-                    MathUtil::Rectangle<s32> GetRect() const {
-                        return {
-                            static_cast<s32>(x),          // left
-                            static_cast<s32>(y + height), // top
-                            static_cast<s32>(x + width),  // right
-                            static_cast<s32>(y)           // bottom
-                        };
-                    };
                 } viewport[NumViewports];
 
                 INSERT_PADDING_WORDS(0x1D);
@@ -648,6 +664,9 @@ public:
     /// Returns a list of enabled textures for the specified shader stage.
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
+    /// Returns the texture information for a specific texture in a specific shader stage.
+    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+
     /// Returns whether the specified shader stage is enabled or not.
     bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d75de85e2..32800392b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -75,6 +75,10 @@ union Attribute {
     enum class Index : u64 {
         Position = 7,
         Attribute_0 = 8,
+        // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
+        // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
+        // shader.
+        TessCoordInstanceIDVertexID = 47,
     };
 
     union {
@@ -105,11 +109,6 @@ union Sampler {
     u64 value{};
 };
 
-union Uniform {
-    BitField<20, 14, u64> offset;
-    BitField<34, 5, u64> index;
-};
-
 } // namespace Shader
 } // namespace Tegra
 
@@ -152,6 +151,13 @@ enum class PredOperation : u64 {
     Xor = 2,
 };
 
+enum class LogicOperation : u64 {
+    And = 0,
+    Or = 1,
+    Xor = 2,
+    PassB = 3,
+};
+
 enum class SubOp : u64 {
     Cos = 0x0,
     Sin = 0x1,
@@ -162,6 +168,31 @@ enum class SubOp : u64 {
     Min = 0x8,
 };
 
+enum class F2iRoundingOp : u64 {
+    None = 0,
+    Floor = 1,
+    Ceil = 2,
+    Trunc = 3,
+};
+
+enum class F2fRoundingOp : u64 {
+    None = 0,
+    Pass = 3,
+    Round = 8,
+    Floor = 9,
+    Ceil = 10,
+    Trunc = 11,
+};
+
+enum class UniformType : u64 {
+    UnsignedByte = 0,
+    SignedByte = 1,
+    UnsignedShort = 2,
+    SignedShort = 3,
+    Single = 4,
+    Double = 5,
+};
+
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -198,6 +229,12 @@ union Instruction {
             BitField<42, 1, u64> negate_pred;
         } fmnmx;
 
+        union {
+            BitField<53, 2, LogicOperation> operation;
+            BitField<55, 1, u64> invert_a;
+            BitField<56, 1, u64> invert_b;
+        } lop;
+
         float GetImm20_19() const {
             float result{};
             u32 imm{static_cast<u32>(imm20_19)};
@@ -213,14 +250,43 @@ union Instruction {
             std::memcpy(&result, &imm, sizeof(imm));
             return result;
         }
+
+        s32 GetSignedImm20_20() const {
+            u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
+            // Sign extend the 20-bit value.
+            u32 mask = 1U << (20 - 1);
+            return static_cast<s32>((immediate ^ mask) - mask);
+        }
     } alu;
 
     union {
+        BitField<39, 5, u64> shift_amount;
+        BitField<48, 1, u64> negate_b;
+        BitField<49, 1, u64> negate_a;
+    } iscadd;
+
+    union {
+        BitField<20, 8, u64> shift_position;
+        BitField<28, 8, u64> shift_length;
+        BitField<48, 1, u64> negate_b;
+        BitField<49, 1, u64> negate_a;
+
+        u64 GetLeftShiftValue() const {
+            return 32 - (shift_position + shift_length);
+        }
+    } bfe;
+
+    union {
         BitField<48, 1, u64> negate_b;
         BitField<49, 1, u64> negate_c;
     } ffma;
 
     union {
+        BitField<48, 3, UniformType> type;
+        BitField<44, 2, u64> unknown;
+    } ld_c;
+
+    union {
         BitField<0, 3, u64> pred0;
         BitField<3, 3, u64> pred3;
         BitField<7, 1, u64> abs_a;
@@ -235,34 +301,103 @@ union Instruction {
     } fsetp;
 
     union {
+        BitField<0, 3, u64> pred0;
+        BitField<3, 3, u64> pred3;
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred;
+        BitField<45, 2, PredOperation> op;
+        BitField<48, 1, u64> is_signed;
+        BitField<49, 3, PredCondition> cond;
+    } isetp;
+
+    union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
         BitField<43, 1, u64> neg_a;
         BitField<44, 1, u64> abs_b;
         BitField<45, 2, PredOperation> op;
         BitField<48, 4, PredCondition> cond;
+        BitField<52, 1, u64> bf;
         BitField<53, 1, u64> neg_b;
         BitField<54, 1, u64> abs_a;
-        BitField<52, 1, u64> bf;
         BitField<55, 1, u64> ftz;
         BitField<56, 1, u64> neg_imm;
     } fset;
 
     union {
         BitField<10, 2, Register::Size> size;
-        BitField<13, 1, u64> is_signed;
+        BitField<12, 1, u64> is_output_signed;
+        BitField<13, 1, u64> is_input_signed;
         BitField<41, 2, u64> selector;
         BitField<45, 1, u64> negate_a;
         BitField<49, 1, u64> abs_a;
         BitField<50, 1, u64> saturate_a;
+
+        union {
+            BitField<39, 2, F2iRoundingOp> rounding;
+        } f2i;
+
+        union {
+            BitField<39, 4, F2fRoundingOp> rounding;
+        } f2f;
     } conversion;
 
+    union {
+        BitField<31, 4, u64> component_mask;
+
+        bool IsComponentEnabled(size_t component) const {
+            return ((1 << component) & component_mask) != 0;
+        }
+    } tex;
+
+    union {
+        BitField<50, 3, u64> component_mask_selector;
+        BitField<28, 8, Register> gpr28;
+
+        bool HasTwoDestinations() const {
+            return gpr28.Value() != Register::ZeroIndex;
+        }
+
+        bool IsComponentEnabled(size_t component) const {
+            static constexpr std::array<size_t, 5> one_dest_mask{0x1, 0x2, 0x4, 0x8, 0x3};
+            static constexpr std::array<size_t, 5> two_dest_mask{0x7, 0xb, 0xd, 0xe, 0xf};
+            const auto& mask{HasTwoDestinations() ? two_dest_mask : one_dest_mask};
+
+            ASSERT(component_mask_selector < mask.size());
+
+            return ((1 << component) & mask[component_mask_selector]) != 0;
+        }
+    } texs;
+
+    union {
+        BitField<20, 24, u64> target;
+        BitField<5, 1, u64> constant_buffer;
+
+        s32 GetBranchTarget() const {
+            // Sign extend the 24-bit offset; keep it signed so the division below stays signed.
+            const u32 mask = 1U << (24 - 1);
+            const s32 offset = static_cast<s32>((static_cast<u32>(target) ^ mask) - mask);
+            // The branch offset is relative to the next instruction and is stored in bytes, so
+            // divide it by the size of an instruction (as a signed value) and add 1 to it.
+            return offset / static_cast<s32>(sizeof(Instruction)) + 1;
+        }
+    } bra;
+
+    union {
+        BitField<20, 14, u64> offset;
+        BitField<34, 5, u64> index;
+    } cbuf34;
+
+    union {
+        BitField<20, 16, s64> offset;
+        BitField<36, 5, u64> index;
+    } cbuf36;
+
     BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
 
     Attribute attribute;
-    Uniform uniform;
     Sampler sampler;
 
     u64 value;
@@ -275,8 +410,14 @@ class OpCode {
 public:
     enum class Id {
         KIL,
+        BFE_C,
+        BFE_R,
+        BFE_IMM,
+        BRA,
         LD_A,
+        LD_C,
         ST_A,
+        TEX,
         TEXQ, // Texture Query
         TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
         TLDS, // Texture Load with scalar/non-vec4 source/destinations
@@ -293,8 +434,13 @@ public:
         FMUL_R,
         FMUL_IMM,
         FMUL32_IMM,
-        MUFU, // Multi-Function Operator
-        RRO,  // Range Reduction Operator
+        ISCADD_C, // Scale and Add
+        ISCADD_R,
+        ISCADD_IMM,
+        MUFU,  // Multi-Function Operator
+        RRO_C, // Range Reduction Operator
+        RRO_R,
+        RRO_IMM,
         F2F_C,
         F2F_R,
         F2F_IMM,
@@ -312,12 +458,18 @@ public:
         MOV_R,
         MOV_IMM,
         MOV32_IMM,
+        SHL_C,
+        SHL_R,
+        SHL_IMM,
         SHR_C,
         SHR_R,
         SHR_IMM,
         FMNMX_C,
         FMNMX_R,
         FMNMX_IMM,
+        IMNMX_C,
+        IMNMX_R,
+        IMNMX_IMM,
         FSETP_C, // Set Predicate
         FSETP_R,
         FSETP_IMM,
@@ -328,11 +480,19 @@ public:
         ISETP_IMM,
         ISETP_R,
         PSETP,
+        XMAD_IMM,
+        XMAD_CR,
+        XMAD_RC,
+        XMAD_RR,
     };
 
     enum class Type {
         Trivial,
         Arithmetic,
+        Bfe,
+        Logic,
+        Shift,
+        ScaledAdd,
         Ffma,
         Flow,
         Memory,
@@ -436,8 +596,11 @@ private:
         std::vector<Matcher> table = {
 #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
             INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
+            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+            INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
             INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+            INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
             INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
             INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
             INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
@@ -454,25 +617,39 @@ private:
             INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
             INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
             INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
+            INST("0100110000011---", Id::ISCADD_C, Type::ScaledAdd, "ISCADD_C"),
+            INST("0101110000011---", Id::ISCADD_R, Type::ScaledAdd, "ISCADD_R"),
+            INST("0011100-00011---", Id::ISCADD_IMM, Type::ScaledAdd, "ISCADD_IMM"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
-            INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
-            INST("0100110010101---", Id::F2F_C, Type::Arithmetic, "F2F_C"),
-            INST("0101110010101---", Id::F2F_R, Type::Arithmetic, "F2F_R"),
-            INST("0011100-10101---", Id::F2F_IMM, Type::Arithmetic, "F2F_IMM"),
-            INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
-            INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
-            INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
-            INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"),
+            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
+            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
+            INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
+            INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
+            INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
+            INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
+            INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
+            INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
+            INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
             INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
             INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
             INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
             INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
-            INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
-            INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
-            INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
             INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
             INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
             INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
+            INST("0100110000100---", Id::IMNMX_C, Type::Arithmetic, "IMNMX_C"),
+            INST("0101110000100---", Id::IMNMX_R, Type::Arithmetic, "IMNMX_R"),
+            INST("0011100-00100---", Id::IMNMX_IMM, Type::Arithmetic, "IMNMX_IMM"),
+            INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
+            INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
+            INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
+            INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
+            INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
+            INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
+            INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
+            INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
+            INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
+            INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
             INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
             INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
             INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
@@ -489,6 +666,10 @@ private:
             INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
             INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
             INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
+            INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
+            INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
+            INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
+            INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
         };
 #undef INST
         std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 756518ee7..66351fe6e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -26,6 +26,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
     ASSERT(format != RenderTargetFormat::NONE);
 
     switch (format) {
+    case RenderTargetFormat::RGBA32_FLOAT:
+        return 16;
+    case RenderTargetFormat::RGBA16_FLOAT:
+        return 8;
     case RenderTargetFormat::RGBA8_UNORM:
     case RenderTargetFormat::RGB10_A2_UNORM:
         return 4;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index f168a5171..5852b9619 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,10 +15,12 @@ namespace Tegra {
 
 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
+    RGBA32_FLOAT = 0xC0,
     RGBA16_FLOAT = 0xCA,
     RGB10_A2_UNORM = 0xD1,
     RGBA8_UNORM = 0xD5,
     RGBA8_SRGB = 0xD6,
+    R11G11B10_FLOAT = 0xE0,
 };
 
 /// Returns the number of bytes per pixel of each rendertarget format.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 35c1b1890..0bd235218 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -196,8 +196,10 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
     auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
     ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
 
-    // Next available bindpoint to use when uploading the const buffers to the GLSL shaders.
+    // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
+    // shaders.
     u32 current_constbuffer_bindpoint = 0;
+    u32 current_texture_bindpoint = 0;
 
     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
         auto& shader_config = gpu.regs.shader_config[index];
@@ -212,13 +214,14 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
             continue;
         }
 
+        GLShader::MaxwellUniformData ubo{};
+        ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+        std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
+
         // Upload uniform data as one UBO per stage
         const GLintptr ubo_offset = buffer_offset;
         copy_buffer(uniform_buffers[stage].handle, ubo_offset,
                     sizeof(GLShader::MaxwellUniformData));
-        GLShader::MaxwellUniformData* ub_ptr =
-            reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);
-        ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
 
         buffer_ptr += sizeof(GLShader::MaxwellUniformData);
         buffer_offset += sizeof(GLShader::MaxwellUniformData);
@@ -258,6 +261,11 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
         current_constbuffer_bindpoint =
             SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
                               current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
+
+        // Configure the textures for this shader stage.
+        current_texture_bindpoint =
+            SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
+                          current_texture_bindpoint, shader_resources.texture_samplers);
     }
 
     shader_program_manager->UseTrivialGeometryShader();
@@ -298,7 +306,7 @@ void RasterizerOpenGL::DrawArrays() {
     const bool has_stencil = false;
     const bool using_color_fb = true;
     const bool using_depth_fb = false;
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()};
+    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
 
     const bool write_color_fb =
         state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
@@ -341,9 +349,6 @@ void RasterizerOpenGL::DrawArrays() {
     // TODO(bunnei): Sync framebuffer_scale uniform here
     // TODO(bunnei): Sync scissorbox uniform(s) here
 
-    // Sync and bind the texture surfaces
-    BindTextures();
-
     // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
     // scissor test to prevent drawing outside of the framebuffer region
     state.scissor.enabled = true;
@@ -447,39 +452,6 @@ void RasterizerOpenGL::DrawArrays() {
     }
 }
 
-void RasterizerOpenGL::BindTextures() {
-    using Regs = Tegra::Engines::Maxwell3D::Regs;
-    auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
-
-    // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
-    // certain number in OpenGL. We try to only use the minimum amount of host textures by not
-    // keeping a 1:1 relation between guest texture ids and host texture ids, ie, guest texture id 8
-    // can be host texture id 0 if it's the only texture used in the guest shader program.
-    u32 host_texture_index = 0;
-    for (u32 stage = 0; stage < Regs::MaxShaderStage; ++stage) {
-        ASSERT(host_texture_index < texture_samplers.size());
-        const auto textures = maxwell3d.GetStageTextures(static_cast<Regs::ShaderStage>(stage));
-        for (unsigned texture_index = 0; texture_index < textures.size(); ++texture_index) {
-            const auto& texture = textures[texture_index];
-
-            if (texture.enabled) {
-                texture_samplers[host_texture_index].SyncWithConfig(texture.tsc);
-                Surface surface = res_cache.GetTextureSurface(texture);
-                if (surface != nullptr) {
-                    state.texture_units[host_texture_index].texture_2d = surface->texture.handle;
-                } else {
-                    // Can occur when texture addr is null or its memory is unmapped/invalid
-                    state.texture_units[texture_index].texture_2d = 0;
-                }
-
-                ++host_texture_index;
-            } else {
-                state.texture_units[texture_index].texture_2d = 0;
-            }
-        }
-    }
-}
-
 void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
     switch (method) {
@@ -654,7 +626,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
         buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
 
         boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
-        std::vector<u8> data(used_buffer.GetSize() * sizeof(float));
+
+        std::vector<u8> data;
+        if (used_buffer.IsIndirect()) {
+            // Buffer is accessed indirectly, so upload the entire thing
+            data.resize(buffer.size * sizeof(float));
+        } else {
+            // Buffer is accessed directly, upload just what we use
+            data.resize(used_buffer.GetSize() * sizeof(float));
+        }
+
         Memory::ReadBlock(*addr, data.data(), data.size());
 
         glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
@@ -674,6 +655,52 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
     return current_bindpoint + entries.size();
 }
 
+u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
+                                    const std::vector<GLShader::SamplerEntry>& entries) {
+    auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
+
+    ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
+               "Attempted to upload textures of disabled shader stage");
+
+    ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
+               "Exceeded the number of active textures.");
+
+    // Each sampler entry takes the next host texture unit, starting at current_unit.
+    u32 unit = current_unit;
+    for (const auto& entry : entries) {
+        // Bind the uniform to the sampler.
+        const GLint location = glGetUniformLocation(program, entry.GetName().c_str());
+        ASSERT(location != -1);
+        glProgramUniform1i(program, location, unit);
+
+        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
+        ASSERT(texture.enabled);
+
+        // Sync this unit's sampler with the TSC, then look up the surface for the texture.
+        texture_samplers[unit].SyncWithConfig(texture.tsc);
+        Surface surface = res_cache.GetTextureSurface(texture);
+
+        auto& unit_state = state.texture_units[unit];
+        if (surface == nullptr) {
+            // Can occur when texture addr is null or its memory is unmapped/invalid
+            unit_state.texture_2d = 0;
+        } else {
+            // Bind the surface and take the component swizzle from the TIC sources.
+            unit_state.texture_2d = surface->texture.handle;
+            unit_state.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
+            unit_state.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
+            unit_state.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
+            unit_state.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
+        }
+
+        ++unit;
+    }
+
+    state.Apply();
+
+    return current_unit + entries.size();
+}
+
 void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
                                                const Surface& depth_surface, bool has_stencil) {
     state.draw.draw_framebuffer = framebuffer.handle;
@@ -702,7 +729,7 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
 
 void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) {
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()};
+    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
 
     state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale;
     state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4b915c76a..d3f0558ed 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,9 +80,6 @@ private:
     void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
                                  bool has_stencil);
 
-    /// Binds the required textures to OpenGL before drawing a batch.
-    void BindTextures();
-
     /*
      * Configures the current constbuffers to use for the draw command.
      * @param stage The shader stage to configure buffers for.
@@ -95,6 +92,17 @@ private:
                           u32 current_bindpoint,
                           const std::vector<GLShader::ConstBufferEntry>& entries);
 
+    /*
+     * Configures the current textures to use for the draw command.
+     * @param stage The shader stage to configure textures for.
+     * @param program The OpenGL program object that contains the specified stage.
+     * @param current_unit The offset at which to start counting unused texture units.
+     * @param entries Vector describing the textures that are actually used in the guest shader.
+     * @returns The next unused texture unit (current_unit + entries.size()) for the next stage.
+     */
+    u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
+                      u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
+
     /// Syncs the viewport to match the guest state
     void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index e652bd9ed..df2474ea2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -45,20 +45,26 @@ struct FormatTuple {
 
 static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                    // ABGR8
-    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                       // B5G6R5
+    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false},                           // B5G6R5
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},              // A2B10G10R10
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false},                // A1B5G5R5
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false},                                   // R8
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                                // RGBA16F
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false},        // R11FG11FB10F
     {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // DXT1
     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
+    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true},           // DXN1 (BC4)
 }};
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
     if (type == SurfaceType::ColorTexture) {
         ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
-        // For now only UNORM components are supported
-        ASSERT(component_type == ComponentType::UNorm);
+        // For now only UNORM components are supported, except for the RGBA16F and R11FG11FB10F
+        // formats, which use FLOAT components
+        ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
+               pixel_format == PixelFormat::R11FG11FB10F);
         return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
         // TODO(Subv): Implement depth formats
@@ -107,10 +113,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
                                      Tegra::GPUVAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
-        MortonCopy<true, PixelFormat::ABGR8>,       MortonCopy<true, PixelFormat::B5G6R5>,
-        MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
-        MortonCopy<true, PixelFormat::DXT1>,        MortonCopy<true, PixelFormat::DXT23>,
-        MortonCopy<true, PixelFormat::DXT45>,
+        MortonCopy<true, PixelFormat::ABGR8>,        MortonCopy<true, PixelFormat::B5G6R5>,
+        MortonCopy<true, PixelFormat::A2B10G10R10>,  MortonCopy<true, PixelFormat::A1B5G5R5>,
+        MortonCopy<true, PixelFormat::R8>,           MortonCopy<true, PixelFormat::RGBA16F>,
+        MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
+        MortonCopy<true, PixelFormat::DXT23>,        MortonCopy<true, PixelFormat::DXT45>,
+        MortonCopy<true, PixelFormat::DXN1>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -121,7 +129,11 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
         MortonCopy<false, PixelFormat::B5G6R5>,
         MortonCopy<false, PixelFormat::A2B10G10R10>,
         MortonCopy<false, PixelFormat::A1B5G5R5>,
-        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+        MortonCopy<false, PixelFormat::R8>,
+        MortonCopy<false, PixelFormat::RGBA16F>,
+        MortonCopy<false, PixelFormat::R11FG11FB10F>,
+        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
+        nullptr,
         nullptr,
         nullptr,
         nullptr,
@@ -157,60 +169,10 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
 static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
                          const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
                          GLuint read_fb_handle, GLuint draw_fb_handle) {
-    OpenGLState state = OpenGLState::GetCurState();
-
-    OpenGLState prev_state = state;
-    SCOPE_EXIT({ prev_state.Apply(); });
-
-    // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
-    // components
-    state.ResetTexture(src_tex);
-    state.ResetTexture(dst_tex);
-
-    state.draw.read_framebuffer = read_fb_handle;
-    state.draw.draw_framebuffer = draw_fb_handle;
-    state.Apply();
-
-    u32 buffers = 0;
-
-    if (type == SurfaceType::ColorTexture) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
-                               0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
-                               0);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-
-        buffers = GL_COLOR_BUFFER_BIT;
-    } else if (type == SurfaceType::Depth) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
-        buffers = GL_DEPTH_BUFFER_BIT;
-    } else if (type == SurfaceType::DepthStencil) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                               src_tex, 0);
-
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                               dst_tex, 0);
-
-        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
-    }
-
-    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
-                      dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
-                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
 
+    glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
+                       GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
+                       src_rect.GetHeight(), 1); // A 2D image is 1 deep; 0 would copy nothing
     return true;
 }
 
@@ -926,7 +888,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
     // Use GetSurfaceSubRect instead
     ASSERT(params.width == params.stride);
 
-    ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0));
+    ASSERT(!params.is_tiled ||
+           (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
 
     // Check for an exact match in existing surfaces
     Surface surface =
@@ -1089,16 +1052,19 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
 
     params.UpdateParams();
 
-    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
+    if (params.GetActualWidth() % 8 != 0 || params.GetActualHeight() % 8 != 0 ||
         params.stride != params.width) {
         Surface src_surface;
         MathUtil::Rectangle<u32> rect;
         std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
 
+        rect = rect.Scale(params.GetCompresssionFactor());
+
         params.res_scale = src_surface->res_scale;
         Surface tmp_surface = CreateSurface(params);
-        BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle,
-                     tmp_surface->GetScaledRect(),
+
+        auto dst_rect = tmp_surface->GetScaledRect().Scale(params.GetCompresssionFactor());
+        BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, dst_rect,
                      SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
                      draw_framebuffer.handle);
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 03e28f64a..0f43e863d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -58,9 +58,13 @@ struct SurfaceParams {
         B5G6R5 = 1,
         A2B10G10R10 = 2,
         A1B5G5R5 = 3,
-        DXT1 = 4,
-        DXT23 = 5,
-        DXT45 = 6,
+        R8 = 4,
+        RGBA16F = 5,
+        R11FG11FB10F = 6,
+        DXT1 = 7,
+        DXT23 = 8,
+        DXT45 = 9,
+        DXN1 = 10, // This is also known as BC4
 
         Max,
         Invalid = 255,
@@ -100,9 +104,13 @@ struct SurfaceParams {
             1, // B5G6R5
             1, // A2B10G10R10
             1, // A1B5G5R5
+            1, // R8
+            1, // RGBA16F
+            1, // R11FG11FB10F
             4, // DXT1
             4, // DXT23
             4, // DXT45
+            4, // DXN1
         }};
 
         ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -121,9 +129,13 @@ struct SurfaceParams {
             16,  // B5G6R5
             32,  // A2B10G10R10
             16,  // A1B5G5R5
+            8,   // R8
+            64,  // RGBA16F
+            32,  // R11FG11FB10F
             64,  // DXT1
             128, // DXT23
             128, // DXT45
+            64,  // DXN1
         }};
 
         ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -140,6 +152,10 @@ struct SurfaceParams {
             return PixelFormat::ABGR8;
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
             return PixelFormat::A2B10G10R10;
+        case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+            return PixelFormat::RGBA16F;
+        case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
+            return PixelFormat::R11FG11FB10F;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -167,12 +183,20 @@ struct SurfaceParams {
             return PixelFormat::A2B10G10R10;
         case Tegra::Texture::TextureFormat::A1B5G5R5:
             return PixelFormat::A1B5G5R5;
+        case Tegra::Texture::TextureFormat::R8:
+            return PixelFormat::R8;
+        case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
+            return PixelFormat::RGBA16F;
+        case Tegra::Texture::TextureFormat::BF10GF11RF11:
+            return PixelFormat::R11FG11FB10F;
         case Tegra::Texture::TextureFormat::DXT1:
             return PixelFormat::DXT1;
         case Tegra::Texture::TextureFormat::DXT23:
             return PixelFormat::DXT23;
         case Tegra::Texture::TextureFormat::DXT45:
             return PixelFormat::DXT45;
+        case Tegra::Texture::TextureFormat::DXN1:
+            return PixelFormat::DXN1;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -190,12 +214,20 @@ struct SurfaceParams {
             return Tegra::Texture::TextureFormat::A2B10G10R10;
         case PixelFormat::A1B5G5R5:
             return Tegra::Texture::TextureFormat::A1B5G5R5;
+        case PixelFormat::R8:
+            return Tegra::Texture::TextureFormat::R8;
+        case PixelFormat::RGBA16F:
+            return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
+        case PixelFormat::R11FG11FB10F:
+            return Tegra::Texture::TextureFormat::BF10GF11RF11;
         case PixelFormat::DXT1:
             return Tegra::Texture::TextureFormat::DXT1;
         case PixelFormat::DXT23:
             return Tegra::Texture::TextureFormat::DXT23;
         case PixelFormat::DXT45:
             return Tegra::Texture::TextureFormat::DXT45;
+        case PixelFormat::DXN1:
+            return Tegra::Texture::TextureFormat::DXN1;
         default:
             UNREACHABLE();
         }
@@ -219,6 +251,9 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGBA8_SRGB:
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
             return ComponentType::UNorm;
+        case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+        case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
+            return ComponentType::Float;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 75822e750..94c6bc4b2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,7 +20,6 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 using Tegra::Shader::Sampler;
 using Tegra::Shader::SubOp;
-using Tegra::Shader::Uniform;
 
 constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
 
@@ -88,6 +87,20 @@ private:
         return *subroutines.insert(std::move(subroutine)).first;
     }
 
+    /// Merges the exit methods of two parallel branches into a single exit method.
+    static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
+        // An undetermined branch contributes nothing, so the other branch decides.
+        if (a == ExitMethod::Undetermined) {
+            return b;
+        }
+        // Past this point a is determined; it also wins when b agrees or is undetermined.
+        if (b == ExitMethod::Undetermined || a == b) {
+            return a;
+        }
+        // Both branches are determined but differ.
+        return ExitMethod::Conditional;
+    }
+
     /// Scans a range of code for labels and determines the exit method.
     ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
         auto [iter, inserted] =
@@ -97,10 +110,27 @@ private:
             return exit_method;
 
         for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
-            if (const auto opcode = OpCode::Decode({program_code[offset]})) {
+            const Instruction instr = {program_code[offset]};
+            if (const auto opcode = OpCode::Decode(instr)) {
                 switch (opcode->GetId()) {
                 case OpCode::Id::EXIT: {
-                    return exit_method = ExitMethod::AlwaysEnd;
+                    // The EXIT instruction can be predicated, which means that the shader can
+                    // conditionally end on this instruction. We have to consider the case where the
+                    // condition is not met and check the exit method of that other basic block.
+                    using Tegra::Shader::Pred;
+                    if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
+                        return exit_method = ExitMethod::AlwaysEnd;
+                    } else {
+                        ExitMethod not_met = Scan(offset + 1, end, labels);
+                        return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
+                    }
+                }
+                case OpCode::Id::BRA: {
+                    u32 target = offset + instr.bra.GetBranchTarget();
+                    labels.insert(target);
+                    ExitMethod no_jmp = Scan(offset + 1, end, labels);
+                    ExitMethod jmp = Scan(target, end, labels);
+                    return exit_method = ParallelExit(no_jmp, jmp);
                 }
                 }
             }
@@ -197,6 +227,11 @@ public:
         return active_type == Type::Integer;
     }
 
+    /// Returns the register's currently active type (e.g. Type::Float or Type::Integer)
+    Type GetActiveType() const {
+        return active_type;
+    }
+
     /// Returns the index of the register
     size_t GetIndex() const {
         return index;
@@ -299,7 +334,7 @@ public:
      * are stored as floats, so this may require conversion.
      * @param reg The destination register to use.
      * @param elem The element to use for the operation.
-     * @param attribute The input attibute to use as the source value.
+     * @param attribute The input attribute to use as the source value.
      */
     void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
         std::string dest = GetRegisterAsFloat(reg);
@@ -328,16 +363,29 @@ public:
         shader.AddLine(dest + " = " + src + ';');
     }
 
-    /// Generates code representing a uniform (C buffer) register.
-    std::string GetUniform(const Uniform& uniform, const Register& dest_reg) {
-        declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index),
-                                                      static_cast<unsigned>(uniform.offset), stage);
-        std::string value =
-            'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']';
+    /// Builds the GLSL expression reading const buffer `index` at `offset` as the given type.
+    std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
+        declr_const_buffers[index].MarkAsUsed(index, offset, stage);
+        std::string element = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']';
+        switch (type) {
+        case GLSLRegister::Type::Float:
+            return element;
+        case GLSLRegister::Type::Integer:
+            return "floatBitsToInt(" + element + ')';
+        default:
+            UNREACHABLE();
+        }
+    }
+
+    std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
+                                   GLSLRegister::Type type) {
+        declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
+        std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
+                            GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]";
 
-        if (regs[dest_reg].IsFloat()) {
+        if (type == GLSLRegister::Type::Float) {
             return value;
-        } else if (regs[dest_reg].IsInteger()) {
+        } else if (type == GLSLRegister::Type::Integer) {
             return "floatBitsToInt(" + value + ')';
         } else {
             UNREACHABLE();
@@ -383,6 +431,14 @@ public:
             ++const_buffer_layout;
         }
         declarations.AddNewLine();
+
+        // Append the sampler2D array for the used textures.
+        size_t num_samplers = GetSamplers().size();
+        if (num_samplers > 0) {
+            declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
+                                 std::to_string(num_samplers) + "];");
+            declarations.AddNewLine();
+        }
     }
 
     /// Returns a list of constant buffer declarations
@@ -393,6 +449,32 @@ public:
         return result;
     }
 
+    /// Returns the samplers used in the shader by reference, so size queries make no copy
+    const std::vector<SamplerEntry>& GetSamplers() const {
+        return used_samplers;
+    }
+
+    /// Returns the GLSL sampler mapped to the input shader sampler, registering a new mapping
+    /// the first time a given sampler offset is accessed.
+    std::string AccessSampler(const Sampler& sampler) {
+        const size_t sampler_offset = static_cast<size_t>(sampler.index.Value());
+
+        // Reuse the existing mapping when this sampler offset was accessed before, so each
+        // offset maps to exactly one entry.
+        for (auto& known : used_samplers) {
+            if (known.GetOffset() == sampler_offset) {
+                return known.GetName();
+            }
+        }
+
+        // Otherwise append a new entry; its index is its position in used_samplers, so
+        // indices are handed out in first-use order.
+        const size_t slot = used_samplers.size();
+        SamplerEntry created{stage, sampler_offset, slot};
+        used_samplers.emplace_back(created);
+        return created.GetName();
+    }
+
 private:
     /// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc.
     const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const {
@@ -451,6 +533,12 @@ private:
         switch (attribute) {
         case Attribute::Index::Position:
             return "position";
+        case Attribute::Index::TessCoordInstanceIDVertexID:
+            // TODO(Subv): Find out what the values are for the first two elements when inside a
+            // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+            // shader.
+            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
+            return "vec4(0, 0, gl_InstanceID, gl_VertexID)";
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -496,6 +584,7 @@ private:
     std::set<Attribute::Index> declr_input_attribute;
     std::set<Attribute::Index> declr_output_attribute;
     std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
+    std::vector<SamplerEntry> used_samplers;
     const Maxwell3D::Regs::ShaderStage& stage;
 };
 
@@ -515,7 +604,7 @@ public:
 
     /// Returns entries in the shader that are useful for external functions
     ShaderEntries GetEntries() const {
-        return {regs.GetConstBuffersDeclarations()};
+        return {regs.GetConstBuffersDeclarations(), regs.GetSamplers()};
     }
 
 private:
@@ -537,12 +626,8 @@ private:
     }
 
     /// Generates code representing a texture sampler.
-    std::string GetSampler(const Sampler& sampler) const {
-        // TODO(Subv): Support more than just texture sampler 0
-        ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
-        const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
-                             static_cast<unsigned>(Sampler::Index::Sampler_0)};
-        return "tex[" + std::to_string(index) + ']';
+    std::string GetSampler(const Sampler& sampler) {
+        return regs.AccessSampler(sampler);
     }
 
     /**
@@ -606,9 +691,9 @@ private:
     std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
         using Tegra::Shader::PredCondition;
         static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
-            {PredCondition::LessThan, "<"},      {PredCondition::Equal, "=="},
-            {PredCondition::LessEqual, "<="},    {PredCondition::GreaterThan, ">"},
-            {PredCondition::GreaterEqual, ">="},
+            {PredCondition::LessThan, "<"},   {PredCondition::Equal, "=="},
+            {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
+            {PredCondition::NotEqual, "!="},  {PredCondition::GreaterEqual, ">="},
         };
 
         auto comparison = PredicateComparisonStrings.find(condition);
@@ -699,7 +784,8 @@ private:
                 if (instr.is_b_gpr) {
                     op_b += regs.GetRegisterAsFloat(instr.gpr20);
                 } else {
-                    op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                            GLSLRegister::Type::Float);
                 }
             }
 
@@ -786,8 +872,13 @@ private:
                                         1, 1);
                 break;
             }
-            case OpCode::Id::RRO: {
-                NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction");
+            case OpCode::Id::RRO_C:
+            case OpCode::Id::RRO_R:
+            case OpCode::Id::RRO_IMM: {
+                // Currently RRO is only implemented as a register move.
+                // Usage of `abs_b` and `negate_b` here should also be correct.
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
+                NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete");
                 break;
             }
             default: {
@@ -797,6 +888,129 @@ private:
             }
             break;
         }
+        case OpCode::Type::Bfe: {
+            ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented");
+
+            std::string op_a = instr.bfe.negate_a ? "-" : "";
+            op_a += regs.GetRegisterAsInteger(instr.gpr8);
+
+            switch (opcode->GetId()) {
+            case OpCode::Id::BFE_IMM: {
+                std::string inner_shift =
+                    '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
+                std::string outer_shift =
+                    '(' + inner_shift + " >> " +
+                    std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
+
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
+                UNREACHABLE();
+            }
+            }
+
+            break;
+        }
+        case OpCode::Type::Logic: {
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
+
+            if (instr.alu.lop.invert_a)
+                op_a = "~(" + op_a + ')';
+
+            switch (opcode->GetId()) {
+            case OpCode::Id::LOP32I: {
+                u32 imm = static_cast<u32>(instr.alu.imm20_32.Value());
+
+                if (instr.alu.lop.invert_b)
+                    imm = ~imm;
+
+                switch (instr.alu.lop.operation) {
+                case Tegra::Shader::LogicOperation::And: {
+                    regs.SetRegisterToInteger(instr.gpr0, true, 0,
+                                              '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
+                    break;
+                }
+                case Tegra::Shader::LogicOperation::Or: {
+                    regs.SetRegisterToInteger(instr.gpr0, true, 0,
+                                              '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
+                    break;
+                }
+                case Tegra::Shader::LogicOperation::Xor: {
+                    regs.SetRegisterToInteger(instr.gpr0, true, 0,
+                                              '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
+                    break;
+                }
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
+                                   static_cast<u32>(instr.alu.lop.operation.Value()));
+                    UNREACHABLE();
+                }
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName());
+                UNREACHABLE();
+            }
+            }
+            break;
+        }
+
+        case OpCode::Type::Shift: {
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
+            std::string op_b;
+
+            if (instr.is_b_imm) {
+                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b += regs.GetRegisterAsInteger(instr.gpr20);
+                } else {
+                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                            GLSLRegister::Type::Integer);
+                }
+            }
+
+            switch (opcode->GetId()) {
+            case OpCode::Id::SHL_C:
+            case OpCode::Id::SHL_R:
+            case OpCode::Id::SHL_IMM:
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
+                break;
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
+                UNREACHABLE();
+            }
+            }
+            break;
+        }
+
+        case OpCode::Type::ScaledAdd: {
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
+
+            if (instr.iscadd.negate_a)
+                op_a = '-' + op_a;
+
+            std::string op_b = instr.iscadd.negate_b ? "-" : "";
+
+            if (instr.is_b_imm) {
+                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b += regs.GetRegisterAsInteger(instr.gpr20);
+                } else {
+                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                            GLSLRegister::Type::Integer);
+                }
+            }
+
+            std::string shift = std::to_string(instr.iscadd.shift_amount.Value());
+
+            regs.SetRegisterToInteger(instr.gpr0, true, 0,
+                                      "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
+            break;
+        }
         case OpCode::Type::Ffma: {
             std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
             std::string op_b = instr.ffma.negate_b ? "-" : "";
@@ -804,7 +1018,8 @@ private:
 
             switch (opcode->GetId()) {
             case OpCode::Id::FFMA_CR: {
-                op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+                op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                        GLSLRegister::Type::Float);
                 op_c += regs.GetRegisterAsFloat(instr.gpr39);
                 break;
             }
@@ -815,7 +1030,8 @@ private:
             }
             case OpCode::Id::FFMA_RC: {
                 op_b += regs.GetRegisterAsFloat(instr.gpr39);
-                op_c += regs.GetUniform(instr.uniform, instr.gpr0);
+                op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                        GLSLRegister::Type::Float);
                 break;
             }
             case OpCode::Id::FFMA_IMM: {
@@ -834,21 +1050,99 @@ private:
         }
         case OpCode::Type::Conversion: {
             ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
-            ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
             ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
             ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
 
             switch (opcode->GetId()) {
-            case OpCode::Id::I2I_R:
+            case OpCode::Id::I2I_R: {
+                ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
+
+                std::string op_a =
+                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
+
+                if (instr.conversion.abs_a) {
+                    op_a = "abs(" + op_a + ')';
+                }
+
+                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
+                                          1);
+                break;
+            }
             case OpCode::Id::I2F_R: {
+                ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
                 std::string op_a =
-                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
+                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
+
+                if (instr.conversion.abs_a) {
+                    op_a = "abs(" + op_a + ')';
+                }
+
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                break;
+            }
+            case OpCode::Id::F2F_R: {
+                std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+
+                switch (instr.conversion.f2f.rounding) {
+                case Tegra::Shader::F2fRoundingOp::None:
+                    break;
+                case Tegra::Shader::F2fRoundingOp::Floor:
+                    op_a = "floor(" + op_a + ')';
+                    break;
+                case Tegra::Shader::F2fRoundingOp::Ceil:
+                    op_a = "ceil(" + op_a + ')';
+                    break;
+                case Tegra::Shader::F2fRoundingOp::Trunc:
+                    op_a = "trunc(" + op_a + ')';
+                    break;
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}",
+                                   static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+                    UNREACHABLE();
+                    break;
+                }
+
+                if (instr.conversion.abs_a) {
+                    op_a = "abs(" + op_a + ')';
+                }
+
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                break;
+            }
+            case OpCode::Id::F2I_R: {
+                std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
 
                 if (instr.conversion.abs_a) {
                     op_a = "abs(" + op_a + ')';
                 }
 
-                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
+                switch (instr.conversion.f2i.rounding) {
+                case Tegra::Shader::F2iRoundingOp::None:
+                    break;
+                case Tegra::Shader::F2iRoundingOp::Floor:
+                    op_a = "floor(" + op_a + ')';
+                    break;
+                case Tegra::Shader::F2iRoundingOp::Ceil:
+                    op_a = "ceil(" + op_a + ')';
+                    break;
+                case Tegra::Shader::F2iRoundingOp::Trunc:
+                    op_a = "trunc(" + op_a + ')';
+                    break;
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
+                                   static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+                    UNREACHABLE();
+                    break;
+                }
+
+                if (instr.conversion.is_output_signed) {
+                    op_a = "int(" + op_a + ')';
+                } else {
+                    op_a = "uint(" + op_a + ')';
+                }
+
+                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
+                                          1);
                 break;
             }
             default: {
@@ -859,25 +1153,49 @@ private:
             break;
         }
         case OpCode::Type::Memory: {
-            const Attribute::Index attribute = instr.attribute.fmt20.index;
-
             switch (opcode->GetId()) {
             case OpCode::Id::LD_A: {
                 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
-                                                attribute);
+                                                instr.attribute.fmt20.index);
+                break;
+            }
+            case OpCode::Id::LD_C: {
+                ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
+
+                std::string op_a =
+                    regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
+                                            GLSLRegister::Type::Float);
+                std::string op_b =
+                    regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
+                                            GLSLRegister::Type::Float);
+
+                switch (instr.ld_c.type.Value()) {
+                case Tegra::Shader::UniformType::Single:
+                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                    break;
+
+                case Tegra::Shader::UniformType::Double:
+                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                    regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
+                    break;
+
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}",
+                                   static_cast<unsigned>(instr.ld_c.type.Value()));
+                    UNREACHABLE();
+                }
                 break;
             }
             case OpCode::Id::ST_A: {
                 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
-                regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element,
-                                                  instr.gpr0);
+                regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
+                                                  instr.attribute.fmt20.element, instr.gpr0);
                 break;
             }
-            case OpCode::Id::TEXS: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+            case OpCode::Id::TEX: {
                 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                 const std::string sampler = GetSampler(instr.sampler);
                 const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
                 // Add an extra scope and declare the texture coords inside to prevent overwriting
@@ -886,8 +1204,51 @@ private:
                 ++shader.scope;
                 shader.AddLine(coord);
                 const std::string texture = "texture(" + sampler + ", coords)";
-                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
-                    regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem);
+
+                size_t dest_elem{};
+                for (size_t elem = 0; elem < 4; ++elem) {
+                    if (!instr.tex.IsComponentEnabled(elem)) {
+                        // Skip disabled components
+                        continue;
+                    }
+                    regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
+                    ++dest_elem;
+                }
+                --shader.scope;
+                shader.AddLine("}");
+                break;
+            }
+            case OpCode::Id::TEXS: {
+                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+                const std::string sampler = GetSampler(instr.sampler);
+                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                // Add an extra scope and declare the texture coords inside to prevent
+                // overwriting them in case they are used as outputs of the texs instruction.
+                shader.AddLine("{");
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "texture(" + sampler + ", coords)";
+
+                // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
+                // into gpr28+0 and gpr28+1
+                size_t offset{};
+
+                for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
+                    for (unsigned elem = 0; elem < 2; ++elem) { // two components per destination
+                        if (!instr.texs.IsComponentEnabled(elem)) {
+                            // Skip disabled components. NOTE(review): test elem + offset instead?
+                            continue;
+                        }
+                        regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem);
+                    }
+
+                    if (!instr.texs.HasTwoDestinations()) {
+                        // Skip the second destination
+                        break;
+                    }
+
+                    offset += 2; // the second destination reads the next two texture components
+                }
                 --shader.scope;
                 shader.AddLine("}");
@@ -920,7 +1281,8 @@ private:
                 if (instr.is_b_gpr) {
                     op_b += regs.GetRegisterAsFloat(instr.gpr20);
                 } else {
-                    op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                            GLSLRegister::Type::Float);
                 }
             }
 
@@ -944,13 +1306,51 @@ private:
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
 
             if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
-                // enabled
+                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+                // if enabled
                 SetPredicate(instr.fsetp.pred0,
                              "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
             }
             break;
         }
+        case OpCode::Type::IntegerSetPredicate: {
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+            std::string op_b;
+
+            if (instr.is_b_imm) {
+                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
+                } else {
+                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                            GLSLRegister::Type::Integer);
+                }
+            }
+
+            using Tegra::Shader::Pred;
+            // We can't use the constant predicate as destination.
+            ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+            std::string second_pred =
+                GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
+
+            std::string comparator = GetPredicateComparison(instr.isetp.cond);
+            std::string combiner = GetPredicateCombiner(instr.isetp.op);
+
+            std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
+            // Set the primary predicate to the result of Predicate OP SecondPredicate
+            SetPredicate(instr.isetp.pred3,
+                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+
+            if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+                // if enabled
+                SetPredicate(instr.isetp.pred0,
+                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+            }
+            break;
+        }
         case OpCode::Type::FloatSet: {
             std::string op_a = instr.fset.neg_a ? "-" : "";
             op_a += regs.GetRegisterAsFloat(instr.gpr8);
@@ -971,7 +1371,8 @@ private:
                 if (instr.is_b_gpr) {
                     op_b += regs.GetRegisterAsFloat(instr.gpr20);
                 } else {
-                    op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+                    op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                            GLSLRegister::Type::Float);
                 }
             }
 
@@ -990,15 +1391,17 @@ private:
             std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
                                     combiner + " (" + second_pred + "))";
 
-            regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
+            if (instr.fset.bf) {
+                regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
+            } else {
+                regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
+                                          1);
+            }
             break;
         }
         default: {
             switch (opcode->GetId()) {
             case OpCode::Id::EXIT: {
-                ASSERT_MSG(instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex),
-                           "Predicated exits not implemented");
-
                 // Final color output is currently hardcoded to GPR0-3 for fragment shaders
                 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
                     shader.AddLine("color.r = " + regs.GetRegisterAsFloat(0) + ';');
@@ -1008,13 +1411,25 @@ private:
                 }
 
                 shader.AddLine("return true;");
-                offset = PROGRAM_END - 1;
+                if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
+                    // If this is an unconditional exit then just end processing here, otherwise we
+                    // have to account for the possibility of the condition not being met, so
+                    // continue processing the next instruction.
+                    offset = PROGRAM_END - 1;
+                }
                 break;
             }
             case OpCode::Id::KIL: {
                 shader.AddLine("discard;");
                 break;
             }
+            case OpCode::Id::BRA: {
+                ASSERT_MSG(instr.bra.constant_buffer == 0,
+                           "BRA with constant buffers are not implemented");
+                u32 target = offset + instr.bra.GetBranchTarget();
+                shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
+                break;
+            }
             case OpCode::Id::IPA: {
                 const auto& attribute = instr.attribute.fmt28;
                 regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 254f6e2c3..b88d592b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -62,8 +62,6 @@ layout (std140) uniform fs_config {
     vec4 viewport_flip;
 };
 
-uniform sampler2D tex[32];
-
 void main() {
     exec_shader();
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 458032b5c..ed890e0f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -22,17 +22,28 @@ class ConstBufferEntry {
     using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 public:
-    void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) {
+    void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) { // use at a known offset
         is_used = true;
-        this->index = index;
+        this->index = static_cast<unsigned>(index); // narrowed from u64 to the stored type
+        this->stage = stage;
+        max_offset = std::max(max_offset, static_cast<unsigned>(offset)); // largest offset so far
+    }
+
+    void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) { // no offset is recorded
+        is_used = true;
+        is_indirect = true; // max_offset is left unchanged by this path
+        this->index = static_cast<unsigned>(index);
         this->stage = stage;
-        max_offset = std::max(max_offset, offset);
     }
 
     bool IsUsed() const {
         return is_used;
     }
 
+    bool IsIndirect() const { // reports the flag set by MarkAsUsedIndirect()
+        return is_indirect;
+    }
+
     unsigned GetIndex() const {
         return index;
     }
@@ -51,13 +62,54 @@ private:
     };
 
     bool is_used{};
+    bool is_indirect{};
     unsigned index{};
     unsigned max_offset{};
     Maxwell::ShaderStage stage;
 };
 
+class SamplerEntry { // Describes one texture sampler used by a generated shader.
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+public:
+    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
+        : offset(offset), stage(stage), sampler_index(index) {} // index is kept as sampler_index
+
+    size_t GetOffset() const { // TSC offset given by the sampling instruction
+        return offset;
+    }
+
+    size_t GetIndex() const { // position in the generated GLSL sampler array
+        return sampler_index;
+    }
+
+    Maxwell::ShaderStage GetStage() const { // stage where the sampler was used
+        return stage;
+    }
+
+    std::string GetName() const { // "<array name for the stage>[<sampler_index>]"
+        return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
+               std::to_string(sampler_index) + ']';
+    }
+
+    static std::string GetArrayName(Maxwell::ShaderStage stage) { // array name without an index
+        return TextureSamplerNames[static_cast<size_t>(stage)];
+    }
+
+private:
+    static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
+        "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs", // looked up by stage value
+    };
+    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
+    /// instruction.
+    size_t offset;
+    Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
+    size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
+};
+
 struct ShaderEntries {
     std::vector<ConstBufferEntry> const_buffer_entries;
+    std::vector<SamplerEntry> texture_samplers; // SamplerEntry list returned inside ProgramResult
 };
 
 using ProgramResult = std::pair<std::string, ShaderEntries>;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index ccdfc2718..7c00beb33 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -32,25 +32,6 @@ void SetShaderUniformBlockBindings(GLuint shader) {
                                  sizeof(MaxwellUniformData));
 }
 
-void SetShaderSamplerBindings(GLuint shader) {
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
-    cur_state.Apply();
-
-    // Set the texture samplers to correspond to different texture units
-    for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
-        // Set the texture samplers to correspond to different texture units
-        std::string uniform_name = "tex[" + std::to_string(texture) + "]";
-        GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
-        if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
-        }
-    }
-
-    cur_state.draw.shader_program = old_program;
-    cur_state.Apply();
-}
-
 } // namespace Impl
 
 void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index e963b4b7e..4295c20a6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,7 +45,6 @@ public:
         shader.Create(program_result.first.c_str(), type);
         program.Create(true, shader.handle);
         Impl::SetShaderUniformBlockBindings(program.handle);
-        Impl::SetShaderSamplerBindings(program.handle);
         entries = program_result.second;
     }
     GLuint GetHandle() const {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f91dfe36a..44f0c8a01 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -50,6 +50,10 @@ OpenGLState::OpenGLState() {
     for (auto& texture_unit : texture_units) {
         texture_unit.texture_2d = 0;
         texture_unit.sampler = 0;
+        texture_unit.swizzle.r = GL_RED;
+        texture_unit.swizzle.g = GL_GREEN;
+        texture_unit.swizzle.b = GL_BLUE;
+        texture_unit.swizzle.a = GL_ALPHA;
     }
 
     lighting_lut.texture_buffer = 0;
@@ -200,6 +204,15 @@ void OpenGLState::Apply() const {
         if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
             glBindSampler(i, texture_units[i].sampler);
         }
+        // Update the texture swizzle. NOTE(review): confirm unit i is the active texture unit.
+        if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r ||
+            texture_units[i].swizzle.g != cur_state.texture_units[i].swizzle.g ||
+            texture_units[i].swizzle.b != cur_state.texture_units[i].swizzle.b ||
+            texture_units[i].swizzle.a != cur_state.texture_units[i].swizzle.a) {
+            std::array<GLint, 4> mask = {texture_units[i].swizzle.r, texture_units[i].swizzle.g,
+                                         texture_units[i].swizzle.b, texture_units[i].swizzle.a};
+            glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+        }
     }
 
     // Constbuffers
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 75c08e645..839e50e93 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -85,6 +85,12 @@ public:
     struct {
         GLuint texture_2d; // GL_TEXTURE_BINDING_2D
         GLuint sampler;    // GL_SAMPLER_BINDING
+        struct { // Swizzle selector per channel; the constructor defaults these to identity
+            GLint r; // GL_TEXTURE_SWIZZLE_R
+            GLint g; // GL_TEXTURE_SWIZZLE_G
+            GLint b; // GL_TEXTURE_SWIZZLE_B
+            GLint a; // GL_TEXTURE_SWIZZLE_A
+        } swizzle;
     } texture_units[32];
 
     struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a630610d8..2155fb019 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -100,6 +100,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
     switch (wrap_mode) {
     case Tegra::Texture::WrapMode::Wrap:
         return GL_REPEAT;
+    case Tegra::Texture::WrapMode::Mirror:
+        return GL_MIRRORED_REPEAT;
     case Tegra::Texture::WrapMode::ClampToEdge:
         return GL_CLAMP_TO_EDGE;
     case Tegra::Texture::WrapMode::ClampOGL:
@@ -178,4 +180,25 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
     return {};
 }
 
+inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { // Tegra swizzle -> GL enum
+    switch (source) {
+    case Tegra::Texture::SwizzleSource::Zero:
+        return GL_ZERO;
+    case Tegra::Texture::SwizzleSource::R:
+        return GL_RED;
+    case Tegra::Texture::SwizzleSource::G:
+        return GL_GREEN;
+    case Tegra::Texture::SwizzleSource::B:
+        return GL_BLUE;
+    case Tegra::Texture::SwizzleSource::A:
+        return GL_ALPHA;
+    case Tegra::Texture::SwizzleSource::OneInt:
+    case Tegra::Texture::SwizzleSource::OneFloat: // both "one" sources map to GL_ONE
+        return GL_ONE;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+    UNREACHABLE(); // only reached when source matches none of the cases above
+    return {}; // value-initialized GLenum; NOTE(review): presumably just satisfies the compiler
+}
+
 } // namespace MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 3440d2190..f33766bfd 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,6 +316,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
     }};
 
     state.texture_units[0].texture_2d = screen_info.display_texture;
+    state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
     state.Apply();
 
     glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index ceb760e0f..7bf9c4c4b 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
 u32 BytesPerPixel(TextureFormat format) {
     switch (format) {
     case TextureFormat::DXT1:
+    case TextureFormat::DXN1:
         // In this case a 'pixel' actually refers to a 4x4 tile.
         return 8;
     case TextureFormat::DXT23:
@@ -54,10 +55,15 @@ u32 BytesPerPixel(TextureFormat format) {
         return 16;
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
+    case TextureFormat::BF10GF11RF11:
         return 4;
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
         return 2;
+    case TextureFormat::R8:
+        return 1;
+    case TextureFormat::R16_G16_B16_A16:
+        return 8;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
         break;
@@ -75,7 +81,9 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::DXT1:
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
-        // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
+    case TextureFormat::DXN1:
+        // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
+        // values.
         CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
                          unswizzled_data.data(), true, block_height);
         break;
@@ -83,6 +91,9 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
+    case TextureFormat::R8:
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::BF10GF11RF11:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
                          unswizzled_data.data(), true, block_height);
         break;
@@ -103,10 +114,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::DXT1:
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
+    case TextureFormat::DXN1:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
+    case TextureFormat::R8:
+    case TextureFormat::BF10GF11RF11: // NOTE(review): no R16_G16_B16_A16 case here — intended?
         // TODO(Subv): For the time being just forward the same data without any decoding.
         rgba_data = texture_data;
         break;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index b33e9bab3..a17eaf19d 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -14,13 +14,82 @@ namespace Tegra {
 namespace Texture {
 
 enum class TextureFormat : u32 {
-    A8R8G8B8 = 0x8,
-    A2B10G10R10 = 0x9,
+    R32_G32_B32_A32 = 0x01, // first listed value; 0x00 has no enumerator
+    R32_G32_B32 = 0x02,
+    R16_G16_B16_A16 = 0x03,
+    R32_G32 = 0x04,
+    R32_B24G8 = 0x05,
+    ETC2_RGB = 0x06,
+    X8B8G8R8 = 0x07,
+    A8R8G8B8 = 0x08,
+    A2B10G10R10 = 0x09,
+    ETC2_RGB_PTA = 0x0a,
+    ETC2_RGBA = 0x0b,
+    R16_G16 = 0x0c,
+    G8R24 = 0x0d,
+    G24R8 = 0x0e,
+    R32 = 0x0f,
+    BC6H_SF16 = 0x10,
+    BC6H_UF16 = 0x11,
+    A4B4G4R4 = 0x12,
+    A5B5G5R1 = 0x13,
     A1B5G5R5 = 0x14,
     B5G6R5 = 0x15,
+    B6G5R5 = 0x16,
+    BC7U = 0x17,
+    G8R8 = 0x18,
+    EAC = 0x19,
+    EACX2 = 0x1a,
+    R16 = 0x1b,
+    Y8_VIDEO = 0x1c,
+    R8 = 0x1d,
+    G4R4 = 0x1e,
+    R1 = 0x1f,
+    E5B9G9R9_SHAREDEXP = 0x20,
+    BF10GF11RF11 = 0x21,
+    G8B8G8R8 = 0x22,
+    B8G8R8G8 = 0x23,
     DXT1 = 0x24,
     DXT23 = 0x25,
     DXT45 = 0x26,
+    DXN1 = 0x27,
+    DXN2 = 0x28,
+    Z24S8 = 0x29,
+    X8Z24 = 0x2a,
+    S8Z24 = 0x2b,
+    X4V4Z24__COV4R4V = 0x2c,
+    X4V4Z24__COV8R8V = 0x2d,
+    V8Z24__COV4R12V = 0x2e,
+    ZF32 = 0x2f,
+    ZF32_X24S8 = 0x30,
+    X8Z24_X20V4S8__COV4R4V = 0x31,
+    X8Z24_X20V4S8__COV8R8V = 0x32,
+    ZF32_X20V4X8__COV4R4V = 0x33,
+    ZF32_X20V4X8__COV8R8V = 0x34,
+    ZF32_X20V4S8__COV4R4V = 0x35,
+    ZF32_X20V4S8__COV8R8V = 0x36,
+    X8Z24_X16V8S8__COV4R12V = 0x37,
+    ZF32_X16V8X8__COV4R12V = 0x38,
+    ZF32_X16V8S8__COV4R12V = 0x39,
+    Z16 = 0x3a,
+    V8Z24__COV8R24V = 0x3b,
+    X8Z24_X16V8S8__COV8R24V = 0x3c,
+    ZF32_X16V8X8__COV8R24V = 0x3d,
+    ZF32_X16V8S8__COV8R24V = 0x3e,
+    ASTC_2D_4X4 = 0x40, // 0x3f has no enumerator in this list
+    ASTC_2D_5X5 = 0x41,
+    ASTC_2D_6X6 = 0x42,
+    ASTC_2D_8X8 = 0x44, // 0x43 has no enumerator in this list
+    ASTC_2D_10X10 = 0x45,
+    ASTC_2D_12X12 = 0x46,
+    ASTC_2D_5X4 = 0x50, // 0x47-0x4f have no enumerators in this list
+    ASTC_2D_6X5 = 0x51,
+    ASTC_2D_8X6 = 0x52,
+    ASTC_2D_10X8 = 0x53,
+    ASTC_2D_12X10 = 0x54,
+    ASTC_2D_8X5 = 0x55,
+    ASTC_2D_10X5 = 0x56,
+    ASTC_2D_10X6 = 0x57,
 };
 
 enum class TextureType : u32 {
@@ -53,6 +122,17 @@ enum class ComponentType : u32 {
     FLOAT = 7
 };
 
+enum class SwizzleSource : u32 { // Type of the TICEntry *_source bit fields
+    Zero = 0, // NOTE(review): value 1 has no enumerator — TODO confirm it cannot occur
+
+    R = 2,
+    G = 3,
+    B = 4,
+    A = 5,
+    OneInt = 6,
+    OneFloat = 7,
+};
+
 union TextureHandle {
     u32 raw;
     BitField<0, 20, u32> tic_id;
@@ -70,6 +150,11 @@ struct TICEntry {
         BitField<10, 3, ComponentType> g_type;
         BitField<13, 3, ComponentType> b_type;
         BitField<16, 3, ComponentType> a_type;
+
+        BitField<19, 3, SwizzleSource> x_source;
+        BitField<22, 3, SwizzleSource> y_source;
+        BitField<25, 3, SwizzleSource> z_source;
+        BitField<28, 3, SwizzleSource> w_source;
     };
     u32 address_low;
     union {