5 files changed, 440 insertions, 61 deletions
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 2fe787d6f..0f4c3103a 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::LEA_IMM:
     case OpCode::Id::LEA_RZ:
     case OpCode::Id::LEA_HI: {
-        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+        auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
             switch (opcode->get().GetId()) {
             case OpCode::Id::LEA_R2: {
                 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                         Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
             }
-
             case OpCode::Id::LEA_R1: {
                 const bool neg = instr.lea.r1.neg != 0;
                 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         GetRegister(instr.gpr20),
                         Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
             }
-
             case OpCode::Id::LEA_IMM: {
                 const bool neg = instr.lea.imm.neg != 0;
                 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
             }
-
             case OpCode::Id::LEA_RZ: {
                 const bool neg = instr.lea.rz.neg != 0;
                 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
             }
-
             case OpCode::Id::LEA_HI:
             default:
                 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
@@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                              "Unhandled LEA Predicate");
 
-        const Node shifted_c =
-            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
-        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
-        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
-
-        SetRegister(bb, instr.gpr0, value);
+        Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c));
+        value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value));
+        SetRegister(bb, instr.gpr0, std::move(value));
 
         break;
     }
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 6ead42070..c72690b2b 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
 
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
-        value = [&]() {
+        value = [&] {
+            if (instr.conversion.src_size != instr.conversion.dst_size) {
+                // Rounding operations only matter when the source and destination conversion
+                // sizes are the same.
+                return value;
+            }
             switch (instr.conversion.f2f.GetRoundingMode()) {
             case Tegra::Shader::F2fRoundingOp::None:
                 return value;
             case Tegra::Shader::F2fRoundingOp::Round:
-                return Operation(OperationCode::FRoundEven, PRECISE, value);
+                return Operation(OperationCode::FRoundEven, value);
             case Tegra::Shader::F2fRoundingOp::Floor:
-                return Operation(OperationCode::FFloor, PRECISE, value);
+                return Operation(OperationCode::FFloor, value);
             case Tegra::Shader::F2fRoundingOp::Ceil:
-                return Operation(OperationCode::FCeil, PRECISE, value);
+                return Operation(OperationCode::FCeil, value);
             case Tegra::Shader::F2fRoundingOp::Trunc:
-                return Operation(OperationCode::FTrunc, PRECISE, value);
+                return Operation(OperationCode::FTrunc, value);
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                                   static_cast<u32>(instr.conversion.f2f.rounding.Value()));
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/textures/texture.h"
 
 namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::StoreType;
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TICEntry;
 
 namespace {
+
+ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
+                               std::size_t component) { // maps a component slot to its type
+    const TextureFormat format{descriptor.format};
+    switch (format) {
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1: // slots 0..3 -> r, g, b, a
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.b_type;
+        }
+        if (component == 3) {
+            return descriptor.a_type;
+        }
+        break;
+    case TextureFormat::A8R8G8B8: // slots 0..3 -> a, r, g, b
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.b_type;
+        }
+        break;
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5: // slots 0..3 -> a, b, g, r
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::R32_B24G8: // slots 0..2 -> r, b, g
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        break;
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5: // slots 0..2 -> b, g, r
+        if (component == 0) {
+            return descriptor.b_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4: // slots 0..1 -> g, r
+        if (component == 0) {
+            return descriptor.g_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        break;
+    }
+    UNIMPLEMENTED_MSG("texture format not implement={}", format); // unlisted format or slot
+    return ComponentType::FLOAT; // fallback once the problem has been reported
+}
+
+bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { // tests one mask bit
+    constexpr u8 R = 0b0001;
+    constexpr u8 G = 0b0010;
+    constexpr u8 B = 0b0100;
+    constexpr u8 A = 0b1000;
+    constexpr std::array<u8, 16> mask = { // entry i spells out bit pattern i (R=bit 0 .. A=bit 3)
+        0,   (R),     (G),     (R | G),     (B),     (R | B),     (G | B),     (R | G | B),
+        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
+    return std::bitset<4>{mask.at(component_mask)}.test(component); // at()/test() bounds-check
+}
+
+u32 GetComponentSize(TextureFormat format, std::size_t component) { // bits in slot; 0 if absent
+    switch (format) { // slot i is sized as the i-th channel named in the format
+    case TextureFormat::R32_G32_B32_A32:
+        return 32;
+    case TextureFormat::R16_G16_B16_A16:
+        return 16;
+    case TextureFormat::R32_G32_B32:
+        return component <= 2 ? 32 : 0;
+    case TextureFormat::R32_G32:
+        return component <= 1 ? 32 : 0;
+    case TextureFormat::R16_G16:
+        return component <= 1 ? 16 : 0;
+    case TextureFormat::R32:
+        return component == 0 ? 32 : 0;
+    case TextureFormat::R16:
+        return component == 0 ? 16 : 0;
+    case TextureFormat::R8:
+        return component == 0 ? 8 : 0;
+    case TextureFormat::R1:
+        return component == 0 ? 1 : 0;
+    case TextureFormat::A8R8G8B8:
+        return 8;
+    case TextureFormat::A2B10G10R10:
+        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
+    case TextureFormat::A4B4G4R4:
+        return 4;
+    case TextureFormat::A5B5G5R1:
+        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
+    case TextureFormat::A1B5G5R5:
+        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return 32;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        if (component == 2) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::B5G6R5:
+        if (component == 0 || component == 2) {
+            return 5;
+        }
+        if (component == 1) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B6G5R5:
+        if (component == 1 || component == 2) {
+            return 5;
+        }
+        if (component == 0) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::G8R24: // slot 0 = G (8 bits), slot 1 = R (24 bits)
+        if (component == 0) {
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::G24R8: // slot 0 = G (24 bits), slot 1 = R (8 bits); mirror of G8R24
+        if (component == 0) {
+            return 24;
+        }
+        if (component == 1) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::G8R8:
+        return (component == 0 || component == 1) ? 8 : 0;
+    case TextureFormat::G4R4:
+        return (component == 0 || component == 1) ? 4 : 0;
+    default: // unlisted format: report it and treat the component as absent
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return 0;
+    }
+}
+
+std::size_t GetImageComponentMask(TextureFormat format) { // bit mask of channels in `format`
+    constexpr u8 R = 0b0001;
+    constexpr u8 G = 0b0010;
+    constexpr u8 B = 0b0100;
+    constexpr u8 A = 0b1000;
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::A8R8G8B8:
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5: // four-channel formats
+        return std::size_t{R | G | B | A};
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_B24G8:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5: // three-channel formats
+        return std::size_t{R | G | B};
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4: // two-channel formats
+        return std::size_t{R | G};
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1: // single-channel formats
+        return std::size_t{R};
+    default: // unlisted format: report it and fall back to all four channels
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return std::size_t{R | G | B | A};
+    }
+}
+
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
     switch (image_type) {
     case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
 }
 } // Anonymous namespace
 
+std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
+                                                  Node original_value) {
+    // Returns {node to pack, is_signed}. The scale factor is built from a 64-bit one because
+    // component_size may be 32 and shifting a 32-bit int by its full width is undefined.
+    switch (component_type) {
+    case ComponentType::SNORM: { // range [-1.0, 1.0]
+        auto cnv_value = Operation(
+            OperationCode::FMul, original_value,
+            Immediate(static_cast<float>(1ULL << component_size) / 2.f - 1.f));
+        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
+        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
+    }
+    case ComponentType::SINT:
+    case ComponentType::UNORM: { // range [0.0, 1.0]
+        const bool is_signed = component_type == ComponentType::SINT;
+        auto cnv_value = Operation(OperationCode::FMul, original_value,
+                                   Immediate(static_cast<float>(1ULL << component_size) - 1.f));
+        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
+                is_signed};
+    }
+    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
+        return {std::move(original_value), false};
+    case ComponentType::FLOAT:
+        if (component_size == 16) {
+            return {Operation(OperationCode::HCastFloat, original_value), true};
+        }
+        return {std::move(original_value), true};
+    default:
+        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        return {std::move(original_value), true};
+    }
+}
+
 u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::SULD: {
-        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                          Tegra::Shader::OutOfBoundsStore::Ignore);
 
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                                               : GetBindlessImage(instr.gpr39, type)};
         image.MarkRead();
 
-        u32 indexer = 0;
-        for (u32 element = 0; element < 4; ++element) {
-            if (!instr.suldst.IsComponentEnabled(element)) {
-                continue;
+        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.suldst.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaImage meta{image, {}, element};
+                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+                SetTemporary(bb, indexer++, std::move(value));
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
+            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
+                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
+
+            auto descriptor = [this, instr] {
+                std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+                if (instr.suldst.is_immediate) {
+                    descriptor =
+                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
+                } else {
+                    const Node image_register = GetRegister(instr.gpr39);
+                    const auto [base_image, buffer, offset] = TrackCbuf(
+                        image_register, global_code, static_cast<s64>(global_code.size()));
+                    descriptor = registry.ObtainBindlessSampler(buffer, offset);
+                }
+                if (!descriptor) {
+                    UNREACHABLE_MSG("Failed to obtain image descriptor");
+                }
+                return *descriptor;
+            }();
+
+            const auto comp_mask = GetImageComponentMask(descriptor.format);
+
+            switch (instr.suldst.GetStoreDataLayout()) {
+            case StoreType::Bits32:
+            case StoreType::Bits64: {
+                u32 indexer = 0;
+                u32 shifted_counter = 0;
+                Node value = Immediate(0);
+                for (u32 element = 0; element < 4; ++element) {
+                    if (!IsComponentEnabled(comp_mask, element)) {
+                        continue;
+                    }
+                    const auto component_type = GetComponentType(descriptor, element);
+                    const auto component_size = GetComponentSize(descriptor.format, element);
+                    MetaImage meta{image, {}, element};
+
+                    auto [converted_value, is_signed] = GetComponentValue(
+                        component_type, component_size,
+                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
+
+                    // shift element to correct position
+                    const auto shifted = shifted_counter;
+                    if (shifted > 0) {
+                        converted_value =
+                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+                                            std::move(converted_value), Immediate(shifted));
+                    }
+                    shifted_counter += component_size;
+
+                    // add value into result
+                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
+
+                    // once 32 bits are packed, the word is complete -> save it into a temp
+                    if (shifted_counter >= 32) {
+                        SetTemporary(bb, indexer++, std::move(value));
+                        // reset counter and value to start packing the next 32-bit word
+                        value = Immediate(0);
+                        shifted_counter = 0;
+                    }
+                }
+                for (u32 i = 0; i < indexer; ++i) {
+                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+                }
+                break;
+            }
+            default:
+                UNREACHABLE();
+                break;
             }
-            MetaImage meta{image, {}, element};
-            Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
-            SetTemporary(bb, indexer++, std::move(value));
-        }
-        for (u32 i = 0; i < indexer; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
         }
         break;
     }
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b5fbc4d58..b8f63922f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,7 +19,6 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicOp;
 using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@@ -28,6 +27,31 @@ using Tegra::Shader::StoreType;
 
 namespace {
 
+Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { // builds atomic node
+    const OperationCode operation_code = [op] { // translate the decoded op to an IR opcode
+        switch (op) {
+        case AtomicOp::Add:
+            return OperationCode::AtomicIAdd;
+        case AtomicOp::Min:
+            return OperationCode::AtomicIMin;
+        case AtomicOp::Max:
+            return OperationCode::AtomicIMax;
+        case AtomicOp::And:
+            return OperationCode::AtomicIAnd;
+        case AtomicOp::Or:
+            return OperationCode::AtomicIOr;
+        case AtomicOp::Xor:
+            return OperationCode::AtomicIXor;
+        case AtomicOp::Exch:
+            return OperationCode::AtomicIExchange;
+        default: // any other op is reported and then treated as an add
+            UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+            return OperationCode::AtomicIAdd;
+        }
+    }();
+    return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+}
+
 bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
     return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
            uniform_type == Tegra::Shader::UniformType::UnsignedShort;
@@ -363,10 +387,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::ATOM: {
-        UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
-                             static_cast<int>(instr.atom.operation.Value()));
-        UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
-                             static_cast<int>(instr.atom.type.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
+                                 instr.atom.operation == AtomicOp::Dec ||
+                                 instr.atom.operation == AtomicOp::SafeAdd,
+                             "operation={}", static_cast<int>(instr.atom.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
+                                 instr.atom.type == GlobalAtomicType::U64,
+                             "type={}", static_cast<int>(instr.atom.type.Value()));
 
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
@@ -375,25 +402,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             break;
         }
 
+        const bool is_signed = // ATOM keeps its type in instr.atom; instr.atoms belongs to ATOMS
+            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-        Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+        Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
+                                      GetRegister(instr.gpr20));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::ATOMS: {
-        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
-                             static_cast<int>(instr.atoms.operation.Value()));
-        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
-                             static_cast<int>(instr.atoms.type.Value()));
-
+        UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
+                                 instr.atoms.operation == AtomicOp::Dec,
+                             "operation={}", static_cast<int>(instr.atoms.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
+                                 instr.atoms.type == AtomicType::U64,
+                             "type={}", static_cast<int>(instr.atoms.type.Value()));
+        const bool is_signed =
+            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
         const s32 offset = instr.atoms.GetImmediateOffset();
         Node address = GetRegister(instr.gpr8);
         address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
-
-        Node memory = GetSharedMemory(std::move(address));
-        Node data = GetRegister(instr.gpr20);
-
-        Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
+        Node value =
+            GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
+                             GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4944e9d69..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,12 +11,17 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
+using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using Tegra::Shader::SystemVariable;
 
+using Index = Tegra::Shader::Attribute::Index;
+
 u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         bb.push_back(Operation(OperationCode::Discard));
         break;
     }
-    case OpCode::Id::MOV_SYS: {
+    case OpCode::Id::S2R: {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
                 return Immediate(0U);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorXY:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorZ:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                 return Immediate(0U);
             case SystemVariable::Tid: {
                 Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::IPA: {
         const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
-
         const auto attribute = instr.attribute.fmt28;
-        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-                                                instr.ipa.sample_mode.Value()};
+        const Index index = attribute.index;
 
         Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
-                                 : GetInputAttribute(attribute.index, attribute.element);
-        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
-        const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
-                                index <= Tegra::Shader::Attribute::Index::Attribute_31;
-        if (is_generic || is_physical) {
-            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
-            // In theory by setting them as perspective, OpenGL does the perspective correction.
-            // A way must figured to reverse the last step of it.
-            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
-                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+                                 : GetInputAttribute(index, attribute.element);
+
+        // Code taken from Ryujinx.
+        if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
+            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
+            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
+                Node position_w = GetInputAttribute(Index::Position, 3);
+                value = Operation(OperationCode::FMul, move(value), move(position_w));
             }
         }
-        value = GetSaturatedFloat(value, instr.ipa.saturate);
 
-        SetRegister(bb, instr.gpr0, value);
+        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
+            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
+        }
+
+        value = GetSaturatedFloat(move(value), instr.ipa.saturate);
+
+        SetRegister(bb, instr.gpr0, move(value));
         break;
     }
     case OpCode::Id::OUT_R: {