diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 15 | ||||
-rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 15 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 360 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 63 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 48 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 16 | ||||
-rw-r--r-- | src/video_core/shader/node_helper.cpp | 14 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.cpp | 3 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 4 |
9 files changed, 476 insertions, 62 deletions
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 2fe787d6f..0f4c3103a 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::LEA_IMM: case OpCode::Id::LEA_RZ: case OpCode::Id::LEA_HI: { - const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { + auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::LEA_R2: { return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; } - case OpCode::Id::LEA_R1: { const bool neg = instr.lea.r1.neg != 0; return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), GetRegister(instr.gpr20), Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; } - case OpCode::Id::LEA_IMM: { const bool neg = instr.lea.imm.neg != 0; return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; } - case OpCode::Id::LEA_RZ: { const bool neg = instr.lea.rz.neg != 0; return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; } - case OpCode::Id::LEA_HI: default: UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); @@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), "Unhandled LEA Predicate"); - const Node shifted_c = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); - const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); - - SetRegister(bb, instr.gpr0, value); + Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c)); + value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value)); + SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 6ead42070..c72690b2b 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - value = [&]() { + value = [&] { + if (instr.conversion.src_size != instr.conversion.dst_size) { + // Rounding operations only matter when the source and destination conversion size + // is the same. + return value; + } switch (instr.conversion.f2f.GetRoundingMode()) { case Tegra::Shader::F2fRoundingOp::None: return value; case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, PRECISE, value); + return Operation(OperationCode::FRoundEven, value); case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); + return Operation(OperationCode::FFloor, value); case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); + return Operation(OperationCode::FCeil, value); case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); + return Operation(OperationCode::FTrunc, value); default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast<u32>(instr.conversion.f2f.rounding.Value())); diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d2fe4ec5d..0dd7a1196 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -13,13 +13,247 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" +#include "video_core/textures/texture.h" namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; +using Tegra::Shader::StoreType; +using Tegra::Texture::ComponentType; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TICEntry; namespace { + +ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, + std::size_t component) { + const TextureFormat format{descriptor.format}; + switch (format) { + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.b_type; + } + if (component == 3) { + return descriptor.a_type; + } + break; + case TextureFormat::A8R8G8B8: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.r_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.b_type; + } + break; + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.r_type; + } + break; + case TextureFormat::R32_B24G8: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + break; + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + if (component == 0) { + return descriptor.b_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.r_type; + } + break; + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + if (component == 0) { + return descriptor.g_type; + } + if (component == 1) { + return descriptor.r_type; + } + break; + } + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return ComponentType::FLOAT; +} + +bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array<u8, 16> mask = { + 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), + (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask)}.test(component); +} + +u32 GetComponentSize(TextureFormat format, std::size_t component) { + switch (format) { + case TextureFormat::R32_G32_B32_A32: + return 32; + case TextureFormat::R16_G16_B16_A16: + return 16; + case TextureFormat::R32_G32_B32: + return component <= 2 ? 32 : 0; + case TextureFormat::R32_G32: + return component <= 1 ? 32 : 0; + case TextureFormat::R16_G16: + return component <= 1 ? 16 : 0; + case TextureFormat::R32: + return component == 0 ? 32 : 0; + case TextureFormat::R16: + return component == 0 ? 16 : 0; + case TextureFormat::R8: + return component == 0 ? 8 : 0; + case TextureFormat::R1: + return component == 0 ? 1 : 0; + case TextureFormat::A8R8G8B8: + return 8; + case TextureFormat::A2B10G10R10: + return (component == 3 || component == 2 || component == 1) ? 10 : 2; + case TextureFormat::A4B4G4R4: + return 4; + case TextureFormat::A5B5G5R1: + return (component == 0 || component == 1 || component == 2) ? 5 : 1; + case TextureFormat::A1B5G5R5: + return (component == 1 || component == 2 || component == 3) ? 5 : 1; + case TextureFormat::R32_B24G8: + if (component == 0) { + return 32; + } + if (component == 1) { + return 24; + } + if (component == 2) { + return 8; + } + return 0; + case TextureFormat::B5G6R5: + if (component == 0 || component == 2) { + return 5; + } + if (component == 1) { + return 6; + } + return 0; + case TextureFormat::B6G5R5: + if (component == 1 || component == 2) { + return 5; + } + if (component == 0) { + return 6; + } + return 0; + case TextureFormat::G8R24: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G24R8: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G8R8: + return (component == 0 || component == 1) ? 8 : 0; + case TextureFormat::G4R4: + return (component == 0 || component == 1) ? 4 : 0; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return 0; + } +} + +std::size_t GetImageComponentMask(TextureFormat format) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + switch (format) { + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::A8R8G8B8: + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + return std::size_t{R | G | B | A}; + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_B24G8: + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + return std::size_t{R | G | B}; + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + return std::size_t{R | G}; + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + return std::size_t{R}; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return std::size_t{R | G | B | A}; + } +} + std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: @@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { } } // Anonymous namespace +std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, + Node original_value) { + switch (component_type) { + case ComponentType::SNORM: { + // range [-1.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); + cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); + return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; + } + case ComponentType::SINT: + case ComponentType::UNORM: { + bool is_signed = component_type == ComponentType::SINT; + // range [0.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) - 1.f)); + return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), + is_signed}; + } + case ComponentType::UINT: // range [0, (1 << component_size) - 1] + return {std::move(original_value), false}; + case ComponentType::FLOAT: + if (component_size == 16) { + return {Operation(OperationCode::HCastFloat, original_value), true}; + } else { + return {std::move(original_value), true}; + } + default: + UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + return {std::move(original_value), true}; + } +} + u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); @@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { : GetBindlessImage(instr.gpr39, type)}; image.MarkRead(); - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; + if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.suldst.IsComponentEnabled(element)) { + continue; + } + MetaImage meta{image, {}, element}; + Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); + SetTemporary(bb, indexer++, std::move(value)); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { + UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && + instr.suldst.GetStoreDataLayout() != StoreType::Bits64); + + auto descriptor = [this, instr] { + std::optional<Tegra::Engines::SamplerDescriptor> descriptor; + if (instr.suldst.is_immediate) { + descriptor = + registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); + } else { + const Node image_register = GetRegister(instr.gpr39); + const auto [base_image, buffer, offset] = TrackCbuf( + image_register, global_code, static_cast<s64>(global_code.size())); + descriptor = registry.ObtainBindlessSampler(buffer, offset); + } + if (!descriptor) { + UNREACHABLE_MSG("Failed to obtain image descriptor"); + } + return *descriptor; + }(); + + const auto comp_mask = GetImageComponentMask(descriptor.format); + + switch (instr.suldst.GetStoreDataLayout()) { + case StoreType::Bits32: + case StoreType::Bits64: { + u32 indexer = 0; + u32 shifted_counter = 0; + Node value = Immediate(0); + for (u32 element = 0; element < 4; ++element) { + if (!IsComponentEnabled(comp_mask, element)) { + continue; + } + const auto component_type = GetComponentType(descriptor, element); + const auto component_size = GetComponentSize(descriptor.format, element); + MetaImage meta{image, {}, element}; + + auto [converted_value, is_signed] = GetComponentValue( + component_type, component_size, + Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); + + // shift element to correct position + const auto shifted = shifted_counter; + if (shifted > 0) { + converted_value = + SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, + std::move(converted_value), Immediate(shifted)); + } + shifted_counter += component_size; + + // add value into result + value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); + + // if we shifted enough for 1 byte -> we save it into temp + if (shifted_counter >= 32) { + SetTemporary(bb, indexer++, std::move(value)); + // reset counter and value to prepare pack next byte + value = Immediate(0); + shifted_counter = 0; + } + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } + default: + UNREACHABLE(); + break; } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index b5fbc4d58..b8f63922f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,7 +19,6 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicOp; using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -28,6 +27,31 @@ using Tegra::Shader::StoreType; namespace { +Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { + const OperationCode operation_code = [op] { + switch (op) { + case AtomicOp::Add: + return OperationCode::AtomicIAdd; + case AtomicOp::Min: + return OperationCode::AtomicIMin; + case AtomicOp::Max: + return OperationCode::AtomicIMax; + case AtomicOp::And: + return OperationCode::AtomicIAnd; + case AtomicOp::Or: + return OperationCode::AtomicIOr; + case AtomicOp::Xor: + return OperationCode::AtomicIXor; + case AtomicOp::Exch: + return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; + } + }(); + return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); +} + bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { return uniform_type == Tegra::Shader::UniformType::UnsignedByte || uniform_type == Tegra::Shader::UniformType::UnsignedShort; @@ -363,10 +387,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", - static_cast<int>(instr.atom.operation.Value())); - UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", - static_cast<int>(instr.atom.type.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || + instr.atom.operation == AtomicOp::Dec || + instr.atom.operation == AtomicOp::SafeAdd, + "operation={}", static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || + instr.atom.type == GlobalAtomicType::U64, + "type={}", static_cast<int>(instr.atom.type.Value())); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); @@ -375,25 +402,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } + const bool is_signed = + instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, + GetRegister(instr.gpr20)); SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", - static_cast<int>(instr.atoms.operation.Value())); - UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", - static_cast<int>(instr.atoms.type.Value())); - + UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || + instr.atoms.operation == AtomicOp::Dec, + "operation={}", static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || + instr.atoms.type == AtomicType::U64, + "type={}", static_cast<int>(instr.atoms.type.Value())); + const bool is_signed = + instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); Node address = GetRegister(instr.gpr8); address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); - - Node memory = GetSharedMemory(std::move(address)); - Node data = GetRegister(instr.gpr20); - - Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); + Node value = + GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, + GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 4944e9d69..d4f95b18c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -11,12 +11,17 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; +using Tegra::Shader::IpaInterpMode; using Tegra::Shader::OpCode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::SystemVariable; +using Index = Tegra::Shader::Attribute::Index; + u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { bb.push_back(Operation(OperationCode::Discard)); break; } - case OpCode::Id::MOV_SYS: { + case OpCode::Id::S2R: { const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::LaneId: - LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); + LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); return Immediate(0U); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); + LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); + return Immediate(0U); + case SystemVariable::WscaleFactorXY: + UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); + return Immediate(0U); + case SystemVariable::WscaleFactorZ: + UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); return Immediate(0U); case SystemVariable::Tid: { Node value = Immediate(0); @@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } case OpCode::Id::IPA: { const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), - instr.ipa.sample_mode.Value()}; + const Index index = attribute.index; Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(attribute.index, attribute.element); - const Tegra::Shader::Attribute::Index index = attribute.index.Value(); - const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && - index <= Tegra::Shader::Attribute::Index::Attribute_31; - if (is_generic || is_physical) { - // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. - // In theory by setting them as perspective, OpenGL does the perspective correction. - // A way must figured to reverse the last step of it. - if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + : GetInputAttribute(index, attribute.element); + + // Code taken from Ryujinx. + if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { + const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); + if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { + Node position_w = GetInputAttribute(Index::Position, 3); + value = Operation(OperationCode::FMul, move(value), move(position_w)); } } - value = GetSaturatedFloat(value, instr.ipa.saturate); - SetRegister(bb, instr.gpr0, value); + if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); + } + + value = GetSaturatedFloat(move(value), instr.ipa.saturate); + + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::OUT_R: { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a1828546e..5fcc9da60 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,21 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - AtomicAdd, /// (memory, {u}int) -> {u}int + AtomicUExchange, /// (memory, uint) -> uint + AtomicUAdd, /// (memory, uint) -> uint + AtomicUMin, /// (memory, uint) -> uint + AtomicUMax, /// (memory, uint) -> uint + AtomicUAnd, /// (memory, uint) -> uint + AtomicUOr, /// (memory, uint) -> uint + AtomicUXor, /// (memory, uint) -> uint + + AtomicIExchange, /// (memory, int) -> int + AtomicIAdd, /// (memory, int) -> int + AtomicIMin, /// (memory, int) -> int + AtomicIMax, /// (memory, int) -> int + AtomicIAnd, /// (memory, int) -> int + AtomicIOr, /// (memory, int) -> int + AtomicIXor, /// (memory, int) -> int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 76c56abb5..7bf4ff387 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -86,6 +86,20 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) return OperationCode::LogicalUNotEqual; case OperationCode::LogicalIGreaterEqual: return OperationCode::LogicalUGreaterEqual; + case OperationCode::AtomicIExchange: + return OperationCode::AtomicUExchange; + case OperationCode::AtomicIAdd: + return OperationCode::AtomicUAdd; + case OperationCode::AtomicIMin: + return OperationCode::AtomicUMin; + case OperationCode::AtomicIMax: + return OperationCode::AtomicUMax; + case OperationCode::AtomicIAnd: + return OperationCode::AtomicUAnd; + case OperationCode::AtomicIOr: + return OperationCode::AtomicUOr; + case OperationCode::AtomicIXor: + return OperationCode::AtomicUXor; case OperationCode::INegate: UNREACHABLE_MSG("Can't negate an unsigned integer"); return {}; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index baf7188d2..8852c8a1b 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { switch (cc) { case Tegra::Shader::ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); + case Tegra::Shader::ConditionCode::FCSM_TR: + UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); + return MakeNode<PredicateNode>(Pred::NeverExecute, false); default: UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); return MakeNode<PredicateNode>(Pred::NeverExecute, false); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0f1ebef1b..c6e7bdf50 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -312,6 +312,10 @@ private: /// Conditionally saturates a half float pair Node GetSaturatedHalfFloat(Node value, bool saturate = true); + /// Get image component value by type and size + std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, + u32 component_size, Node original_value); + /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate comparing two integers |