// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include #include #include #include #include #include #include #include #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. // GLASM lacks booleans, so they have to be implemented as integers. // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to // select between two values, because -1 will be evaluated as true and 0 as false. namespace OpenGL { namespace { using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using namespace VideoCommon::Shader; using Operation = const OperationNode&; constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; char Swizzle(std::size_t component) { static constexpr std::string_view SWIZZLE{"xyzw"}; return SWIZZLE.at(component); } constexpr bool IsGenericAttribute(Attribute::Index index) { return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; } u32 GetGenericAttributeIndex(Attribute::Index index) { ASSERT(IsGenericAttribute(index)); return static_cast(index) - static_cast(Attribute::Index::Attribute_0); } std::string_view Modifiers(Operation operation) { const auto meta = std::get_if(&operation.GetMeta()); if (meta && meta->precise) { return ".PREC"; } return ""; } std::string_view GetInputFlags(PixelImap attribute) { switch (attribute) { case PixelImap::Perspective: return ""; case PixelImap::Constant: return "FLAT "; case PixelImap::ScreenLinear: return "NOPERSPECTIVE "; case PixelImap::Unused: break; } UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); return {}; } std::string_view ImageType(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: return "1D"; case Tegra::Shader::ImageType::TextureBuffer: return "BUFFER"; case Tegra::Shader::ImageType::Texture1DArray: return "ARRAY1D"; case Tegra::Shader::ImageType::Texture2D: return "2D"; case Tegra::Shader::ImageType::Texture2DArray: return "ARRAY2D"; case Tegra::Shader::ImageType::Texture3D: return "3D"; } UNREACHABLE(); return {}; } std::string_view StackName(MetaStackClass stack) { switch (stack) { case MetaStackClass::Ssy: return "SSY"; case MetaStackClass::Pbk: return "PBK"; } UNREACHABLE(); return ""; }; std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { switch (topology) { case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: return "POINTS"; case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: return "LINES"; case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: return "LINES_ADJACENCY"; case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: return "TRIANGLES"; case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: return "TRIANGLES_ADJACENCY"; default: UNIMPLEMENTED_MSG("topology={}", topology); return "POINTS"; } } std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { case Tegra::Shader::OutputTopology::PointList: return "POINTS"; case Tegra::Shader::OutputTopology::LineStrip: return "LINE_STRIP"; case Tegra::Shader::OutputTopology::TriangleStrip: return "TRIANGLE_STRIP"; default: UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); return "points"; } } std::string_view StageInputName(ShaderType stage) { switch (stage) { case ShaderType::Vertex: case ShaderType::Geometry: return "vertex"; case ShaderType::Fragment: return "fragment"; case ShaderType::Compute: return "invocation"; default: UNREACHABLE(); return ""; } } std::string TextureType(const MetaTexture& meta) { if (meta.sampler.is_buffer) { return "BUFFER"; } std::string type; if (meta.sampler.is_shadow) { type += "SHADOW"; } if (meta.sampler.is_array) { type += "ARRAY"; } type += [&meta] { switch (meta.sampler.type) { case Tegra::Shader::TextureType::Texture1D: return "1D"; case Tegra::Shader::TextureType::Texture2D: return "2D"; case Tegra::Shader::TextureType::Texture3D: return "3D"; case Tegra::Shader::TextureType::TextureCube: return "CUBE"; } UNREACHABLE(); return "2D"; }(); return type; } class ARBDecompiler final { public: explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, ShaderType stage_, std::string_view identifier); std::string Code() const { return shader_source; } private: void DefineGlobalMemory(); void DeclareHeader(); void DeclareVertex(); void DeclareGeometry(); void DeclareFragment(); void DeclareCompute(); void DeclareInputAttributes(); void DeclareOutputAttributes(); void DeclareLocalMemory(); void DeclareGlobalMemory(); void DeclareConstantBuffers(); void DeclareRegisters(); void DeclareTemporaries(); void DeclarePredicates(); void DeclareInternalFlags(); void InitializeVariables(); void DecompileAST(); void DecompileBranchMode(); void VisitAST(const ASTNode& node); std::string VisitExpression(const Expr& node); void VisitBlock(const NodeBlock& bb); std::string Visit(const Node& node); std::tuple BuildCoords(Operation); std::string BuildAoffi(Operation); std::string GlobalMemoryPointer(const GmemNode& gmem); void Exit(); std::string Assign(Operation); std::string Select(Operation); std::string FClamp(Operation); std::string FCastHalf0(Operation); std::string FCastHalf1(Operation); std::string FSqrt(Operation); std::string FSwizzleAdd(Operation); std::string HAdd2(Operation); std::string HMul2(Operation); std::string HFma2(Operation); std::string HAbsolute(Operation); std::string HNegate(Operation); std::string HClamp(Operation); std::string HCastFloat(Operation); std::string HUnpack(Operation); std::string HMergeF32(Operation); std::string HMergeH0(Operation); std::string HMergeH1(Operation); std::string HPack2(Operation); std::string LogicalAssign(Operation); std::string LogicalPick2(Operation); std::string LogicalAnd2(Operation); std::string FloatOrdered(Operation); std::string FloatUnordered(Operation); std::string LogicalAddCarry(Operation); std::string Texture(Operation); std::string TextureGather(Operation); std::string TextureQueryDimensions(Operation); std::string TextureQueryLod(Operation); std::string TexelFetch(Operation); std::string TextureGradient(Operation); std::string ImageLoad(Operation); std::string ImageStore(Operation); std::string Branch(Operation); std::string BranchIndirect(Operation); std::string PushFlowStack(Operation); std::string PopFlowStack(Operation); std::string Exit(Operation); std::string Discard(Operation); std::string EmitVertex(Operation); std::string EndPrimitive(Operation); std::string InvocationId(Operation); std::string YNegate(Operation); std::string ThreadId(Operation); std::string ShuffleIndexed(Operation); std::string Barrier(Operation); std::string MemoryBarrierGroup(Operation); std::string MemoryBarrierGlobal(Operation); template std::string Unary(Operation operation) { std::string temporary = AllocTemporary(); AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); return temporary; } template std::string Binary(Operation operation) { std::string temporary = AllocTemporary(); AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), Visit(operation[1])); return temporary; } template std::string Trinary(Operation operation) { std::string temporary = AllocTemporary(); AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), Visit(operation[1]), Visit(operation[2])); return temporary; } template std::string FloatComparison(Operation operation) { std::string temporary = AllocTemporary(); AddLine("TRUNC.U.CC RC.x, {};", Binary(operation)); AddLine("MOV.S {}, 0;", temporary); AddLine("MOV.S {} (NE.x), -1;", temporary); const std::string op_a = Visit(operation[0]); const std::string op_b = Visit(operation[1]); if constexpr (unordered) { AddLine("SNE.F RC.x, {}, {};", op_a, op_a); AddLine("TRUNC.U.CC RC.x, RC.x;"); AddLine("MOV.S {} (NE.x), -1;", temporary); AddLine("SNE.F RC.x, {}, {};", op_b, op_b); AddLine("TRUNC.U.CC RC.x, RC.x;"); AddLine("MOV.S {} (NE.x), -1;", temporary); } else if (op == SNE_F) { AddLine("SNE.F RC.x, {}, {};", op_a, op_a); AddLine("TRUNC.U.CC RC.x, RC.x;"); AddLine("MOV.S {} (NE.x), 0;", temporary); AddLine("SNE.F RC.x, {}, {};", op_b, op_b); AddLine("TRUNC.U.CC RC.x, RC.x;"); AddLine("MOV.S {} (NE.x), 0;", temporary); } return temporary; } template std::string HalfComparison(Operation operation) { std::string tmp1 = AllocVectorTemporary(); const std::string tmp2 = AllocVectorTemporary(); const std::string op_a = Visit(operation[0]); const std::string op_b = Visit(operation[1]); AddLine("UP2H.F {}, {};", tmp1, op_a); AddLine("UP2H.F {}, {};", tmp2, op_b); AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); AddLine("TRUNC.U.CC RC.xy, {};", tmp1); AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); AddLine("MOV.S {}.x (NE.x), -1;", tmp1); AddLine("MOV.S {}.y (NE.y), -1;", tmp1); if constexpr (is_nan) { AddLine("MOVC.F RC.x, {};", op_a); AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); AddLine("MOVC.F RC.x, {};", op_b); AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); } return tmp1; } template std::string AtomicImage(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; const std::size_t num_coords = operation.GetOperandsCount(); const std::size_t num_values = meta.values.size(); const std::string coord = AllocVectorTemporary(); const std::string value = AllocVectorTemporary(); for (std::size_t i = 0; i < num_coords; ++i) { AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); } for (std::size_t i = 0; i < num_values; ++i) { AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); } AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, image_id, ImageType(meta.image.type)); return fmt::format("{}.x", coord); } template std::string Atomic(Operation operation) { std::string temporary = AllocTemporary(); std::string address; std::string_view opname; bool robust = false; if (const auto gmem = std::get_if(&*operation[0])) { address = GlobalMemoryPointer(*gmem); opname = "ATOM"; robust = true; } else if (const auto smem = std::get_if(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; } else { UNREACHABLE(); return "{0, 0, 0, 0}"; } if (robust) { AddLine("IF NE.x;"); } AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); if (robust) { AddLine("ELSE;"); AddLine("MOV.S {}, 0;", temporary); AddLine("ENDIF;"); } return temporary; } template std::string Negate(Operation operation) { std::string temporary = AllocTemporary(); if constexpr (type == 'F') { AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); } else { AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); } return temporary; } template std::string Absolute(Operation operation) { std::string temporary = AllocTemporary(); AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); return temporary; } template std::string BitfieldInsert(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), Visit(operation[0])); return fmt::format("{}.x", temporary); } template std::string BitfieldExtract(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); return fmt::format("{}.x", temporary); } template std::string LocalInvocationId(Operation) { return fmt::format("invocation.localid.{}", swizzle); } template std::string WorkGroupId(Operation) { return fmt::format("invocation.groupid.{}", swizzle); } template std::string ThreadMask(Operation) { return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); } template void AddExpression(std::string_view text, Args&&... args) { shader_source += fmt::format(text, std::forward(args)...); } template void AddLine(std::string_view text, Args&&... args) { AddExpression(text, std::forward(args)...); shader_source += '\n'; } std::string AllocLongVectorTemporary() { max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); return fmt::format("L{}", num_long_temporaries++); } std::string AllocLongTemporary() { return fmt::format("{}.x", AllocLongVectorTemporary()); } std::string AllocVectorTemporary() { max_temporaries = std::max(max_temporaries, num_temporaries + 1); return fmt::format("T{}", num_temporaries++); } std::string AllocTemporary() { return fmt::format("{}.x", AllocVectorTemporary()); } void ResetTemporaries() noexcept { num_temporaries = 0; num_long_temporaries = 0; } const Device& device; const ShaderIR& ir; const Registry& registry; const ShaderType stage; std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; std::size_t num_long_temporaries = 0; std::size_t max_long_temporaries = 0; std::map global_memory_names; std::string shader_source; static constexpr std::string_view ADD_F32 = "ADD.F32"; static constexpr std::string_view ADD_S = "ADD.S"; static constexpr std::string_view ADD_U = "ADD.U"; static constexpr std::string_view MUL_F32 = "MUL.F32"; static constexpr std::string_view MUL_S = "MUL.S"; static constexpr std::string_view MUL_U = "MUL.U"; static constexpr std::string_view DIV_F32 = "DIV.F32"; static constexpr std::string_view DIV_S = "DIV.S"; static constexpr std::string_view DIV_U = "DIV.U"; static constexpr std::string_view MAD_F32 = "MAD.F32"; static constexpr std::string_view RSQ_F32 = "RSQ.F32"; static constexpr std::string_view COS_F32 = "COS.F32"; static constexpr std::string_view SIN_F32 = "SIN.F32"; static constexpr std::string_view EX2_F32 = "EX2.F32"; static constexpr std::string_view LG2_F32 = "LG2.F32"; static constexpr std::string_view SLT_F = "SLT.F32"; static constexpr std::string_view SLT_S = "SLT.S"; static constexpr std::string_view SLT_U = "SLT.U"; static constexpr std::string_view SEQ_F = "SEQ.F32"; static constexpr std::string_view SEQ_S = "SEQ.S"; static constexpr std::string_view SEQ_U = "SEQ.U"; static constexpr std::string_view SLE_F = "SLE.F32"; static constexpr std::string_view SLE_S = "SLE.S"; static constexpr std::string_view SLE_U = "SLE.U"; static constexpr std::string_view SGT_F = "SGT.F32"; static constexpr std::string_view SGT_S = "SGT.S"; static constexpr std::string_view SGT_U = "SGT.U"; static constexpr std::string_view SNE_F = "SNE.F32"; static constexpr std::string_view SNE_S = "SNE.S"; static constexpr std::string_view SNE_U = "SNE.U"; static constexpr std::string_view SGE_F = "SGE.F32"; static constexpr std::string_view SGE_S = "SGE.S"; static constexpr std::string_view SGE_U = "SGE.U"; static constexpr std::string_view AND_S = "AND.S"; static constexpr std::string_view AND_U = "AND.U"; static constexpr std::string_view TRUNC_F = "TRUNC.F"; static constexpr std::string_view TRUNC_S = "TRUNC.S"; static constexpr std::string_view TRUNC_U = "TRUNC.U"; static constexpr std::string_view SHL_S = "SHL.S"; static constexpr std::string_view SHL_U = "SHL.U"; static constexpr std::string_view SHR_S = "SHR.S"; static constexpr std::string_view SHR_U = "SHR.U"; static constexpr std::string_view OR_S = "OR.S"; static constexpr std::string_view OR_U = "OR.U"; static constexpr std::string_view XOR_S = "XOR.S"; static constexpr std::string_view XOR_U = "XOR.U"; static constexpr std::string_view NOT_S = "NOT.S"; static constexpr std::string_view NOT_U = "NOT.U"; static constexpr std::string_view BTC_S = "BTC.S"; static constexpr std::string_view BTC_U = "BTC.U"; static constexpr std::string_view BTFM_S = "BTFM.S"; static constexpr std::string_view BTFM_U = "BTFM.U"; static constexpr std::string_view ROUND_F = "ROUND.F"; static constexpr std::string_view CEIL_F = "CEIL.F"; static constexpr std::string_view FLR_F = "FLR.F"; static constexpr std::string_view I2F_S = "I2F.S"; static constexpr std::string_view I2F_U = "I2F.U"; static constexpr std::string_view MIN_F = "MIN.F"; static constexpr std::string_view MIN_S = "MIN.S"; static constexpr std::string_view MIN_U = "MIN.U"; static constexpr std::string_view MAX_F = "MAX.F"; static constexpr std::string_view MAX_S = "MAX.S"; static constexpr std::string_view MAX_U = "MAX.U"; static constexpr std::string_view MOV_U = "MOV.U"; static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; static constexpr std::string_view TGALL_U = "TGALL.U"; static constexpr std::string_view TGANY_U = "TGANY.U"; static constexpr std::string_view TGEQ_U = "TGEQ.U"; static constexpr std::string_view EXCH = "EXCH"; static constexpr std::string_view ADD = "ADD"; static constexpr std::string_view MIN = "MIN"; static constexpr std::string_view MAX = "MAX"; static constexpr std::string_view AND = "AND"; static constexpr std::string_view OR = "OR"; static constexpr std::string_view XOR = "XOR"; static constexpr std::string_view U32 = "U32"; static constexpr std::string_view S32 = "S32"; static constexpr std::size_t NUM_ENTRIES = static_cast(OperationCode::Amount); using DecompilerType = std::string (ARBDecompiler::*)(Operation); static constexpr std::array OPERATION_DECOMPILERS = { &ARBDecompiler::Assign, &ARBDecompiler::Select, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Trinary, &ARBDecompiler::Negate<'F'>, &ARBDecompiler::Absolute<'F'>, &ARBDecompiler::FClamp, &ARBDecompiler::FCastHalf0, &ARBDecompiler::FCastHalf1, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::FSqrt, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::FSwizzleAdd, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Negate<'S'>, &ARBDecompiler::Absolute<'S'>, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Unary, &ARBDecompiler::BitfieldInsert<'S'>, &ARBDecompiler::BitfieldExtract<'S'>, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Unary, &ARBDecompiler::BitfieldInsert<'U'>, &ARBDecompiler::BitfieldExtract<'U'>, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::HAdd2, &ARBDecompiler::HMul2, &ARBDecompiler::HFma2, &ARBDecompiler::HAbsolute, &ARBDecompiler::HNegate, &ARBDecompiler::HClamp, &ARBDecompiler::HCastFloat, &ARBDecompiler::HUnpack, &ARBDecompiler::HMergeF32, &ARBDecompiler::HMergeH0, &ARBDecompiler::HMergeH1, &ARBDecompiler::HPack2, &ARBDecompiler::LogicalAssign, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Unary, &ARBDecompiler::LogicalPick2, &ARBDecompiler::LogicalAnd2, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatOrdered, &ARBDecompiler::FloatUnordered, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::FloatComparison, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::Binary, &ARBDecompiler::LogicalAddCarry, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::HalfComparison, &ARBDecompiler::Texture, &ARBDecompiler::Texture, &ARBDecompiler::TextureGather, &ARBDecompiler::TextureQueryDimensions, &ARBDecompiler::TextureQueryLod, &ARBDecompiler::TexelFetch, &ARBDecompiler::TextureGradient, &ARBDecompiler::ImageLoad, &ARBDecompiler::ImageStore, &ARBDecompiler::AtomicImage, &ARBDecompiler::AtomicImage, &ARBDecompiler::AtomicImage, &ARBDecompiler::AtomicImage, &ARBDecompiler::AtomicImage, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Atomic, &ARBDecompiler::Branch, &ARBDecompiler::BranchIndirect, &ARBDecompiler::PushFlowStack, &ARBDecompiler::PopFlowStack, &ARBDecompiler::Exit, &ARBDecompiler::Discard, &ARBDecompiler::EmitVertex, &ARBDecompiler::EndPrimitive, &ARBDecompiler::InvocationId, &ARBDecompiler::YNegate, &ARBDecompiler::LocalInvocationId<'x'>, &ARBDecompiler::LocalInvocationId<'y'>, &ARBDecompiler::LocalInvocationId<'z'>, &ARBDecompiler::WorkGroupId<'x'>, &ARBDecompiler::WorkGroupId<'y'>, &ARBDecompiler::WorkGroupId<'z'>, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::Unary, &ARBDecompiler::ThreadId, &ARBDecompiler::ThreadMask<'e', 'q'>, &ARBDecompiler::ThreadMask<'g', 'e'>, &ARBDecompiler::ThreadMask<'g', 't'>, &ARBDecompiler::ThreadMask<'l', 'e'>, &ARBDecompiler::ThreadMask<'l', 't'>, &ARBDecompiler::ShuffleIndexed, &ARBDecompiler::Barrier, &ARBDecompiler::MemoryBarrierGroup, &ARBDecompiler::MemoryBarrierGlobal, }; }; ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, ShaderType stage_, std::string_view identifier) : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { DefineGlobalMemory(); AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); if (ir.IsDecompiled()) { DecompileAST(); } else { DecompileBranchMode(); } AddLine("END"); const std::string code = std::move(shader_source); DeclareHeader(); DeclareVertex(); DeclareGeometry(); DeclareFragment(); DeclareCompute(); DeclareInputAttributes(); DeclareOutputAttributes(); DeclareLocalMemory(); DeclareGlobalMemory(); DeclareConstantBuffers(); DeclareRegisters(); DeclareTemporaries(); DeclarePredicates(); DeclareInternalFlags(); shader_source += code; } std::string_view HeaderStageName(ShaderType stage) { switch (stage) { case ShaderType::Vertex: return "vp"; case ShaderType::Geometry: return "gp"; case ShaderType::Fragment: return "fp"; case ShaderType::Compute: return "cp"; default: UNREACHABLE(); return ""; } } void ARBDecompiler::DefineGlobalMemory() { u32 binding = 0; for (const auto& pair : ir.GetGlobalMemory()) { const GlobalMemoryBase base = pair.first; global_memory_names.emplace(base, binding); ++binding; } } void ARBDecompiler::DeclareHeader() { AddLine("!!NV{}5.0", HeaderStageName(stage)); // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D AddLine("OPTION NV_internal;"); AddLine("OPTION NV_gpu_program_fp64;"); AddLine("OPTION NV_shader_thread_group;"); if (ir.UsesWarps() && device.HasWarpIntrinsics()) { AddLine("OPTION NV_shader_thread_shuffle;"); } if (stage == ShaderType::Vertex) { if (device.HasNvViewportArray2()) { AddLine("OPTION NV_viewport_array2;"); } } if (stage == ShaderType::Fragment) { AddLine("OPTION ARB_draw_buffers;"); } if (device.HasImageLoadFormatted()) { AddLine("OPTION EXT_shader_image_load_formatted;"); } } void ARBDecompiler::DeclareVertex() { if (stage != ShaderType::Vertex) { return; } AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); } void ARBDecompiler::DeclareGeometry() { if (stage != ShaderType::Geometry) { return; } const auto& info = registry.GetGraphicsInfo(); const auto& header = ir.GetHeader(); AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); AddLine("ATTRIB vertex_position = vertex.position;"); } void ARBDecompiler::DeclareFragment() { if (stage != ShaderType::Fragment) { return; } AddLine("OUTPUT result_color7 = result.color[7];"); AddLine("OUTPUT result_color6 = result.color[6];"); AddLine("OUTPUT result_color5 = result.color[5];"); AddLine("OUTPUT result_color4 = result.color[4];"); AddLine("OUTPUT result_color3 = result.color[3];"); AddLine("OUTPUT result_color2 = result.color[2];"); AddLine("OUTPUT result_color1 = result.color[1];"); AddLine("OUTPUT result_color0 = result.color;"); } void ARBDecompiler::DeclareCompute() { if (stage != ShaderType::Compute) { return; } const ComputeInfo& info = registry.GetComputeInfo(); AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); if (info.shared_memory_size_in_words == 0) { return; } const u32 limit = device.GetMaxComputeSharedMemorySize(); u32 size_in_bytes = info.shared_memory_size_in_words * 4; if (size_in_bytes > limit) { LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", size_in_bytes, limit); size_in_bytes = limit; } AddLine("SHARED_MEMORY {};", size_in_bytes); AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); } void ARBDecompiler::DeclareInputAttributes() { if (stage == ShaderType::Compute) { return; } const std::string_view stage_name = StageInputName(stage); for (const auto attribute : ir.GetInputAttributes()) { if (!IsGenericAttribute(attribute)) { continue; } const u32 index = GetGenericAttributeIndex(attribute); std::string_view suffix; if (stage == ShaderType::Fragment) { const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; if (input_mode == PixelImap::Unused) { return; } suffix = GetInputFlags(input_mode); } AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, index); } } void ARBDecompiler::DeclareOutputAttributes() { if (stage == ShaderType::Compute) { return; } for (const auto attribute : ir.GetOutputAttributes()) { if (!IsGenericAttribute(attribute)) { continue; } const u32 index = GetGenericAttributeIndex(attribute); AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); } } void ARBDecompiler::DeclareLocalMemory() { u64 size = 0; if (stage == ShaderType::Compute) { size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; } else { size = ir.GetHeader().GetLocalMemorySize(); } if (size == 0) { return; } const u64 element_count = Common::AlignUp(size, 4) / 4; AddLine("TEMP lmem[{}];", element_count); } void ARBDecompiler::DeclareGlobalMemory() { const size_t num_entries = ir.GetGlobalMemory().size(); if (num_entries > 0) { AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); } } void ARBDecompiler::DeclareConstantBuffers() { u32 binding = 0; for (const auto& cbuf : ir.GetConstantBuffers()) { AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); ++binding; } } void ARBDecompiler::DeclareRegisters() { for (const u32 gpr : ir.GetRegisters()) { AddLine("TEMP R{};", gpr); } } void ARBDecompiler::DeclareTemporaries() { for (std::size_t i = 0; i < max_temporaries; ++i) { AddLine("TEMP T{};", i); } for (std::size_t i = 0; i < max_long_temporaries; ++i) { AddLine("LONG TEMP L{};", i); } } void ARBDecompiler::DeclarePredicates() { for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { AddLine("TEMP P{};", static_cast(pred)); } } void ARBDecompiler::DeclareInternalFlags() { for (const char* name : INTERNAL_FLAG_NAMES) { AddLine("TEMP {};", name); } } void ARBDecompiler::InitializeVariables() { AddLine("MOV.F32 FSWZA[0], -1;"); AddLine("MOV.F32 FSWZA[1], 1;"); AddLine("MOV.F32 FSWZA[2], -1;"); AddLine("MOV.F32 FSWZA[3], 0;"); AddLine("MOV.F32 FSWZB[0], -1;"); AddLine("MOV.F32 FSWZB[1], -1;"); AddLine("MOV.F32 FSWZB[2], 1;"); AddLine("MOV.F32 FSWZB[3], -1;"); if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); } for (const auto attribute : ir.GetOutputAttributes()) { if (!IsGenericAttribute(attribute)) { continue; } const u32 index = GetGenericAttributeIndex(attribute); AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); } for (const u32 gpr : ir.GetRegisters()) { AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); } for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast(pred)); } } void ARBDecompiler::DecompileAST() { const u32 num_flow_variables = ir.GetASTNumVariables(); for (u32 i = 0; i < num_flow_variables; ++i) { AddLine("TEMP F{};", i); } for (u32 i = 0; i < num_flow_variables; ++i) { AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); } InitializeVariables(); VisitAST(ir.GetASTProgram()); } void ARBDecompiler::DecompileBranchMode() { static constexpr u32 FLOW_STACK_SIZE = 20; if (!ir.IsFlowStackDisabled()) { AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); AddLine("TEMP SSY_TOP;"); AddLine("TEMP PBK_TOP;"); } AddLine("TEMP PC;"); if (!ir.IsFlowStackDisabled()) { AddLine("MOV.U SSY_TOP.x, 0;"); AddLine("MOV.U PBK_TOP.x, 0;"); } InitializeVariables(); const auto basic_block_end = ir.GetBasicBlocks().end(); auto basic_block_it = ir.GetBasicBlocks().begin(); const u32 first_address = basic_block_it->first; AddLine("MOV.U PC.x, {};", first_address); AddLine("REP;"); std::size_t num_blocks = 0; while (basic_block_it != basic_block_end) { const auto& [address, bb] = *basic_block_it; ++num_blocks; AddLine("SEQ.S.CC RC.x, PC.x, {};", address); AddLine("IF NE.x;"); VisitBlock(bb); ++basic_block_it; if (basic_block_it != basic_block_end) { const auto op = std::get_if(&*bb[bb.size() - 1]); if (!op || op->GetCode() != OperationCode::Branch) { const u32 next_address = basic_block_it->first; AddLine("MOV.U PC.x, {};", next_address); AddLine("CONT;"); } } AddLine("ELSE;"); } AddLine("RET;"); while (num_blocks--) { AddLine("ENDIF;"); } AddLine("ENDREP;"); } void ARBDecompiler::VisitAST(const ASTNode& node) { if (const auto ast = std::get_if(&*node->GetInnerData())) { for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { const std::string condition = VisitExpression(if_then->condition); ResetTemporaries(); AddLine("MOVC.U RC.x, {};", condition); AddLine("IF NE.x;"); for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } AddLine("ENDIF;"); } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { AddLine("ELSE;"); for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { VisitBlock(decoded->nodes); } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); ResetTemporaries(); } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { const std::string condition = VisitExpression(do_while->condition); ResetTemporaries(); AddLine("REP;"); for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } AddLine("MOVC.U RC.x, {};", condition); AddLine("BRK (NE.x);"); AddLine("ENDREP;"); } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { const bool is_true = ExprIsTrue(ast_return->condition); if (!is_true) { AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); AddLine("IF NE.x;"); ResetTemporaries(); } if (ast_return->kills) { AddLine("KIL TR;"); } else { Exit(); } if (!is_true) { AddLine("ENDIF;"); } } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { if (ExprIsTrue(ast_break->condition)) { AddLine("BRK;"); } else { AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); AddLine("BRK (NE.x);"); ResetTemporaries(); } } else if (std::holds_alternative(*node->GetInnerData())) { // Nothing to do } else { UNREACHABLE(); } } std::string ARBDecompiler::VisitExpression(const Expr& node) { if (const auto expr = std::get_if(&*node)) { std::string result = AllocTemporary(); AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), VisitExpression(expr->operand2)); return result; } if (const auto expr = std::get_if(&*node)) { std::string result = AllocTemporary(); AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), VisitExpression(expr->operand2)); return result; } if (const auto expr = std::get_if(&*node)) { std::string result = AllocTemporary(); AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); return result; } if (const auto expr = std::get_if(&*node)) { return fmt::format("P{}.x", static_cast(expr->predicate)); } if (const auto expr = std::get_if(&*node)) { return Visit(ir.GetConditionCode(expr->cc)); } if (const auto expr = std::get_if(&*node)) { return fmt::format("F{}.x", expr->var_index); } if (const auto expr = std::get_if(&*node)) { return expr->value ? "0xffffffff" : "0"; } if (const auto expr = std::get_if(&*node)) { std::string result = AllocTemporary(); AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); return result; } UNREACHABLE(); return "0"; } void ARBDecompiler::VisitBlock(const NodeBlock& bb) { for (const auto& node : bb) { Visit(node); } } std::string ARBDecompiler::Visit(const Node& node) { if (const auto operation = std::get_if(&*node)) { if (const auto amend_index = operation->GetAmendIndex()) { Visit(ir.GetAmendNode(*amend_index)); } const std::size_t index = static_cast(operation->GetCode()); if (index >= OPERATION_DECOMPILERS.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", index); return {}; } const auto decompiler = OPERATION_DECOMPILERS[index]; if (decompiler == nullptr) { UNREACHABLE_MSG("Undefined operation: {}", index); return {}; } return (this->*decompiler)(*operation); } if (const auto gpr = std::get_if(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "{0, 0, 0, 0}.x"; } return fmt::format("R{}.x", index); } if (const auto cv = std::get_if(&*node)) { return fmt::format("CV{}.x", cv->GetIndex()); } if (const auto immediate = std::get_if(&*node)) { std::string temporary = AllocTemporary(); AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); return temporary; } if (const auto predicate = std::get_if(&*node)) { std::string temporary = AllocTemporary(); switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: AddLine("MOV.S {}, -1;", temporary); break; case Tegra::Shader::Pred::NeverExecute: AddLine("MOV.S {}, 0;", temporary); break; default: AddLine("MOV.S {}, P{}.x;", temporary, static_cast(index)); break; } if (predicate->IsNegated()) { AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); } return temporary; } if (const auto abuf = std::get_if(&*node)) { if (abuf->IsPhysicalBuffer()) { UNIMPLEMENTED_MSG("Physical buffers are not implemented"); return "{0, 0, 0, 0}.x"; } const Attribute::Index index = abuf->GetIndex(); const u32 element = abuf->GetElement(); const char swizzle = Swizzle(element); switch (index) { case Attribute::Index::Position: { if (stage == ShaderType::Geometry) { return fmt::format("{}_position[{}].{}", StageInputName(stage), Visit(abuf->GetBuffer()), swizzle); } else { return fmt::format("{}.position.{}", StageInputName(stage), swizzle); } } case Attribute::Index::TessCoordInstanceIDVertexID: ASSERT(stage == ShaderType::Vertex); switch (element) { case 2: return "vertex.instance"; case 3: return "vertex.id"; } UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); break; case Attribute::Index::PointCoord: switch (element) { case 0: return "fragment.pointcoord.x"; case 1: return "fragment.pointcoord.y"; } UNIMPLEMENTED(); break; case Attribute::Index::FrontFacing: { ASSERT(stage == ShaderType::Fragment); ASSERT(element == 3); const std::string temporary = AllocVectorTemporary(); AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); AddLine("MOV.U.CC RC.x, -RC;"); AddLine("MOV.S {}.x, 0;", temporary); AddLine("MOV.S {}.x (NE.x), -1;", temporary); return fmt::format("{}.x", temporary); } default: if (IsGenericAttribute(index)) { if (stage == ShaderType::Geometry) { return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), Visit(abuf->GetBuffer()), swizzle); } else { return fmt::format("{}.attrib[{}].{}", StageInputName(stage), GetGenericAttributeIndex(index), swizzle); } } UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); break; } return "{0, 0, 0, 0}.x"; } if (const auto cbuf = std::get_if(&*node)) { std::string offset_string; const auto& offset = cbuf->GetOffset(); if (const auto imm = std::get_if(&*offset)) { offset_string = std::to_string(imm->GetValue()); } else { offset_string = Visit(offset); } std::string temporary = AllocTemporary(); AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); return temporary; } if (const auto gmem = std::get_if(&*node)) { std::string temporary = AllocTemporary(); AddLine("MOV {}, 0;", temporary); AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } if (const auto lmem = std::get_if(&*node)) { std::string temporary = Visit(lmem->GetAddress()); AddLine("SHR.U {}, {}, 2;", temporary, temporary); AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); return temporary; } if (const auto smem = std::get_if(&*node)) { std::string temporary = Visit(smem->GetAddress()); AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); return temporary; } if (const auto internal_flag = std::get_if(&*node)) { const std::size_t index = static_cast(internal_flag->GetFlag()); return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); } if (const auto conditional = std::get_if(&*node)) { if (const auto amend_index = conditional->GetAmendIndex()) { Visit(ir.GetAmendNode(*amend_index)); } AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); AddLine("IF NE.x;"); VisitBlock(conditional->GetCode()); AddLine("ENDIF;"); return {}; } if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { // Uncommenting this will generate invalid code. GLASM lacks comments. // AddLine("// {}", cmt->GetText()); return {}; } UNIMPLEMENTED(); return {}; } std::tuple ARBDecompiler::BuildCoords(Operation operation) { const auto& meta = std::get(operation.GetMeta()); UNIMPLEMENTED_IF(meta.sampler.is_indexed); const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && meta.sampler.type == Tegra::Shader::TextureType::TextureCube; const std::size_t count = operation.GetOperandsCount(); std::string temporary = AllocVectorTemporary(); std::size_t i = 0; for (; i < count; ++i) { AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); } if (meta.sampler.is_array) { AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); ++i; } if (meta.sampler.is_shadow) { std::string compare = Visit(meta.depth_compare); if (is_extended) { ASSERT(i == 4); std::string extra_coord = AllocVectorTemporary(); AddLine("MOV.F {}.x, {};", extra_coord, compare); return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; } AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); ++i; } return {temporary, temporary, i}; } std::string ARBDecompiler::BuildAoffi(Operation operation) { const auto& meta = std::get(operation.GetMeta()); if (meta.aoffi.empty()) { return {}; } const std::string temporary = AllocVectorTemporary(); std::size_t i = 0; for (auto& node : meta.aoffi) { AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); } return fmt::format(", offset({})", temporary); } std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { // Read a bindless SSBO, return its address and set CC accordingly // address = c[binding].xy // length = c[binding].z const u32 binding = global_memory_names.at(gmem.GetDescriptor()); const std::string pointer = AllocLongVectorTemporary(); std::string temporary = AllocTemporary(); AddLine("PK64.U {}, c[{}];", pointer, binding); AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), Visit(gmem.GetBaseAddress())); AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); // Compare offset to length and set CC AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); return fmt::format("{}.x", pointer); } void ARBDecompiler::Exit() { if (stage != ShaderType::Fragment) { AddLine("RET;"); return; } const auto safe_get_register = [this](u32 reg) -> std::string { if (ir.GetRegisters().contains(reg)) { return fmt::format("R{}.x", reg); } return "{0, 0, 0, 0}.x"; }; const auto& header = ir.GetHeader(); u32 current_reg = 0; for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { for (u32 component = 0; component < 4; ++component) { if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { continue; } AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), safe_get_register(current_reg)); ++current_reg; } } if (header.ps.omap.depth) { AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); } AddLine("RET;"); } std::string ARBDecompiler::Assign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; std::string dest_name; if (const auto gpr = std::get_if(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } dest_name = fmt::format("R{}.x", gpr->GetIndex()); } else if (const auto abuf = std::get_if(&*dest)) { const u32 element = abuf->GetElement(); const char swizzle = Swizzle(element); switch (const Attribute::Index index = abuf->GetIndex()) { case Attribute::Index::Position: dest_name = fmt::format("result.position.{}", swizzle); break; case Attribute::Index::LayerViewportPointSize: switch (element) { case 0: UNIMPLEMENTED(); return {}; case 1: case 2: if (!device.HasNvViewportArray2()) { LOG_ERROR( Render_OpenGL, "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); return {}; } dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; break; case 3: dest_name = "result.pointsize.x"; break; } break; case Attribute::Index::ClipDistances0123: dest_name = fmt::format("result.clip[{}].x", element); break; case Attribute::Index::ClipDistances4567: dest_name = fmt::format("result.clip[{}].x", element + 4); break; default: if (!IsGenericAttribute(index)) { UNREACHABLE(); return {}; } dest_name = fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); break; } } else if (const auto lmem = std::get_if(&*dest)) { const std::string address = Visit(lmem->GetAddress()); AddLine("SHR.U {}, {}, 2;", address, address); dest_name = fmt::format("lmem[{}].x", address); } else if (const auto smem = std::get_if(&*dest)) { AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if(&*dest)) { AddLine("IF NE.x;"); AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); AddLine("ENDIF;"); ResetTemporaries(); return {}; } else { UNREACHABLE(); ResetTemporaries(); return {}; } AddLine("MOV.U {}, {};", dest_name, Visit(src)); ResetTemporaries(); return {}; } std::string ARBDecompiler::Select(Operation operation) { std::string temporary = AllocTemporary(); AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), Visit(operation[2])); return temporary; } std::string ARBDecompiler::FClamp(Operation operation) { // 1.0f in hex, replace with std::bit_cast on C++20 static constexpr u32 POSITIVE_ONE = 0x3f800000; std::string temporary = AllocTemporary(); const Node& value = operation[0]; const Node& low = operation[1]; const Node& high = operation[2]; const auto* const imm_low = std::get_if(&*low); const auto* const imm_high = std::get_if(&*high); if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); } else { AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); } return temporary; } std::string ARBDecompiler::FCastHalf0(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::FCastHalf1(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); AddLine("MOV {}.x, {}.y;", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::FSqrt(Operation operation) { std::string temporary = AllocTemporary(); AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); AddLine("RCP.F32 {}, {};", temporary, temporary); return temporary; } std::string ARBDecompiler::FSwizzleAdd(Operation operation) { const std::string temporary = AllocVectorTemporary(); if (!device.HasWarpIntrinsics()) { LOG_ERROR(Render_OpenGL, "NV_shader_thread_shuffle is missing. Kepler or better is required."); AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); return fmt::format("{}.x", temporary); } AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HAdd2(Operation operation) { const std::string tmp1 = AllocVectorTemporary(); const std::string tmp2 = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); AddLine("PK2H.F {}.x, {};", tmp1, tmp1); return fmt::format("{}.x", tmp1); } std::string ARBDecompiler::HMul2(Operation operation) { const std::string tmp1 = AllocVectorTemporary(); const std::string tmp2 = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); AddLine("PK2H.F {}.x, {};", tmp1, tmp1); return fmt::format("{}.x", tmp1); } std::string ARBDecompiler::HFma2(Operation operation) { const std::string tmp1 = AllocVectorTemporary(); const std::string tmp2 = AllocVectorTemporary(); const std::string tmp3 = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); AddLine("PK2H.F {}.x, {};", tmp1, tmp1); return fmt::format("{}.x", tmp1); } std::string ARBDecompiler::HAbsolute(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HNegate(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); AddLine("MOVC.S RC.x, {};", Visit(operation[1])); AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); AddLine("MOVC.S RC.x, {};", Visit(operation[2])); AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HClamp(Operation operation) { const std::string tmp1 = AllocVectorTemporary(); const std::string tmp2 = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); AddLine("PK2H.F {}.x, {};", tmp1, tmp1); return fmt::format("{}.x", tmp1); } std::string ARBDecompiler::HCastFloat(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HUnpack(Operation operation) { std::string operand = Visit(operation[0]); switch (std::get(operation.GetMeta())) { case Tegra::Shader::HalfType::H0_H1: return operand; case Tegra::Shader::HalfType::F32: { const std::string temporary = AllocVectorTemporary(); AddLine("MOV.U {}.x, {};", temporary, operand); AddLine("MOV.U {}.y, {}.x;", temporary, temporary); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } case Tegra::Shader::HalfType::H0_H0: { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, operand); AddLine("MOV.U {}.y, {}.x;", temporary, temporary); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } case Tegra::Shader::HalfType::H1_H1: { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, operand); AddLine("MOV.U {}.x, {}.y;", temporary, temporary); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } } UNREACHABLE(); return "{0, 0, 0, 0}.x"; } std::string ARBDecompiler::HMergeF32(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HMergeH0(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); AddLine("MOV.U {}.x, {}.z;", temporary, temporary); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HMergeH1(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); AddLine("MOV.U {}.y, {}.w;", temporary, temporary); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::HPack2(Operation operation) { const std::string temporary = AllocVectorTemporary(); AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); AddLine("PK2H.F {}.x, {};", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; std::string target; if (const auto pred = std::get_if(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const Tegra::Shader::Pred index = pred->GetIndex(); switch (index) { case Tegra::Shader::Pred::NeverExecute: case Tegra::Shader::Pred::UnusedIndex: // Writing to these predicates is a no-op return {}; } target = fmt::format("P{}.x", static_cast(index)); } else if (const auto internal_flag = std::get_if(&*dest)) { const std::size_t index = static_cast(internal_flag->GetFlag()); target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); } else { UNREACHABLE(); ResetTemporaries(); return {}; } AddLine("MOV.U {}, {};", target, Visit(src)); ResetTemporaries(); return {}; } std::string ARBDecompiler::LogicalPick2(Operation operation) { std::string temporary = AllocTemporary(); const u32 index = std::get(*operation[1]).GetValue(); AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); return temporary; } std::string ARBDecompiler::LogicalAnd2(Operation operation) { std::string temporary = AllocTemporary(); const std::string op = Visit(operation[0]); AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); return temporary; } std::string ARBDecompiler::FloatOrdered(Operation operation) { std::string temporary = AllocTemporary(); AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); AddLine("MOV.S {}, -1;", temporary); AddLine("MOV.S {} (NAN.x), 0;", temporary); AddLine("MOV.S {} (NAN.y), 0;", temporary); return temporary; } std::string ARBDecompiler::FloatUnordered(Operation operation) { std::string temporary = AllocTemporary(); AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); AddLine("MOV.S {}, 0;", temporary); AddLine("MOV.S {} (NAN.x), -1;", temporary); AddLine("MOV.S {} (NAN.y), -1;", temporary); return temporary; } std::string ARBDecompiler::LogicalAddCarry(Operation operation) { std::string temporary = AllocTemporary(); AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); AddLine("MOV.S {}, 0;", temporary); AddLine("IF CF.x;"); AddLine("MOV.S {}, -1;", temporary); AddLine("ENDIF;"); return temporary; } std::string ARBDecompiler::Texture(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; const auto [coords, temporary, swizzle] = BuildCoords(operation); std::string_view opcode = "TEX"; std::string extra; if (meta.bias) { ASSERT(!meta.lod); opcode = "TXB"; if (swizzle < 4) { AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); } else { const std::string bias = AllocTemporary(); AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); extra = fmt::format(" {},", bias); } } if (meta.lod) { ASSERT(!meta.bias); opcode = "TXL"; if (swizzle < 4) { AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); } else { const std::string lod = AllocTemporary(); AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); extra = fmt::format(" {},", lod); } } AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, TextureType(meta), BuildAoffi(operation)); AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::TextureGather(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; const auto [coords, temporary, swizzle] = BuildCoords(operation); std::string comp; if (!meta.sampler.is_shadow) { const auto& immediate = std::get(*meta.component); comp = fmt::format(".{}", Swizzle(immediate.GetValue())); } AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, TextureType(meta), BuildAoffi(operation)); AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const std::string temporary = AllocVectorTemporary(); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; ASSERT(!meta.sampler.is_array); const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::TextureQueryLod(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const std::string temporary = AllocVectorTemporary(); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; ASSERT(!meta.sampler.is_array); const std::size_t count = operation.GetOperandsCount(); for (std::size_t i = 0; i < count; ++i) { AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); } AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); AddLine("TRUNC.S {}, {};", temporary, temporary); AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::TexelFetch(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; const auto [coords, temporary, swizzle] = BuildCoords(operation); if (!meta.sampler.is_buffer) { ASSERT(swizzle < 4); AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); } AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), BuildAoffi(operation)); AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::TextureGradient(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; const std::string ddx = AllocVectorTemporary(); const std::string ddy = AllocVectorTemporary(); const std::string coord = std::get<1>(BuildCoords(operation)); const std::size_t num_components = meta.derivates.size() / 2; for (std::size_t index = 0; index < num_components; ++index) { const char swizzle = Swizzle(index); AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); } const std::string_view result = coord; AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, TextureType(meta), BuildAoffi(operation)); AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); return fmt::format("{}.x", result); } std::string ARBDecompiler::ImageLoad(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; const std::size_t count = operation.GetOperandsCount(); const std::string_view type = ImageType(meta.image.type); const std::string temporary = AllocVectorTemporary(); for (std::size_t i = 0; i < count; ++i) { AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); } AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::ImageStore(Operation operation) { const auto& meta = std::get(operation.GetMeta()); const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; const std::size_t num_coords = operation.GetOperandsCount(); const std::size_t num_values = meta.values.size(); const std::string_view type = ImageType(meta.image.type); const std::string coord = AllocVectorTemporary(); const std::string value = AllocVectorTemporary(); for (std::size_t i = 0; i < num_coords; ++i) { AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); } for (std::size_t i = 0; i < num_values; ++i) { AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); } AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); return {}; } std::string ARBDecompiler::Branch(Operation operation) { const auto target = std::get(*operation[0]); AddLine("MOV.U PC.x, {};", target.GetValue()); AddLine("CONT;"); return {}; } std::string ARBDecompiler::BranchIndirect(Operation operation) { AddLine("MOV.U PC.x, {};", Visit(operation[0])); AddLine("CONT;"); return {}; } std::string ARBDecompiler::PushFlowStack(Operation operation) { const auto stack = std::get(operation.GetMeta()); const u32 target = std::get(*operation[0]).GetValue(); const std::string_view stack_name = StackName(stack); AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); return {}; } std::string ARBDecompiler::PopFlowStack(Operation operation) { const auto stack = std::get(operation.GetMeta()); const std::string_view stack_name = StackName(stack); AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); AddLine("CONT;"); return {}; } std::string ARBDecompiler::Exit(Operation) { Exit(); return {}; } std::string ARBDecompiler::Discard(Operation) { AddLine("KIL TR;"); return {}; } std::string ARBDecompiler::EmitVertex(Operation) { AddLine("EMIT;"); return {}; } std::string ARBDecompiler::EndPrimitive(Operation) { AddLine("ENDPRIM;"); return {}; } std::string ARBDecompiler::InvocationId(Operation) { return "primitive.invocation"; } std::string ARBDecompiler::YNegate(Operation) { LOG_WARNING(Render_OpenGL, "(STUBBED)"); std::string temporary = AllocTemporary(); AddLine("MOV.F {}, 1;", temporary); return temporary; } std::string ARBDecompiler::ThreadId(Operation) { return fmt::format("{}.threadid", StageInputName(stage)); } std::string ARBDecompiler::ShuffleIndexed(Operation operation) { if (!device.HasWarpIntrinsics()) { LOG_ERROR(Render_OpenGL, "NV_shader_thread_shuffle is missing. Kepler or better is required."); return Visit(operation[0]); } const std::string temporary = AllocVectorTemporary(); AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), Visit(operation[1])); AddLine("MOV.U {}.x, {}.y;", temporary, temporary); return fmt::format("{}.x", temporary); } std::string ARBDecompiler::Barrier(Operation) { AddLine("BAR;"); return {}; } std::string ARBDecompiler::MemoryBarrierGroup(Operation) { AddLine("MEMBAR.CTA;"); return {}; } std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { AddLine("MEMBAR;"); return {}; } } // Anonymous namespace std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, const VideoCommon::Shader::Registry& registry, Tegra::Engines::ShaderType stage, std::string_view identifier) { return ARBDecompiler(device, ir, registry, stage, identifier).Code(); } } // namespace OpenGL