From 11f4e739bd6d076606069420a38a5e8535fa9440 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Jul 2019 17:38:25 -0400 Subject: Shader_Ir: Implement F16 Variants of F2F, F2I, I2F. This commit takes care of implementing the F16 Variants of the conversion instructions and makes sure conversions are done. --- src/video_core/engines/shader_bytecode.h | 2 -- .../renderer_opengl/gl_shader_decompiler.cpp | 18 +++++++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 18 +++++++++++++ src/video_core/shader/decode/conversion.cpp | 30 ++++++++++++++++++---- src/video_core/shader/node.h | 25 ++++++++++-------- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8520a0143..cc307f8a4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1018,8 +1018,6 @@ union Instruction { } f2i; union { - BitField<8, 2, Register::Size> src_size; - BitField<10, 2, Register::Size> dst_size; BitField<39, 4, u64> rounding; // H0, H1 extract for F16 missing BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e19d502bc..7e9251626 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1122,6 +1122,16 @@ private: Type::Float); } + std::string FCastHalf0(Operation operation) { + const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); + return fmt::format("({})[0]", op_a); + } + + std::string FCastHalf1(Operation operation) { + const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); + return fmt::format("({})[1]", op_a); + } + template std::string Min(Operation operation) { return GenerateBinaryCall(operation, "min", type, type, type); @@ -1278,6 +1288,11 @@ private: return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); } + std::string HCastFloat(Operation operation) { + const std::string op_a = VisitOperand(operation, 0, Type::Float); + return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); + } + std::string HUnpack(Operation operation) { const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; const auto value = [&]() -> std::string { @@ -1718,6 +1733,8 @@ private: &GLSLDecompiler::Negate, &GLSLDecompiler::Absolute, &GLSLDecompiler::FClamp, + &GLSLDecompiler::FCastHalf0, + &GLSLDecompiler::FCastHalf1, &GLSLDecompiler::Min, &GLSLDecompiler::Max, &GLSLDecompiler::FCos, @@ -1778,6 +1795,7 @@ private: &GLSLDecompiler::Absolute, &GLSLDecompiler::HNegate, &GLSLDecompiler::HClamp, + &GLSLDecompiler::HCastFloat, &GLSLDecompiler::HUnpack, &GLSLDecompiler::HMergeF32, &GLSLDecompiler::HMergeH0, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d267712c9..24a591797 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -735,6 +735,16 @@ private: return {}; } + Id FCastHalf0(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id FCastHalf1(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HNegate(Operation operation) { UNIMPLEMENTED(); return {}; @@ -745,6 +755,11 @@ private: return {}; } + Id HCastFloat(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HUnpack(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1210,6 +1225,8 @@ private: &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, + &SPIRVDecompiler::FCastHalf0, + &SPIRVDecompiler::FCastHalf1, &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, @@ -1270,6 +1287,7 @@ private: &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, &SPIRVDecompiler::HNegate, &SPIRVDecompiler::HClamp, + &SPIRVDecompiler::HCastFloat, &SPIRVDecompiler::HUnpack, &SPIRVDecompiler::HMergeF32, &SPIRVDecompiler::HMergeH0, diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..8973fbefa 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); @@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2F_R: case OpCode::Id::F2F_C: case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); @@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + // TODO: figure where extract is sey in the encoding + value = Operation(OperationCode::FCastHalf0, PRECISE, value); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { @@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast(instr.conversion.f2f.rounding.Value())); - return Immediate(0); + return value; } }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2I_R: case OpCode::Id::F2I_C: case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2I is not implemented"); Node value = [&]() { @@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + // TODO: figure where extract is sey in the encoding + value = Operation(OperationCode::FCastHalf0, PRECISE, value); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 715184d67..5f0852364 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -30,6 +30,8 @@ enum class OperationCode { FNegate, /// (MetaArithmetic, float a) -> float FAbsolute, /// (MetaArithmetic, float a) -> float FClamp, /// (MetaArithmetic, float value, float min, float max) -> float + FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float + FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float FMin, /// (MetaArithmetic, float a, float b) -> float FMax, /// (MetaArithmetic, float a, float b) -> float FCos, /// (MetaArithmetic, float a) -> float @@ -83,17 +85,18 @@ enum class OperationCode { UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAbsolute, /// (f16vec2 a) -> f16vec2 + HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 + HMergeF32, /// (f16vec2 src) -> float + HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HPack2, /// (float a, float b) -> f16vec2 LogicalAssign, /// (bool& dst, bool src) -> void LogicalAnd, /// (bool a, bool b) -> bool -- cgit v1.2.3