From fda0835300a7ef6112791ae503435c81ffe883f5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 25 Mar 2021 19:59:35 +0100 Subject: shader: Implement TLD4S. --- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/texture_gather_swizzled.cpp | 133 +++++++++++++++++++++ 2 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp (limited to 'src/shader_recompiler/frontend/maxwell/translate/impl') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e59c3326e..788765c21 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -349,10 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { ThrowNotImplemented(Opcode::TLD_b); } -void TranslatorVisitor::TLD4S(u64) { - ThrowNotImplemented(Opcode::TLD4S); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..beab515ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -0,0 +1,133 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast(tld4s.component_type.Value())); + info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const Encoding tld4s{insn}; + v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell -- cgit v1.2.3