From 17a82b56d74afcebaad78ce4754d8ee99ea66f93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Mar 2021 04:54:43 -0300 Subject: shader: Implement TEXS --- src/shader_recompiler/CMakeLists.txt | 3 +- .../backend/spirv/emit_spirv_image.cpp | 11 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 + src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/modifiers.h | 3 +- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../maxwell/translate/impl/texture_fetch.cpp | 232 ++++++++++++++++++ .../translate/impl/texture_fetch_swizzled.cpp | 262 +++++++++++++++++++++ .../maxwell/translate/impl/texture_sample.cpp | 232 ------------------ 9 files changed, 519 insertions(+), 239 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp (limited to 'src/shader_recompiler') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 20409e09a..97e9b4c8e 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -102,7 +102,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp - frontend/maxwell/translate/impl/texture_sample.cpp + frontend/maxwell/translate/impl/texture_fetch.cpp + frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 5f4783c95..f75152911 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -57,18 +57,27 @@ Id Texture(EmitContext& ctx, const IR::Value& index) { throw NotImplementedException("Indirect texture sample"); } +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { + const auto info{inst->Flags()}; + if (info.relaxed_precision != 0) { + ctx.Decorate(sample, spv::Decoration::RelaxedPrecision); + } + return sample; +} + template Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, Id result_type, Args&&... args) { IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; if (!sparse) { - return (ctx.*non_sparse_ptr)(result_type, std::forward(args)...); + return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward(args)...)); } const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward(args)...)}; const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); sparse->Invalidate(); + Decorate(ctx, inst, sample); return ctx.OpCompositeExtract(result_type, sample, 1U); } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 556961fa4..d94596ee9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -512,6 +512,14 @@ Value IREmitter::UnpackFloat2x16(const U32& value) { return Inst(Opcode::UnpackFloat2x16, value); } +U32 IREmitter::PackHalf2x16(const Value& vector) { + return Inst(Opcode::PackHalf2x16, vector); +} + +Value IREmitter::UnpackHalf2x16(const U32& value) { + return Inst(Opcode::UnpackHalf2x16, value); +} + F64 IREmitter::PackDouble2x32(const Value& vector) { return Inst(Opcode::PackDouble2x32, vector); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 74fb3dbcb..27ff5a29d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -115,6 +115,9 @@ public: [[nodiscard]] U32 PackFloat2x16(const Value& vector); [[nodiscard]] Value UnpackFloat2x16(const U32& value); + [[nodiscard]] U32 PackHalf2x16(const Value& vector); + [[nodiscard]] Value UnpackHalf2x16(const U32& value); + [[nodiscard]] F64 PackDouble2x32(const Value& vector); [[nodiscard]] Value UnpackDouble2x32(const F64& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index ad07700ae..308c00153 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -36,7 +36,8 @@ union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; BitField<8, 1, u32> has_bias; - BitField<16, 1, u32> has_lod_clamp; + BitField<9, 1, u32> has_lod_clamp; + BitField<10, 1, u32> relaxed_precision; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 92da5c7e8..9aa7b836c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -553,10 +553,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TEXS(u64) { - ThrowNotImplemented(Opcode::TEXS); -} - void TranslatorVisitor::TLD(u64) { ThrowNotImplemented(Opcode::TLD); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..98d9f4c64 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -0,0 +1,232 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..ac1615b00 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -0,0 +1,262 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<53, 4, u64> encoding; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +template +IR::Value Composite(TranslatorVisitor& v, Args... regs) { + return v.ir.CompositeConstruct(v.F(regs)...); +} + +IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { + return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding texs{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::Reg reg_a{texs.src_reg_a}; + const IR::Reg reg_b{texs.src_reg_b}; + IR::TextureInstInfo info{}; + if (texs.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + switch (texs.encoding) { + case 0: // 1D.LZ + info.type.Assign(TextureType::Color1D); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info); + case 1: // 2D + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); + case 2: // 2D.LZ + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info); + case 3: // 2D.LL + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, + {}, info); + case 4: // 2D.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + {}, {}, {}, info); + case 5: // 2D.LL.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), + v.F(reg_b + 1), v.F(reg_b), {}, {}, info); + case 6: // 2D.LZ.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + zero, {}, {}, info); + case 7: // ARRAY_2D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleImplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + {}, {}, {}, info); + case 8: // ARRAY_2D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + zero, {}, {}, info); + case 9: // ARRAY_2D.LZ.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ShadowArray2D); + return v.ir.ImageSampleDrefExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + v.F(reg_b + 1), zero, {}, {}, info); + case 10: // 3D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 11: // 3D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, + {}, info); + case 12: // CUBE + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 13: // CUBE.LL + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), + v.F(reg_b + 1), {}, {}, info); + default: + throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); + } +} + +unsigned Swizzle(u64 insn) { + const Encoding texs{insn}; + const size_t encoding{texs.swizzle}; + if (texs.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + const bool is_shadow{sample.Type() == IR::Type::F32}; + if (is_shadow) { + const bool is_alpha{component == 3}; + return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; + } else { + return IR::F32{v.ir.CompositeExtract(sample, component)}; + } +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding texs{insn}; + switch (index) { + case 0: + return texs.dest_reg_a; + case 1: + CheckAlignment(texs.dest_reg_a, 2); + return texs.dest_reg_a + 1; + case 2: + return texs.dest_reg_b; + case 3: + CheckAlignment(texs.dest_reg_b, 2); + return texs.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding texs{insn}; + switch (store_index) { + case 1: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEXS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp deleted file mode 100644 index 98d9f4c64..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/ir/modifiers.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -namespace { -enum class Blod : u64 { - None, - LZ, - LB, - LL, - INVALIDBLOD4, - INVALIDBLOD5, - LBA, - LLA, -}; - -enum class TextureType : u64 { - _1D, - ARRAY_1D, - _2D, - ARRAY_2D, - _3D, - ARRAY_3D, - CUBE, - ARRAY_CUBE, -}; - -Shader::TextureType GetType(TextureType type, bool dc) { - switch (type) { - case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; - case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; - case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; - case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; - case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; - case TextureType::ARRAY_3D: - throw NotImplementedException("3D array texture type"); - case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; - case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; - } - throw NotImplementedException("Invalid texture type {}", type); -} - -IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; - switch (type) { - case TextureType::_1D: - return v.F(reg); - case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); - case TextureType::_2D: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); - case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); - case TextureType::_3D: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); - case TextureType::ARRAY_3D: - throw NotImplementedException("3D array texture type"); - case TextureType::CUBE: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); - case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); - } - throw NotImplementedException("Invalid texture type {}", type); -} - -IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { - switch (blod) { - case Blod::None: - return v.ir.Imm32(0.0f); - case Blod::LZ: - return v.ir.Imm32(0.0f); - case Blod::LB: - case Blod::LL: - case Blod::LBA: - case Blod::LLA: - return v.F(reg++); - case Blod::INVALIDBLOD4: - case Blod::INVALIDBLOD5: - break; - } - throw NotImplementedException("Invalid blod {}", blod); -} - -IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { - const IR::U32 value{v.X(reg++)}; - switch (type) { - case TextureType::_1D: - case TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); - case TextureType::_2D: - case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); - case TextureType::_3D: - case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); - case TextureType::CUBE: - case TextureType::ARRAY_CUBE: - throw NotImplementedException("Illegal offset on CUBE sample"); - } - throw NotImplementedException("Invalid texture type {}", type); -} - -bool HasExplicitLod(Blod blod) { - switch (blod) { - case Blod::LL: - case Blod::LLA: - case Blod::LZ: - return true; - default: - return false; - } -} - -void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, - std::optional cbuf_offset) { - union { - u64 raw; - BitField<35, 1, u64> ndv; - BitField<49, 1, u64> nodep; - BitField<50, 1, u64> dc; - BitField<51, 3, IR::Pred> sparse_pred; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> coord_reg; - BitField<20, 8, IR::Reg> meta_reg; - BitField<28, 3, TextureType> type; - BitField<31, 4, u64> mask; - } const tex{insn}; - - if (lc) { - throw NotImplementedException("LC"); - } - const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; - - IR::Reg meta_reg{tex.meta_reg}; - IR::Value handle; - IR::Value offset; - IR::F32 dref; - IR::F32 lod_clamp; - if (cbuf_offset) { - handle = v.ir.Imm32(*cbuf_offset); - } else { - handle = v.X(meta_reg++); - } - const IR::F32 lod{MakeLod(v, meta_reg, blod)}; - if (aoffi) { - offset = MakeOffset(v, meta_reg, tex.type); - } - if (tex.dc != 0) { - dref = v.F(meta_reg++); - } - IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); - info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); - info.has_lod_clamp.Assign(lc ? 1 : 0); - - const IR::Value sample{[&]() -> IR::Value { - if (tex.dc == 0) { - if (HasExplicitLod(blod)) { - return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); - } else { - return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); - } - } - if (HasExplicitLod(blod)) { - return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); - } else { - return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); - } - }()}; - - for (int element = 0; element < 4; ++element) { - if (((tex.mask >> element) & 1) == 0) { - continue; - } - IR::F32 value; - if (tex.dc != 0) { - value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); - } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; - } - v.F(tex.dest_reg + element, value); - } - if (tex.sparse_pred != IR::Pred::PT) { - v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); - } -} -} // Anonymous namespace - -void TranslatorVisitor::TEX(u64 insn) { - union { - u64 raw; - BitField<54, 1, u64> aoffi; - BitField<55, 3, Blod> blod; - BitField<58, 1, u64> lc; - BitField<36, 13, u64> cbuf_offset; - } const tex{insn}; - - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); -} - -void TranslatorVisitor::TEX_b(u64 insn) { - union { - u64 raw; - BitField<36, 1, u64> aoffi; - BitField<37, 3, Blod> blod; - BitField<40, 1, u64> lc; - } const tex{insn}; - - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); -} - -} // namespace Shader::Maxwell -- cgit v1.2.3