diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 114 | ||||
-rw-r--r-- | src/video_core/shader/decode/xmad.cpp | 39 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 12 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 17 |
4 files changed, 144 insertions, 38 deletions
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a99ae19bf..a775b402b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -7,7 +7,9 @@ #include <fmt/format.h> #include "common/assert.h" +#include "common/bit_field.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); } const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( - bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); + bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); break; } case OpCode::Id::TEXS: { @@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } case OpCode::Id::TLD4: { ASSERT(instr.tld4.array == 0); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), "NDV is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), @@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array)); + const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); + WriteTexInstructionFloat( + bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); break; } case OpCode::Id::TLD4S: { @@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); SetTemporal(bb, indexer++, value); @@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { for (u32 element = 0; element < 2; ++element) { auto params = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporal(bb, element, value); } @@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset) { + Node array, Node depth_compare, u32 bias_offset, + std::vector<Node> aoffi) { const bool is_array = array; const bool is_shadow = depth_compare; @@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; + MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + TextureProcessMode process_mode, bool depth_compare, bool is_array, + bool is_aoffi) { + const bool lod_bias_enabled{ + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + + u64 parameter_register = instr.gpr20.Value(); + if (lod_bias_enabled) { + ++parameter_register; + } const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); @@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; + std::vector<Node> aoffi; + if (is_aoffi) { + aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); + } + Node dc{}; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); + dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array) { + bool is_array, bool is_aoffi) { const std::size_t coord_count = GetCoordCount(texture_type); const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); @@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const u64 coord_register = array_register + (is_array ? 1 : 0); std::vector<Node> coords; - for (size_t i = 0; i < coord_count; ++i) + for (std::size_t i = 0; i < coord_count; ++i) { coords.push_back(GetRegister(coord_register + i)); + } + + u64 parameter_register = instr.gpr20.Value(); + std::vector<Node> aoffi; + if (is_aoffi) { + aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); + } + + Node dc{}; + if (depth_compare) { + dc = GetRegister(parameter_register++); + } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; + MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; @@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( return {coord_count, total_coord_count}; } -} // namespace VideoCommon::Shader
\ No newline at end of file +std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, + bool is_tld4) { + const auto [coord_offsets, size, wrap_value, + diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { + if (is_tld4) { + return {{0, 8, 16}, 6, 32, 64}; + } else { + return {{0, 4, 8}, 4, 8, 16}; + } + }(); + const u32 mask = (1U << size) - 1; + + std::vector<Node> aoffi; + aoffi.reserve(coord_count); + + const auto aoffi_immediate{ + TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; + if (!aoffi_immediate) { + // Variable access, not supported on AMD. + LOG_WARNING(HW_GPU, + "AOFFI constant folding failed, some hardware might have graphical issues"); + for (std::size_t coord = 0; coord < coord_count; ++coord) { + const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); + const Node condition = + Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); + const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); + aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); + } + return aoffi; + } + + for (std::size_t coord = 0; coord < coord_count; ++coord) { + s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; + if (value >= wrap_value) { + value -= diff_value; + } + aoffi.push_back(Immediate(value)); + } + return aoffi; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index c34843307..db15c0718 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const bool is_signed_b = instr.xmad.sign_b == 1; const bool is_signed_c = is_signed_a; - auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> { + auto [is_merge, is_psl, is_high_b, mode, op_b, + op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::XMAD_CR: return {instr.xmad.merge_56, + instr.xmad.product_shift_left_second, + instr.xmad.high_b, + instr.xmad.mode_cbf, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; + return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, + instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RC: - return {false, GetRegister(instr.gpr39), + return {false, + false, + instr.xmad.high_b, + instr.xmad.mode_cbf, + GetRegister(instr.gpr39), GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)), + return {instr.xmad.merge_37, + instr.xmad.product_shift_left, + false, + instr.xmad.mode, + Immediate(static_cast<u32>(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; } UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, Immediate(0), Immediate(0)}; + return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; }(); op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); const Node original_b = op_b; - op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16); + op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); // TODO(Rodrigo): Use an appropiate sign for this operation Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); - if (instr.xmad.product_shift_left) { + if (is_psl) { product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); } + SetTemporal(bb, 0, product); + product = GetTemporal(0); const Node original_c = op_c; + const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error op_c = [&]() { - switch (instr.xmad.mode) { + switch (set_mode) { case Tegra::Shader::XmadMode::None: return original_c; case Tegra::Shader::XmadMode::CLo: @@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { } }(); + SetTemporal(bb, 1, op_c); + op_c = GetTemporal(1); + // TODO(Rodrigo): Use an appropiate sign for this operation Node sum = Operation(OperationCode::IAdd, product, op_c); + SetTemporal(bb, 2, sum); + sum = GetTemporal(2); if (is_merge) { const Node a = BitfieldExtract(sum, 0, 16); const Node b = @@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 5bc3a3900..4888998d3 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -7,6 +7,7 @@ #include <array> #include <cstring> #include <map> +#include <optional> #include <set> #include <string> #include <tuple> @@ -290,6 +291,7 @@ struct MetaTexture { const Sampler& sampler; Node array{}; Node depth_compare{}; + std::vector<Node> aoffi; Node bias{}; Node lod{}; Node component{}; @@ -741,14 +743,14 @@ private: Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); + bool is_array, bool is_aoffi); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, bool is_array); Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array); + bool depth_compare, bool is_array, bool is_aoffi); Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, bool is_array); @@ -757,9 +759,11 @@ private: Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); + std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); + Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset); + Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -773,6 +777,8 @@ private: Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); template <typename... T> diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 33b071747..4505667ff 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -6,6 +6,7 @@ #include <utility> #include <variant> +#include "common/common_types.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -14,7 +15,7 @@ namespace { std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { - const Node node = code[cursor]; + const Node node = code.at(cursor); if (const auto operation = std::get_if<OperationNode>(node)) { if (operation->GetCode() == operation_code) return {node, cursor}; @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } +std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register + // that it uses as operand + const auto [found, found_cursor] = + TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + if (!found) { + return {}; + } + if (const auto immediate = std::get_if<ImmediateNode>(found)) { + return immediate->GetValue(); + } + return {}; +} + std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) { for (; cursor >= 0; --cursor) { |