diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 12 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 12 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_half.cpp | 51 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 35 | ||||
-rw-r--r-- | src/video_core/shader/decode/register_set_predicate.cpp | 52 | ||||
-rw-r--r-- | src/video_core/shader/memory_util.cpp | 77 | ||||
-rw-r--r-- | src/video_core/shader/memory_util.h | 47 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 2 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 3 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 20 |
10 files changed, 230 insertions, 81 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index e00a3fb70..8d86020f6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -115,17 +116,6 @@ Pred GetPredicate(u32 index, bool negated) { return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); } -/** - * Returns whether the instruction at the specified offset is a 'sched' instruction. - * Sched instructions always appear before a sequence of 3 instructions. - */ -constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { - constexpr u32 SchedPeriod = 4; - u32 absolute_offset = offset - main_offset; - - return (absolute_offset % SchedPeriod) == 0; -} - enum class ParseResult : u32 { ControlCaught, BlockEnd, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 8427837b7..a75a5cc63 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -13,6 +13,7 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -23,17 +24,6 @@ using Tegra::Shader::OpCode; namespace { -/** - * Returns whether the instruction at the specified offset is a 'sched' instruction. - * Sched instructions always appear before a sequence of 3 instructions. - */ -constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { - constexpr u32 SchedPeriod = 4; - u32 absolute_offset = offset - main_offset; - - return (absolute_offset % SchedPeriod) == 0; -} - void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, const std::list<Sampler>& used_samplers) { if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index ee7d9a29d..a276aee44 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -19,22 +19,46 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::HADD2_C || - opcode->get().GetId() == OpCode::Id::HADD2_R) { + bool negate_a = false; + bool negate_b = false; + bool absolute_a = false; + bool absolute_b = false; + + switch (opcode->get().GetId()) { + case OpCode::Id::HADD2_R: if (instr.alu_half.ftz == 0) { LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } + negate_a = ((instr.value >> 43) & 1) != 0; + negate_b = ((instr.value >> 31) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 30) & 1) != 0; + break; + case OpCode::Id::HADD2_C: + if (instr.alu_half.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } + negate_a = ((instr.value >> 43) & 1) != 0; + negate_b = ((instr.value >> 56) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 54) & 1) != 0; + break; + case OpCode::Id::HMUL2_R: + negate_a = ((instr.value >> 43) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 30) & 1) != 0; + break; + case OpCode::Id::HMUL2_C: + negate_b = ((instr.value >> 31) & 1) != 0; + absolute_a = ((instr.value >> 44) & 1) != 0; + absolute_b = ((instr.value >> 54) & 1) != 0; + break; } - const bool negate_a = - opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; - const bool negate_b = - opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); + op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> { + auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HMUL2_C: @@ -48,17 +72,16 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { } }(); op_b = UnpackHalfFloat(op_b, type_b); - // redeclaration to avoid a bug in clang with reusing local bindings in lambdas - Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); + op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - Node value = [&]() { + Node value = [this, opcode, op_a, op_b = op_b] { switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); return Immediate(0); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 0f4c3103a..a041519b7 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -35,15 +35,38 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); + UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); + UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::UAdd, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + if (instr.iadd.x) { + Node carry = GetInternalFlag(InternalFlag::Carry); + Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); + value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); + } + + if (instr.generates_cc) { + const Node i0 = Immediate(0); + + Node zero = Operation(OperationCode::LogicalIEqual, value, i0); + Node sign = Operation(OperationCode::LogicalILessThan, value, i0); + Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); + + Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); + Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); + Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); + Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); + + SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); + SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); + SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); + SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); + } + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::IADD3_C: @@ -249,8 +272,8 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } case OpCode::Id::LEA_IMM: { const bool neg = instr.lea.imm.neg != 0; - return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), + Immediate(static_cast<u32>(instr.lea.imm.entry_a)), Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; } case OpCode::Id::LEA_RZ: { diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 8d54cce34..6116c31aa 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <utility> + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -10,20 +12,20 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; namespace { -constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7; -} +constexpr u64 NUM_CONDITION_CODES = 4; +constexpr u64 NUM_PREDICATES = 7; +} // namespace u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - - const Node apply_mask = [&] { + Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: case OpCode::Id::P2R_IMM: @@ -34,39 +36,43 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { } }(); - const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; + const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; + + const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; + const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; + const auto get_entry = [this, cc](u64 entry) { + return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); + }; switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { - const Node mask = GetRegister(instr.gpr8); + Node mask = GetRegister(instr.gpr8); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const auto shift = static_cast<u32>(pred); + for (u64 entry = 0; entry < num_entries; ++entry) { + const u32 shift = static_cast<u32>(entry); - const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); - const Node condition = - Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); + Node apply = BitfieldExtract(apply_mask, shift, 1); + Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - const Node value_compare = BitfieldExtract(mask, offset + shift, 1); - const Node value = - Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + Node compare = BitfieldExtract(mask, offset + shift, 1); + Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); - bb.push_back(Conditional(condition, {code})); + Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); + bb.push_back(Conditional(condition, {move(code)})); } break; } case OpCode::Id::P2R_IMM: { Node value = Immediate(0); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), + for (u64 entry = 0; entry < num_entries; ++entry) { + Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit)); + value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } - value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8); + value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); + value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } default: diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp new file mode 100644 index 000000000..074f21691 --- /dev/null +++ b/src/video_core/shader/memory_util.cpp @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstddef> + +#include <boost/container_hash/hash.hpp> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader/memory_util.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +GPUVAddr GetShaderAddress(Core::System& system, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; + const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; + return gpu.regs.code_address.CodeAddress() + shader_config.offset; +} + +bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; + static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; + + const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & MASK) == SELF_JUMPING_BRANCH) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + ++offset; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, + const u8* host_ptr, bool is_compute) { + ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); + ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); + memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); + code.resize(CalculateProgramSize(code, is_compute)); + return code; +} + +u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, + const ProgramCode& code_b) { + u64 unique_identifier = boost::hash_value(code); + if (is_a) { + // VertexA programs include two programs + boost::hash_combine(unique_identifier, boost::hash_value(code_b)); + } + return unique_identifier; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h new file mode 100644 index 000000000..be90d24fd --- /dev/null +++ b/src/video_core/shader/memory_util.h @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <vector> + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" + +namespace Core { +class System; +} + +namespace Tegra { +class MemoryManager; +} + +namespace VideoCommon::Shader { + +using ProgramCode = std::vector<u64>; + +constexpr u32 STAGE_MAIN_OFFSET = 10; +constexpr u32 KERNEL_MAIN_OFFSET = 0; + +/// Gets the address for the specified shader stage program +GPUVAddr GetShaderAddress(Core::System& system, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); + +/// Gets if the current instruction offset is a scheduler instruction +bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); + +/// Gets the shader program code from memory for the specified address +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, + const u8* host_ptr, bool is_compute); + +/// Hashes one (or two) program streams +u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, + const ProgramCode& code_b = {}); + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index d0656b581..601c822d2 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -132,6 +132,8 @@ enum class OperationCode { LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool + LogicalAddCarry, /// (uint a, uint b) -> bool + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 748d73087..15ae152f2 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,6 +18,7 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/node.h" #include "video_core/shader/registry.h" @@ -25,8 +26,6 @@ namespace VideoCommon::Shader { struct ShaderBlock; -using ProgramCode = std::vector<u64>; - constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; struct ConstBuffer { diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 513e9bf49..eb97bfd41 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -153,21 +153,13 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return {}; } - s64 current_cursor = cursor; - while (current_cursor > 0) { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1); - current_cursor = new_cursor; - if (!source) { - continue; - } - const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor); - if (base_address != nullptr) { - return {base_address, index, offset}; - } + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); + if (!source) { + return {}; } - return {}; + return TrackCbuf(source, code, new_cursor); } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { |