diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/shader/decode.cpp | 41 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 39 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/decode/shift.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 103 | ||||
-rw-r--r-- | src/video_core/shader/decode/video.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/decode/warp.cpp | 7 |
9 files changed, 137 insertions, 69 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2626b1616..21fb9cb83 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace class ASTDecoder { public: @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings); + auto info = ScanFlow(program_code, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); + const std::size_t shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); @@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { } return result; }; - if (block.branch.address < 0) { - if (block.branch.kills) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(block.branch.cond, n); + if (std::holds_alternative<SingleBranch>(*block.branch)) { + auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (branch->address < 0) { + if (branch->kill) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(branch->condition, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(block.branch.cond, n); + Node n = Operation(OperationCode::Branch, Immediate(branch->address)); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); - n = apply_conditions(block.branch.cond, n); - bb.push_back(n); - global_code.push_back(n); + auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + Node op_a = GetRegister(multi_branch->gpr); + for (auto& branch_case : multi_branch->branches) { + Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); + Node op_b = Immediate(branch_case.cmp_value); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + auto result = Conditional(condition, {n}); + bb.push_back(result); + global_code.push_back(result); + } } u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index b73f6536e..a33d242e9 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::ICMP_IMM: { const Node zero = Immediate(0); - const auto [op_b, test] = [&]() -> std::pair<Node, Node> { + const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), @@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { return {zero, zero}; } }(); - const Node op_a = GetRegister(instr.gpr8); + const Node op_lhs = GetRegister(instr.gpr8); const Node comparison = GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); break; } case OpCode::Id::LOP_C: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 95ec1cdd9..b02d2cb95 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -144,8 +144,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { const auto offset{static_cast<std::size_t>(image.index.Value())}; - if (const auto image = TryUseExistingImage(offset, type)) { - return *image; + if (const auto existing_image = TryUseExistingImage(offset, type)) { + return *existing_image; } const std::size_t next_index{used_images.size()}; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7923d4d69..335d78146 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, false); const u32 count = GetUniformTypeElementsCount(type); + if (!real_address_base || !base_address) { + // Tracking failed, load zeroes. + for (u32 i = 0; i < count; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); + } + break; + } + for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporary(bb, 0, real_address_base); + TrackGlobalMemory(bb, instr, true); + if (!real_address_base || !base_address) { + // Tracking failed, skip the store. + break; + } const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); + const Node value = GetRegister(instr.gpr0.Value() + i); + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } @@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_write) { +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, + Instruction instr, + bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_address != nullptr); + ASSERT_OR_EXECUTE_MSG(base_address != nullptr, + { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46e0f823..116b95f76 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::MOV_SYS: { - const Node value = [&]() { + const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index f6ee68a54..d419e9c45 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { + Node op_b = [this, instr] { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..4c838c8bb 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -96,6 +96,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TLD4_B: { + is_bindless = true; + [[fallthrough]]; + } case OpCode::Id::TLD4: { ASSERT(instr.tld4.array == 0); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), @@ -108,11 +112,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); + const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) + : instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); + const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) + : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); WriteTexInstructionFloat( - bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); + bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless)); break; } case OpCode::Id::TLD4S: { @@ -141,7 +148,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -150,7 +157,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } - WriteTexsInstructionFloat(bb, instr, values); + WriteTexsInstructionFloat(bb, instr, values, true); break; } case OpCode::Id::TXQ_B: @@ -165,10 +172,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless - ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false) - : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); u32 indexer = 0; switch (instr.txq.query_type) { @@ -207,9 +211,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = is_bindless - ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) - : GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = + is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); std::vector<Node> coords; @@ -285,9 +289,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { - const auto offset = static_cast<std::size_t>(sampler.index.Value()); +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional<SamplerInfo> sampler_info) { + const auto offset = static_cast<u32>(sampler.index.Value()); + + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. const auto itr = @@ -303,15 +324,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu const std::size_t next_index = used_samplers.size(); const Sampler entry{offset, next_index, type, is_array, is_shadow}; return *used_samplers.emplace(entry).first; -} +} // namespace VideoCommon::Shader -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, cbuf_index, cbuf_offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); ASSERT(base_sampler != nullptr); const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. const auto itr = @@ -344,14 +381,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const } } -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, + bool ignore_mask) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) + if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) continue; SetTemporary(bb, dest_elem++, components[component]); } @@ -411,9 +448,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) - : GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) + : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -553,7 +590,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi) { + bool is_array, bool is_aoffi, bool is_bindless) { const std::size_t coord_count = GetCoordCount(texture_type); // If enabled arrays index is always stored in the gpr8 field @@ -567,6 +604,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de } u64 parameter_register = instr.gpr20.Value(); + + const auto& sampler = + is_bindless + ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) + : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); + std::vector<Node> aoffi; if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); @@ -577,12 +620,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de dc = GetRegister(parameter_register++); } - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) + : Immediate(static_cast<u32>(instr.tld4.component)); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; + MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component, + element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -610,7 +655,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -646,7 +691,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 97fc6f9b1..b047cf870 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Node op_a = GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [&]() { + const Node op_b = [this, instr] { if (instr.video.use_register_b) { return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, instr.video.signed_b, instr.video.type_b, diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index a8e481b3c..fa8a250cc 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -46,9 +46,10 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node width = [&] { + Node width = [this, instr] { + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has // been done reversing Nvidia's math. It won't work on all cases due to SHFL having // different parameters that don't properly map to GLSL's interface, but it should work |