summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp10
-rw-r--r--src/video_core/shader/control_flow.cpp47
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp35
-rw-r--r--src/video_core/shader/decode/other.cpp30
-rw-r--r--src/video_core/shader/node.h12
-rw-r--r--src/video_core/shader/shader_ir.h6
7 files changed, 85 insertions, 58 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index cedfe30b1..bfc975a04 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -191,10 +191,12 @@ public:
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
- code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ if (!ir.IsFlowStackDisabled()) {
+ constexpr u32 FLOW_STACK_SIZE = 20;
+ for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
+ code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
+ code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ }
}
code.AddLine("while (true) {{");
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 3af4c6190..c99d95b57 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -1,5 +1,6 @@
#include <list>
+#include <map>
#include <unordered_map>
#include <unordered_set>
#include <vector>
@@ -104,28 +105,6 @@ struct BlockInfo {
}
};
-struct Stamp {
- Stamp() = default;
- Stamp(u32 address, u32 target) : address{address}, target{target} {}
- u32 address{};
- u32 target{};
- bool operator==(const Stamp& sb) const {
- return std::tie(address, target) == std::tie(sb.address, sb.target);
- }
- bool operator<(const Stamp& sb) const {
- return address < sb.address;
- }
- bool operator>(const Stamp& sb) const {
- return address > sb.address;
- }
- bool operator<=(const Stamp& sb) const {
- return address <= sb.address;
- }
- bool operator>=(const Stamp& sb) const {
- return address >= sb.address;
- }
-};
-
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
: program_code{program_code}, program_size{program_size} {
@@ -144,8 +123,8 @@ struct CFGRebuildState {
std::list<Query> queries{};
std::unordered_map<u32, u32> registered{};
std::unordered_set<u32> labels{};
- std::set<Stamp> ssy_labels;
- std::set<Stamp> pbk_labels;
+ std::map<u32, u32> ssy_labels;
+ std::map<u32, u32> pbk_labels;
std::unordered_map<u32, BlockStack> stacks{};
const ProgramCode& program_code;
const std::size_t program_size;
@@ -393,7 +372,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
}
case BlockCollision::Inside: {
// This case is the tricky one:
- // We need to Split the block in 2 sepprate blocks
+ // We need to split the block into 2 separate blocks
auto it = search_result.second;
block_info = CreateBlockInfo(state, address, it->end);
it->end = address - 1;
@@ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) {
}
bool TryQuery(CFGRebuildState& state) {
- auto gather_labels = ([](ControlStack& cc, std::set<Stamp> labels, BlockInfo& block) {
- Stamp start{block.start, 0};
- Stamp end{block.end, 0};
- auto gather_start = labels.lower_bound(start);
- auto gather_end = labels.upper_bound(end);
+ auto gather_labels = ([](ControlStack& cc, std::map<u32, u32>& labels, BlockInfo& block) {
+ auto gather_start = labels.lower_bound(block.start);
+ auto gather_end = labels.upper_bound(block.end);
while (gather_start != gather_end) {
- cc.Push(gather_start->target);
+ cc.Push(gather_start->second);
gather_start++;
}
});
@@ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) {
Query& q = state.queries.front();
u32 block_index = state.registered[q.address];
BlockInfo& block = state.block_info[block_index];
+ // If the block is visited, check if the stacks match; otherwise gather the ssy/pbk
+ // labels into the current stack and check whether the branch at the end of the block
+ // consumes a label. Schedule new queries accordingly.
if (block.visited) {
BlockStack& stack = state.stacks[q.address];
- bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack);
+ bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) &&
+ (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack));
state.queries.pop_front();
return all_okay;
}
@@ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre
result_out.blocks.push_back(new_block);
}
if (result_out.decompilable) {
+ result_out.labels = std::move(state.labels);
return true;
}
+ // If it's not decompilable, merge the unlabelled blocks together
auto back = result_out.blocks.begin();
auto next = std::next(back);
while (next != result_out.blocks.end()) {
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index f5d37a231..4a2cd622c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -3,7 +3,7 @@
#include <cstring>
#include <list>
#include <optional>
-#include <vector>
+#include <unordered_set>
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -48,6 +48,7 @@ struct ShaderCharacteristics {
bool decompilable{};
u32 start;
u32 end;
+ std::unordered_set<u32> labels{};
};
bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 1a74b70cb..f9b1960da 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
+ disable_flow_stack = false;
ShaderCharacteristics shader_info{};
bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);
if (can_proceed) {
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
if (shader_info.decompilable) {
+ disable_flow_stack = true;
+ auto insert_block = ([this](NodeBlock& nodes, u32 label) {
+ if (label == exit_branch) {
+ return;
+ }
+ basic_blocks.insert({label, nodes});
+ });
std::list<ShaderBlock>& blocks = shader_info.blocks;
+ NodeBlock current_block;
+ u32 current_label = exit_branch;
for (auto& block : blocks) {
- NodeBlock nodes;
+ if (shader_info.labels.count(block.start) != 0) {
+ insert_block(current_block, current_label);
+ current_block.clear();
+ current_label = block.start;
+ }
if (!block.ignore_branch) {
- nodes = DecodeRange(block.start, block.end);
- InsertControlFlow(nodes, block);
+ DecodeRangeInner(current_block, block.start, block.end);
+ InsertControlFlow(current_block, block);
} else {
- nodes = DecodeRange(block.start, block.end + 1);
+ DecodeRangeInner(current_block, block.start, block.end + 1);
}
- basic_blocks.insert({block.start, nodes});
}
+ insert_block(current_block, current_label);
return;
}
+ LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
// we can't decompile it, fallback to standard method
for (const auto& block : shader_info.blocks) {
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
}
return;
}
- LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
+ LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
// Now we need to deal with an undecompilable shader. We need to brute force
// a shader that captures every position.
@@ -78,10 +93,14 @@ void ShaderIR::Decode() {
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
NodeBlock basic_block;
+ DecodeRangeInner(basic_block, begin, end);
+ return basic_block;
+}
+
+void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
- pc = DecodeInstr(basic_block, pc);
+ pc = DecodeInstr(bb, pc);
}
- return basic_block;
}
void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index ed3c63781..42e3de02f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
} else {
const u32 target = pc + 1;
const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
- true, PRECISE, op_a, Immediate(3));
- const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+ const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+ PRECISE, op_a, Immediate(3));
+ const Node operand =
+ Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
branch = Operation(OperationCode::BranchIndirect, convert);
}
@@ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Node index = GetRegister(instr.gpr8);
const Node op_a =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
- true, PRECISE, op_a, Immediate(3));
+ const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+ PRECISE, op_a, Immediate(3));
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
} else {
const s32 target = pc + instr.brx.GetBranchExtend();
const Node op_a = GetRegister(instr.gpr8);
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
- true, PRECISE, op_a, Immediate(3));
+ const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+ PRECISE, op_a, Immediate(3));
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
}
const Node branch = Operation(OperationCode::BranchIndirect, operand);
@@ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer flow is not supported");
+ if (disable_flow_stack) {
+ break;
+ }
+
// The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
const u32 target = pc + instr.bra.GetBranchTarget();
bb.push_back(
@@ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer PBK is not supported");
+ if (disable_flow_stack) {
+ break;
+ }
+
// PBK pushes to a stack the address where BRK will jump to.
const u32 target = pc + instr.bra.GetBranchTarget();
bb.push_back(
@@ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
static_cast<u32>(cc));
+ if (disable_flow_stack) {
+ break;
+ }
+
// The SYNC opcode jumps to the address previously set by the SSY opcode
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
break;
@@ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
static_cast<u32>(cc));
+ if (disable_flow_stack) {
+ break;
+ }
// The BRK opcode jumps to the address previously set by the PBK opcode
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index e468758a6..7427ed896 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -148,12 +148,12 @@ enum class OperationCode {
ImageStore, /// (MetaImage, float[N] coords) -> void
- Branch, /// (uint branch_target) -> void
- BranchIndirect,/// (uint branch_target) -> void
- PushFlowStack, /// (uint branch_target) -> void
- PopFlowStack, /// () -> void
- Exit, /// () -> void
- Discard, /// () -> void
+ Branch, /// (uint branch_target) -> void
+ BranchIndirect, /// (uint branch_target) -> void
+ PushFlowStack, /// (uint branch_target) -> void
+ PopFlowStack, /// () -> void
+ Exit, /// () -> void
+ Discard, /// () -> void
EmitVertex, /// () -> void
EndPrimitive, /// () -> void
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index a6729064b..928ac7cb5 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -123,10 +123,15 @@ public:
return header;
}
+ bool IsFlowStackDisabled() const {
+ return disable_flow_stack;
+ }
+
private:
void Decode();
NodeBlock DecodeRange(u32 begin, u32 end);
+ void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
/**
@@ -320,6 +325,7 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const std::size_t program_size;
+ bool disable_flow_stack{};
u32 coverage_begin{};
u32 coverage_end{};