summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
author: Fernando Sahmkow <fsahmkow27@gmail.com> 2019-06-25 17:10:45 +0200
committer: FernandoS27 <fsahmkow27@gmail.com> 2019-07-09 14:14:38 +0200
commit: 926b80102f1c00675a9f3956258a066bfe0c3642 (patch)
tree: 616288030c9e72f8cf6a3ee12ae89faa07b9da79 /src/video_core
parent: shader_ir: propagate shader size to the IR (diff)
downloadyuzu-926b80102f1c00675a9f3956258a066bfe0c3642.tar
yuzu-926b80102f1c00675a9f3956258a066bfe0c3642.tar.gz
yuzu-926b80102f1c00675a9f3956258a066bfe0c3642.tar.bz2
yuzu-926b80102f1c00675a9f3956258a066bfe0c3642.tar.lz
yuzu-926b80102f1c00675a9f3956258a066bfe0c3642.tar.xz
yuzu-926b80102f1c00675a9f3956258a066bfe0c3642.tar.zst
yuzu-926b80102f1c00675a9f3956258a066bfe0c3642.zip
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/shader/control_flow.cpp | 167
-rw-r--r-- src/video_core/shader/control_flow.h | 1
-rw-r--r-- src/video_core/shader/decode.cpp | 46
-rw-r--r-- src/video_core/shader/shader_ir.h | 3
4 files changed, 206 insertions, 11 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index a9de8f814..3af4c6190 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -1,6 +1,6 @@
#include <list>
-#include <map>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
@@ -16,12 +16,80 @@ using Tegra::Shader::OpCode;
constexpr s32 unassigned_branch = -2;
+/**
+ * Fixed-capacity stack of u32 addresses.
+ *
+ * Only the first `index` slots of `stack` are live. The storage is zero-initialised and the
+ * compiler-generated copy operations are used, so copy construction and copy assignment
+ * behave the same way and never read indeterminate values.
+ */
+struct ControlStack {
+    /// Maximum number of entries the stack can hold.
+    static constexpr std::size_t stack_fixed_size = 20;
+
+    std::array<u32, stack_fixed_size> stack{};
+    u32 index{};
+
+    /// Returns true when both stacks hold the same entries in the same order.
+    bool Compare(const ControlStack& cs) const {
+        if (index != cs.index) {
+            return false;
+        }
+        // Only the live prefix takes part in the comparison.
+        return std::memcmp(stack.data(), cs.stack.data(), index * sizeof(u32)) == 0;
+    }
+
+    /// Returns true when both stacks are empty, or both are non-empty with equal top entries.
+    bool SoftCompare(const ControlStack& cs) const {
+        if (index == 0 || cs.index == 0) {
+            return index == cs.index;
+        }
+        return Top() == cs.Top();
+    }
+
+    /// Number of live entries.
+    u32 Size() const {
+        return index;
+    }
+
+    /// Returns the most recently pushed entry.
+    /// Precondition: Size() > 0. Calling this on an empty stack indexes out of bounds.
+    u32 Top() const {
+        return stack[index - 1];
+    }
+
+    /// Pushes `address`; returns false (leaving the stack untouched) when it is already full.
+    bool Push(u32 address) {
+        if (index >= stack_fixed_size) {
+            return false;
+        }
+        stack[index] = address;
+        index++;
+        return true;
+    }
+
+    /// Drops the top entry; returns false when the stack is already empty.
+    bool Pop() {
+        if (index == 0) {
+            return false;
+        }
+        index--;
+        return true;
+    }
+};
+
+/**
+ * A pending visit consumed by TryQuery: the address of the block to inspect together with
+ * the SSY and PBK stacks as they stand on arrival at that address.
+ *
+ * No special members are declared (Rule of Zero); `address` is value-initialised so a
+ * default-constructed Query never carries an indeterminate address.
+ */
+struct Query {
+    u32 address{};
+    ControlStack ssy_stack{};
+    ControlStack pbk_stack{};
+};
+
+/**
+ * The SSY and PBK stacks recorded for a block; TryQuery stores one per visited block address
+ * in CFGRebuildState::stacks and compares later queries against it.
+ *
+ * Copying uses the compiler-generated operations, so copy construction and copy assignment
+ * stay consistent with each other.
+ */
+struct BlockStack {
+    BlockStack() = default;
+    /// Captures the stacks carried by query `q`; the query's address is not stored.
+    BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
+    ControlStack ssy_stack{};
+    ControlStack pbk_stack{};
+};
+
struct BlockBranchInfo {
Condition condition{};
s32 address{exit_branch};
bool kill{};
bool is_sync{};
bool is_brk{};
+ // Set by ParseCode when parsing stops because it reached an already-registered block, and
+ // on the forward branch TryInspectAddress creates. ScanFlow copies it to
+ // ShaderBlock::ignore_branch and, when it is set, does not export the branch fields.
+ bool ignore{};
};
struct BlockInfo {
@@ -64,19 +132,21 @@ struct CFGRebuildState {
// queries.clear();
block_info.clear();
labels.clear();
- visited_address.clear();
+ registered.clear();
ssy_labels.clear();
pbk_labels.clear();
inspect_queries.clear();
+ queries.clear();
}
std::vector<BlockInfo> block_info{};
std::list<u32> inspect_queries{};
- // std::list<Query> queries{};
- std::unordered_set<u32> visited_address{};
+ std::list<Query> queries{};
+ std::unordered_map<u32, u32> registered{};
std::unordered_set<u32> labels{};
std::set<Stamp> ssy_labels;
std::set<Stamp> pbk_labels;
+ std::unordered_map<u32, BlockStack> stacks{};
const ProgramCode& program_code;
const std::size_t program_size;
};
@@ -107,7 +177,8 @@ BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
auto& it = state.block_info.emplace_back();
it.start = start;
it.end = end;
- state.visited_address.insert(start);
+ u32 index = state.block_info.size() - 1;
+ state.registered.insert({start, index});
return &it;
}
@@ -136,10 +207,12 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info
while (true) {
if (offset >= end_address) {
parse_info.branch_info.address = exit_branch;
+ parse_info.branch_info.ignore = false;
break;
}
- if (state.visited_address.count(offset) != 0) {
+ if (state.registered.count(offset) != 0) {
parse_info.branch_info.address = offset;
+ parse_info.branch_info.ignore = true;
break;
}
const Instruction instr = {state.program_code[offset]};
@@ -168,6 +241,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
+ parse_info.branch_info.ignore = false;
parse_info.end_address = offset;
return ParseResult::ControlCaught;
@@ -199,6 +273,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
+ parse_info.branch_info.ignore = false;
parse_info.end_address = offset;
return ParseResult::ControlCaught;
@@ -222,6 +297,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = true;
parse_info.branch_info.is_brk = false;
+ parse_info.branch_info.ignore = false;
parse_info.end_address = offset;
return ParseResult::ControlCaught;
@@ -245,6 +321,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = true;
+ parse_info.branch_info.ignore = false;
parse_info.end_address = offset;
return ParseResult::ControlCaught;
@@ -268,6 +345,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info
parse_info.branch_info.kill = true;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
+ parse_info.branch_info.ignore = false;
parse_info.end_address = offset;
return ParseResult::ControlCaught;
@@ -322,6 +400,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
block_info->branch = it->branch;
BlockBranchInfo forward_branch{};
forward_branch.address = address;
+ forward_branch.ignore = true;
it->branch = forward_branch;
return true;
break;
@@ -348,6 +427,58 @@ bool TryInspectAddress(CFGRebuildState& state) {
return true;
}
+/**
+ * Processes the query at the front of state.queries.
+ *
+ * If the queried block was already visited, the query's SSY/PBK stacks are compared with the
+ * ones recorded for that address and the result of the comparison is returned. Otherwise the
+ * block is marked visited, its stacks are recorded, the labels inside the block are pushed
+ * onto the stacks and follow-up queries are scheduled: one for the address after the block
+ * when its branch is conditional, and one for the branch target (popping the SSY/PBK stack
+ * when the branch is a sync/brk, and resolving an unassigned target from the stack top).
+ *
+ * @returns false when there is no query to process, when the stacks of a revisited block do
+ *          not match, when a label stack overflows, or when a sync/brk needs a target but
+ *          its stack is empty; true otherwise.
+ */
+bool TryQuery(CFGRebuildState& state) {
+    // Pushes the target of every label stamped inside [info.start, info.end] onto `cc`.
+    // The label set is taken by const reference so it is not copied on every call.
+    // Returns false if the stack overflows instead of silently dropping labels.
+    const auto gather_labels = [](ControlStack& cc, const std::set<Stamp>& labels,
+                                  const BlockInfo& info) -> bool {
+        const Stamp start{info.start, 0};
+        const Stamp end{info.end, 0};
+        const auto gather_end = labels.upper_bound(end);
+        for (auto it = labels.lower_bound(start); it != gather_end; ++it) {
+            if (!cc.Push(it->target)) {
+                return false;
+            }
+        }
+        return true;
+    };
+    if (state.queries.empty()) {
+        return false;
+    }
+    Query& q = state.queries.front();
+    // NOTE(review): operator[] is kept on purpose. Queries are also scheduled for branch
+    // addresses that are not registered blocks (a negative s32 branch address converted to
+    // u32); those default-insert index 0 here. Confirm this is intended before changing it.
+    const u32 block_index = state.registered[q.address];
+    BlockInfo& block = state.block_info[block_index];
+    if (block.visited) {
+        const BlockStack& recorded = state.stacks[q.address];
+        const bool all_okay =
+            q.ssy_stack.Compare(recorded.ssy_stack) && q.pbk_stack.Compare(recorded.pbk_stack);
+        state.queries.pop_front();
+        return all_okay;
+    }
+    block.visited = true;
+    state.stacks[q.address] = BlockStack{q};
+    // Copy the query before popping it: `q` refers to the list's front element.
+    Query next{q};
+    state.queries.pop_front();
+    if (!gather_labels(next.ssy_stack, state.ssy_labels, block) ||
+        !gather_labels(next.pbk_stack, state.pbk_labels, block)) {
+        return false;
+    }
+    if (!block.branch.condition.IsUnconditional()) {
+        // A conditional branch may not be taken: also continue right after the block.
+        next.address = block.end + 1;
+        state.queries.push_back(next);
+    }
+    Query taken{next};
+    // Resolves an unassigned branch target from the top of `stack`, then pops the stack.
+    // Fails when a target is needed but the stack is empty (Top() would read out of bounds).
+    const auto consume_label = [&block](ControlStack& stack) -> bool {
+        if (block.branch.address == unassigned_branch) {
+            if (stack.Size() == 0) {
+                return false;
+            }
+            block.branch.address = stack.Top();
+        }
+        stack.Pop();
+        return true;
+    };
+    if (block.branch.is_sync && !consume_label(taken.ssy_stack)) {
+        return false;
+    }
+    if (block.branch.is_brk && !consume_label(taken.pbk_stack)) {
+        return false;
+    }
+    taken.address = block.branch.address;
+    state.queries.push_back(taken);
+    return true;
+}
+
bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
ShaderCharacteristics& result_out) {
CFGRebuildState state{program_code, program_size};
@@ -360,20 +491,34 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre
return false;
}
}
+ // Decompile Stacks
+ Query start_query{};
+ start_query.address = start_address;
+ state.queries.push_back(start_query);
+ bool decompiled = true;
+ while (!state.queries.empty()) {
+ if (!TryQuery(state)) {
+ decompiled = false;
+ break;
+ }
+ }
+ // Sort and organize results
std::sort(state.block_info.begin(), state.block_info.end(),
[](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
- // Remove unvisited blocks
result_out.blocks.clear();
- result_out.decompilable = false;
+ result_out.decompilable = decompiled;
result_out.start = start_address;
result_out.end = start_address;
for (auto& block : state.block_info) {
ShaderBlock new_block{};
new_block.start = block.start;
new_block.end = block.end;
- new_block.branch.cond = block.branch.condition;
- new_block.branch.kills = block.branch.kill;
- new_block.branch.address = block.branch.address;
+ new_block.ignore_branch = block.branch.ignore;
+ if (!new_block.ignore_branch) {
+ new_block.branch.cond = block.branch.condition;
+ new_block.branch.kills = block.branch.kill;
+ new_block.branch.address = block.branch.address;
+ }
result_out.end = std::max(result_out.end, block.end);
result_out.blocks.push_back(new_block);
}
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 16736d57a..f5d37a231 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -29,6 +29,7 @@ struct ShaderBlock {
ShaderBlock(const ShaderBlock& sb) = default;
u32 start{};
u32 end{};
+ bool ignore_branch{};
struct Branch {
Condition cond{};
bool kills{};
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 09f55bd21..1a74b70cb 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -44,6 +44,17 @@ void ShaderIR::Decode() {
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
if (shader_info.decompilable) {
+ std::list<ShaderBlock>& blocks = shader_info.blocks;
+ for (auto& block : blocks) {
+ NodeBlock nodes;
+ if (!block.ignore_branch) {
+ nodes = DecodeRange(block.start, block.end);
+ InsertControlFlow(nodes, block);
+ } else {
+ nodes = DecodeRange(block.start, block.end + 1);
+ }
+ basic_blocks.insert({block.start, nodes});
+ }
return;
}
// we can't decompile it, fallback to standard method
@@ -73,6 +84,41 @@ NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
return basic_block;
}
+/**
+ * Appends the node that ends `block` to `bb` and to global_code.
+ *
+ * A negative branch address produces a Discard (when the branch kills) or an Exit operation;
+ * any other address produces a Branch to that address. The node is wrapped in the branch's
+ * condition code and predicate before it is appended.
+ */
+void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
+    // Wraps `node` in a conditional on the condition code (unless it is T) and then in a
+    // conditional on the predicate (unless it is UnusedIndex).
+    const auto guard_with = [&](const Condition& cond, Node node) -> Node {
+        if (cond.cc != ConditionCode::T) {
+            node = Conditional(GetConditionCode(cond.cc), {node});
+        }
+        if (cond.predicate != Pred::UnusedIndex) {
+            const u32 raw_index = static_cast<u32>(cond.predicate);
+            // Values above 7 are treated as the negated form of predicate (value - 8).
+            const bool negated = raw_index > 7;
+            const u32 pred_index = negated ? raw_index - 8 : raw_index;
+            node = Conditional(GetPredicate(pred_index, negated), {node});
+        }
+        return node;
+    };
+    // Applies the branch condition to `node` and records it in both node lists.
+    const auto emit = [&](Node node) {
+        node = guard_with(block.branch.cond, node);
+        bb.push_back(node);
+        global_code.push_back(node);
+    };
+
+    if (block.branch.address >= 0) {
+        emit(Operation(OperationCode::Branch, Immediate(block.branch.address)));
+        return;
+    }
+    emit(Operation(block.branch.kills ? OperationCode::Discard : OperationCode::Exit));
+}
+
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
// Ignore sched instructions when generating code.
if (IsSchedInstruction(pc, main_offset)) {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index a67d4f390..a6729064b 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -22,6 +22,8 @@
namespace VideoCommon::Shader {
+struct ShaderBlock;
+
using ProgramCode = std::vector<u64>;
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
@@ -125,6 +127,7 @@ private:
void Decode();
NodeBlock DecodeRange(u32 begin, u32 end);
+ void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
/**
* Decodes a single instruction from Tegra to IR.