summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/frontend/maxwell
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.cpp642
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.h169
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.cpp149
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/instruction.h63
-rw-r--r--src/shader_recompiler/frontend/maxwell/location.h112
-rw-r--r--src/shader_recompiler/frontend/maxwell/maxwell.inc286
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.cpp26
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.h30
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp883
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.h20
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp35
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp96
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp74
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h57
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp153
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp72
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp50
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp43
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp47
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp78
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp253
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp94
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp60
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp125
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp169
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h42
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp143
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp117
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp118
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp272
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h387
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp105
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp48
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp80
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp86
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp135
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp126
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h39
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp196
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp218
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp116
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp181
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp283
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp45
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp46
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp38
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp205
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp281
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp236
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp266
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp208
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp134
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp165
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp242
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp131
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp76
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp30
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h23
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp69
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.cpp52
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp223
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.h23
108 files changed, 12603 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..efe457baa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell::Flow {
+namespace {
+struct Compare {
+ bool operator()(const Block& lhs, Location rhs) const noexcept {
+ return lhs.begin < rhs;
+ }
+
+ bool operator()(Location lhs, const Block& rhs) const noexcept {
+ return lhs < rhs.begin;
+ }
+
+ bool operator()(const Block& lhs, const Block& rhs) const noexcept {
+ return lhs.begin < rhs.begin;
+ }
+};
+
+u32 BranchOffset(Location pc, Instruction inst) {
+ return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
+}
+
+void Split(Block* old_block, Block* new_block, Location pc) {
+ if (pc <= old_block->begin || pc >= old_block->end) {
+ throw InvalidArgument("Invalid address to split={}", pc);
+ }
+ *new_block = Block{};
+ new_block->begin = pc;
+ new_block->end = old_block->end;
+ new_block->end_class = old_block->end_class;
+ new_block->cond = old_block->cond;
+ new_block->stack = old_block->stack;
+ new_block->branch_true = old_block->branch_true;
+ new_block->branch_false = old_block->branch_false;
+ new_block->function_call = old_block->function_call;
+ new_block->return_block = old_block->return_block;
+ new_block->branch_reg = old_block->branch_reg;
+ new_block->branch_offset = old_block->branch_offset;
+ new_block->indirect_branches = std::move(old_block->indirect_branches);
+
+ const Location old_begin{old_block->begin};
+ Stack old_stack{std::move(old_block->stack)};
+ *old_block = Block{};
+ old_block->begin = old_begin;
+ old_block->end = pc;
+ old_block->end_class = EndClass::Branch;
+ old_block->cond = IR::Condition(true);
+ old_block->stack = old_stack;
+ old_block->branch_true = new_block;
+ old_block->branch_false = nullptr;
+}
+
+Token OpcodeToken(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::PBK:
+ case Opcode::BRK:
+ return Token::PBK;
+ case Opcode::PCNT:
+ case Opcode::CONT:
+ return Token::PCNT;
+ case Opcode::PEXIT:
+ case Opcode::EXIT:
+ return Token::PEXIT;
+ case Opcode::PLONGJMP:
+ case Opcode::LONGJMP:
+ return Token::PLONGJMP;
+ case Opcode::PRET:
+ case Opcode::RET:
+ case Opcode::CAL:
+ return Token::PRET;
+ case Opcode::SSY:
+ case Opcode::SYNC:
+ return Token::SSY;
+ default:
+ throw InvalidArgument("{}", opcode);
+ }
+}
+
+bool IsAbsoluteJump(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::JCAL:
+ case Opcode::JMP:
+ case Opcode::JMX:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool HasFlowTest(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::BRX:
+ case Opcode::EXIT:
+ case Opcode::JMP:
+ case Opcode::JMX:
+ case Opcode::KIL:
+ case Opcode::BRK:
+ case Opcode::CONT:
+ case Opcode::LONGJMP:
+ case Opcode::RET:
+ case Opcode::SYNC:
+ return true;
+ case Opcode::CAL:
+ case Opcode::JCAL:
+ return false;
+ default:
+ throw InvalidArgument("Invalid branch {}", opcode);
+ }
+}
+
+std::string NameOf(const Block& block) {
+ if (block.begin.IsVirtual()) {
+ return fmt::format("\"Virtual {}\"", block.begin);
+ } else {
+ return fmt::format("\"{}\"", block.begin);
+ }
+}
+} // Anonymous namespace
+
+void Stack::Push(Token token, Location target) {
+ entries.push_back({
+ .token = token,
+ .target{target},
+ });
+}
+
+std::pair<Location, Stack> Stack::Pop(Token token) const {
+ const std::optional<Location> pc{Peek(token)};
+ if (!pc) {
+ throw LogicError("Token could not be found");
+ }
+ return {*pc, Remove(token)};
+}
+
+std::optional<Location> Stack::Peek(Token token) const {
+ const auto it{std::find_if(entries.rbegin(), entries.rend(),
+ [token](const auto& entry) { return entry.token == token; })};
+ if (it == entries.rend()) {
+ return std::nullopt;
+ }
+ return it->target;
+}
+
+Stack Stack::Remove(Token token) const {
+ const auto it{std::find_if(entries.rbegin(), entries.rend(),
+ [token](const auto& entry) { return entry.token == token; })};
+ const auto pos{std::distance(entries.rbegin(), it)};
+ Stack result;
+ result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
+ return result;
+}
+
+bool Block::Contains(Location pc) const noexcept {
+ return pc >= begin && pc < end;
+}
+
+Function::Function(ObjectPool<Block>& block_pool, Location start_address)
+ : entrypoint{start_address} {
+ Label& label{labels.emplace_back()};
+ label.address = start_address;
+ label.block = block_pool.Create(Block{});
+ label.block->begin = start_address;
+ label.block->end = start_address;
+ label.block->end_class = EndClass::Branch;
+ label.block->cond = IR::Condition(true);
+ label.block->branch_true = nullptr;
+ label.block->branch_false = nullptr;
+}
+
+CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
+ bool exits_to_dispatcher_)
+ : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
+ exits_to_dispatcher_} {
+ if (exits_to_dispatcher) {
+ dispatch_block = block_pool.Create(Block{});
+ dispatch_block->begin = {};
+ dispatch_block->end = {};
+ dispatch_block->end_class = EndClass::Exit;
+ dispatch_block->cond = IR::Condition(true);
+ dispatch_block->stack = {};
+ dispatch_block->branch_true = nullptr;
+ dispatch_block->branch_false = nullptr;
+ }
+ functions.emplace_back(block_pool, start_address);
+ for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
+ while (!functions[function_id].labels.empty()) {
+ Function& function{functions[function_id]};
+ Label label{function.labels.back()};
+ function.labels.pop_back();
+ AnalyzeLabel(function_id, label);
+ }
+ }
+ if (exits_to_dispatcher) {
+ const auto last_block{functions[0].blocks.rbegin()};
+ dispatch_block->begin = last_block->end + 1;
+ dispatch_block->end = last_block->end + 1;
+ functions[0].blocks.insert(*dispatch_block);
+ }
+}
+
+void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
+ if (InspectVisitedBlocks(function_id, label)) {
+ // Label address has been visited
+ return;
+ }
+ // Try to find the next block
+ Function* const function{&functions[function_id]};
+ Location pc{label.address};
+ const auto next_it{function->blocks.upper_bound(pc, Compare{})};
+ const bool is_last{next_it == function->blocks.end()};
+ Block* const next{is_last ? nullptr : &*next_it};
+ // Insert before the next block
+ Block* const block{label.block};
+ // Analyze instructions until it reaches an already visited block or there's a branch
+ bool is_branch{false};
+ while (!next || pc < next->begin) {
+ is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
+ if (is_branch) {
+ break;
+ }
+ ++pc;
+ }
+ if (!is_branch) {
+ // If the block finished without a branch,
+ // it means that the next instruction is already visited, jump to it
+ block->end = pc;
+ block->cond = IR::Condition{true};
+ block->branch_true = next;
+ block->branch_false = nullptr;
+ }
+ // Function's pointer might be invalid, resolve it again
+ // Insert the new block
+ functions[function_id].blocks.insert(*block);
+}
+
+bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
+ const Location pc{label.address};
+ Function& function{functions[function_id]};
+ const auto it{
+ std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
+ if (it == function.blocks.end()) {
+ // Address has not been visited
+ return false;
+ }
+ Block* const visited_block{&*it};
+ if (visited_block->begin == pc) {
+ throw LogicError("Dangling block");
+ }
+ Block* const new_block{label.block};
+ Split(visited_block, new_block, pc);
+ function.blocks.insert(it, *new_block);
+ return true;
+}
+
+CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
+ const Instruction inst{env.ReadInstruction(pc.Offset())};
+ const Opcode opcode{Decode(inst.raw)};
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::JMP:
+ case Opcode::RET:
+ if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+ return AnalysisState::Continue;
+ }
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::JMP:
+ AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
+ break;
+ case Opcode::RET:
+ block->end_class = EndClass::Return;
+ break;
+ default:
+ break;
+ }
+ block->end = pc;
+ return AnalysisState::Branch;
+ case Opcode::BRK:
+ case Opcode::CONT:
+ case Opcode::LONGJMP:
+ case Opcode::SYNC: {
+ if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+ return AnalysisState::Continue;
+ }
+ const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
+ block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
+ block->end = pc;
+ return AnalysisState::Branch;
+ }
+ case Opcode::KIL: {
+ const Predicate pred{inst.Pred()};
+ const auto ir_pred{static_cast<IR::Pred>(pred.index)};
+ const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
+ AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
+ return AnalysisState::Branch;
+ }
+ case Opcode::PBK:
+ case Opcode::PCNT:
+ case Opcode::PEXIT:
+ case Opcode::PLONGJMP:
+ case Opcode::SSY:
+ block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
+ return AnalysisState::Continue;
+ case Opcode::BRX:
+ case Opcode::JMX:
+ return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
+ case Opcode::EXIT:
+ return AnalyzeEXIT(block, function_id, pc, inst);
+ case Opcode::PRET:
+ throw NotImplementedException("PRET flow analysis");
+ case Opcode::CAL:
+ case Opcode::JCAL: {
+ const bool is_absolute{IsAbsoluteJump(opcode)};
+ const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+ // Technically CAL pushes into PRET, but that's implicit in the function call for us
+ // Insert the function into the list if it doesn't exist
+ const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
+ const bool exists{it != functions.end()};
+ const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
+ : functions.size()};
+ if (!exists) {
+ functions.emplace_back(block_pool, cal_pc);
+ }
+ block->end_class = EndClass::Call;
+ block->function_call = call_id;
+ block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
+ block->end = pc;
+ return AnalysisState::Branch;
+ }
+ default:
+ break;
+ }
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{true} || pred == Predicate{false}) {
+ return AnalysisState::Continue;
+ }
+ const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
+ AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
+ return AnalysisState::Branch;
+}
+
+void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
+ EndClass insn_end_class, IR::Condition cond) {
+ if (block->begin != pc) {
+ // If the block doesn't start in the conditional instruction
+ // mark it as a label to visit it later
+ block->end = pc;
+ block->cond = IR::Condition{true};
+ block->branch_true = AddLabel(block, block->stack, pc, function_id);
+ block->branch_false = nullptr;
+ return;
+ }
+ // Create a virtual block and a conditional block
+ Block* const conditional_block{block_pool.Create()};
+ Block virtual_block{};
+ virtual_block.begin = block->begin.Virtual();
+ virtual_block.end = block->begin.Virtual();
+ virtual_block.end_class = EndClass::Branch;
+ virtual_block.stack = block->stack;
+ virtual_block.cond = cond;
+ virtual_block.branch_true = conditional_block;
+ virtual_block.branch_false = nullptr;
+ // Save the contents of the visited block in the conditional block
+ *conditional_block = std::move(*block);
+ // Impersonate the visited block with a virtual block
+ *block = std::move(virtual_block);
+ // Set the end properties of the conditional instruction
+ conditional_block->end = pc + 1;
+ conditional_block->end_class = insn_end_class;
+ // Add a label to the instruction after the conditional instruction
+ Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
+ // Branch to the next instruction from the virtual block
+ block->branch_false = endif_block;
+ // And branch to it from the conditional instruction if it is a branch or a kill instruction
+ // Kill instructions are considered a branch because they demote to a helper invocation and
+ // execution may continue.
+ if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
+ conditional_block->cond = IR::Condition{true};
+ conditional_block->branch_true = endif_block;
+ conditional_block->branch_false = nullptr;
+ }
+ // Finally insert the condition block into the list of blocks
+ functions[function_id].blocks.insert(*conditional_block);
+}
+
+bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ Opcode opcode) {
+ if (inst.branch.is_cbuf) {
+ throw NotImplementedException("Branch with constant buffer offset");
+ }
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{false}) {
+ return false;
+ }
+ const bool has_flow_test{HasFlowTest(opcode)};
+ const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
+ if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+ block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
+ block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
+ } else {
+ block->cond = IR::Condition{true};
+ }
+ return true;
+}
+
+void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ bool is_absolute) {
+ const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+ block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
+}
+
+CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+ FunctionId function_id) {
+ const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
+ if (!brx_table) {
+ TrackIndirectBranchTable(env, pc, program_start);
+ throw NotImplementedException("Failed to track indirect branch");
+ }
+ const IR::FlowTest flow_test{inst.branch.flow_test};
+ const Predicate pred{inst.Pred()};
+ if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
+ throw NotImplementedException("Conditional indirect branch");
+ }
+ std::vector<u32> targets;
+ targets.reserve(brx_table->num_entries);
+ for (u32 i = 0; i < brx_table->num_entries; ++i) {
+ u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
+ if (!is_absolute) {
+ target += pc.Offset();
+ }
+ target += static_cast<u32>(brx_table->branch_offset);
+ target += 8;
+ targets.push_back(target);
+ }
+ std::ranges::sort(targets);
+ targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
+
+ block->indirect_branches.reserve(targets.size());
+ for (const u32 target : targets) {
+ Block* const branch{AddLabel(block, block->stack, target, function_id)};
+ block->indirect_branches.push_back({
+ .block = branch,
+ .address = target,
+ });
+ }
+ block->cond = IR::Condition{true};
+ block->end = pc + 1;
+ block->end_class = EndClass::IndirectBranch;
+ block->branch_reg = brx_table->branch_reg;
+ block->branch_offset = brx_table->branch_offset + 8;
+ if (!is_absolute) {
+ block->branch_offset += pc.Offset();
+ }
+ return AnalysisState::Branch;
+}
+
+CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
+ Instruction inst) {
+ const IR::FlowTest flow_test{inst.branch.flow_test};
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
+ // EXIT will never be taken
+ return AnalysisState::Continue;
+ }
+ if (exits_to_dispatcher && function_id != 0) {
+ throw NotImplementedException("Dispatch EXIT on external function");
+ }
+ if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+ if (block->stack.Peek(Token::PEXIT).has_value()) {
+ throw NotImplementedException("Conditional EXIT with PEXIT token");
+ }
+ const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
+ if (exits_to_dispatcher) {
+ block->end = pc;
+ block->end_class = EndClass::Branch;
+ block->cond = cond;
+ block->branch_true = dispatch_block;
+ block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
+ return AnalysisState::Branch;
+ }
+ AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
+ return AnalysisState::Branch;
+ }
+ if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
+ const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
+ block->cond = IR::Condition{true};
+ block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
+ block->branch_false = nullptr;
+ return AnalysisState::Branch;
+ }
+ if (exits_to_dispatcher) {
+ block->cond = IR::Condition{true};
+ block->end = pc;
+ block->end_class = EndClass::Branch;
+ block->branch_true = dispatch_block;
+ block->branch_false = nullptr;
+ return AnalysisState::Branch;
+ }
+ block->end = pc + 1;
+ block->end_class = EndClass::Exit;
+ return AnalysisState::Branch;
+}
+
+Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
+ Function& function{functions[function_id]};
+ if (block->begin == pc) {
+ // Jumps to itself
+ return block;
+ }
+ if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
+ // Block already exists and it has been visited
+ if (function.blocks.begin() != it) {
+ // Check if the previous node is the virtual variant of the label
+ // This won't exist if a virtual node is not needed or it hasn't been visited
+ // If it hasn't been visited and a virtual node is needed, this will still behave as
+ // expected because the node impersonated with its virtual node.
+ const auto prev{std::prev(it)};
+ if (it->begin.Virtual() == prev->begin) {
+ return &*prev;
+ }
+ }
+ return &*it;
+ }
+ // Make sure we don't insert the same layer twice
+ const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
+ if (label_it != function.labels.end()) {
+ return label_it->block;
+ }
+ Block* const new_block{block_pool.Create()};
+ new_block->begin = pc;
+ new_block->end = pc;
+ new_block->end_class = EndClass::Branch;
+ new_block->cond = IR::Condition(true);
+ new_block->stack = stack;
+ new_block->branch_true = nullptr;
+ new_block->branch_false = nullptr;
+ function.labels.push_back(Label{
+ .address{pc},
+ .block = new_block,
+ .stack{std::move(stack)},
+ });
+ return new_block;
+}
+
+std::string CFG::Dot() const {
+ int node_uid{0};
+
+ std::string dot{"digraph shader {\n"};
+ for (const Function& function : functions) {
+ dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
+ dot += fmt::format("\t\tnode [style=filled];\n");
+ for (const Block& block : function.blocks) {
+ const std::string name{NameOf(block)};
+ const auto add_branch = [&](Block* branch, bool add_label) {
+ dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
+ if (add_label && block.cond != IR::Condition{true} &&
+ block.cond != IR::Condition{false}) {
+ dot += fmt::format(" [label=\"{}\"]", block.cond);
+ }
+ dot += '\n';
+ };
+ dot += fmt::format("\t\t{};\n", name);
+ switch (block.end_class) {
+ case EndClass::Branch:
+ if (block.cond != IR::Condition{false}) {
+ add_branch(block.branch_true, true);
+ }
+ if (block.cond != IR::Condition{true}) {
+ add_branch(block.branch_false, false);
+ }
+ break;
+ case EndClass::IndirectBranch:
+ for (const IndirectBranch& branch : block.indirect_branches) {
+ add_branch(branch.block, false);
+ }
+ break;
+ case EndClass::Call:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
+ dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n",
+ node_uid, block.function_call);
+ dot += '\n';
+ ++node_uid;
+ break;
+ case EndClass::Exit:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ case EndClass::Return:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ case EndClass::Kill:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ }
+ }
+ if (function.entrypoint == 8) {
+ dot += fmt::format("\t\tlabel = \"main\";\n");
+ } else {
+ dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
+ }
+ dot += "\t}\n";
+ }
+ if (!functions.empty()) {
+ auto& function{functions.front()};
+ if (function.blocks.empty()) {
+ dot += "Start;\n";
+ } else {
+ dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
+ }
+ dot += fmt::format("\tStart [shape=diamond];\n");
+ }
+ dot += "}\n";
+ return dot;
+}
+
+} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <optional>
+#include <span>
+#include <string>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/set.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/condition.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell::Flow {
+
+struct Block;
+
+using FunctionId = size_t;
+
+enum class EndClass {
+ Branch,
+ IndirectBranch,
+ Call,
+ Exit,
+ Return,
+ Kill,
+};
+
+enum class Token {
+ SSY,
+ PBK,
+ PEXIT,
+ PRET,
+ PCNT,
+ PLONGJMP,
+};
+
+struct StackEntry {
+ auto operator<=>(const StackEntry&) const noexcept = default;
+
+ Token token;
+ Location target;
+};
+
+class Stack {
+public:
+ void Push(Token token, Location target);
+ [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
+ [[nodiscard]] std::optional<Location> Peek(Token token) const;
+ [[nodiscard]] Stack Remove(Token token) const;
+
+private:
+ boost::container::small_vector<StackEntry, 3> entries;
+};
+
+struct IndirectBranch {
+ Block* block;
+ u32 address;
+};
+
+struct Block : boost::intrusive::set_base_hook<
+ // Normal link is ~2.5% faster compared to safe link
+ boost::intrusive::link_mode<boost::intrusive::normal_link>> {
+ [[nodiscard]] bool Contains(Location pc) const noexcept;
+
+ bool operator<(const Block& rhs) const noexcept {
+ return begin < rhs.begin;
+ }
+
+ Location begin;
+ Location end;
+ EndClass end_class{};
+ IR::Condition cond{};
+ Stack stack;
+ Block* branch_true{};
+ Block* branch_false{};
+ FunctionId function_call{};
+ Block* return_block{};
+ IR::Reg branch_reg{};
+ s32 branch_offset{};
+ std::vector<IndirectBranch> indirect_branches;
+};
+
+struct Label {
+ Location address;
+ Block* block;
+ Stack stack;
+};
+
+struct Function {
+ explicit Function(ObjectPool<Block>& block_pool, Location start_address);
+
+ Location entrypoint;
+ boost::container::small_vector<Label, 16> labels;
+ boost::intrusive::set<Block> blocks;
+};
+
+class CFG {
+ enum class AnalysisState {
+ Branch,
+ Continue,
+ };
+
+public:
+ explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
+ bool exits_to_dispatcher = false);
+
+ CFG& operator=(const CFG&) = delete;
+ CFG(const CFG&) = delete;
+
+ CFG& operator=(CFG&&) = delete;
+ CFG(CFG&&) = delete;
+
+ [[nodiscard]] std::string Dot() const;
+
+ [[nodiscard]] std::span<const Function> Functions() const noexcept {
+ return std::span(functions.data(), functions.size());
+ }
+ [[nodiscard]] std::span<Function> Functions() noexcept {
+ return std::span(functions.data(), functions.size());
+ }
+
+ [[nodiscard]] bool ExitsToDispatcher() const {
+ return exits_to_dispatcher;
+ }
+
+private:
+ void AnalyzeLabel(FunctionId function_id, Label& label);
+
+ /// Inspect already visited blocks.
+ /// Return true when the block has already been visited
+ bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
+
+ AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
+
+ void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
+ IR::Condition cond);
+
+ /// Return true when the branch instruction is confirmed to be a branch
+ bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ Opcode opcode);
+
+ void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ bool is_absolute);
+ AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+ FunctionId function_id);
+ AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
+
+ /// Return the branch target block id
+ Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
+
+ Environment& env;
+ ObjectPool<Block>& block_pool;
+ boost::container::small_vector<Function, 1> functions;
+ Location program_start;
+ bool exits_to_dispatcher{};
+ Block* dispatch_block{};
+};
+
+} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <memory>
+#include <string_view>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+namespace {
+struct MaskValue {
+ u64 mask;
+ u64 value;
+};
+
+constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
+ u64 mask{};
+ u64 value{};
+ u64 bit{u64(1) << 63};
+ while (*encoding) {
+ switch (*encoding) {
+ case '0':
+ mask |= bit;
+ break;
+ case '1':
+ mask |= bit;
+ value |= bit;
+ break;
+ case '-':
+ break;
+ case ' ':
+ break;
+ default:
+ throw LogicError("Invalid encoding character '{}'", *encoding);
+ }
+ ++encoding;
+ if (*encoding != ' ') {
+ bit >>= 1;
+ }
+ }
+ return MaskValue{.mask = mask, .value = value};
+}
+
+struct InstEncoding {
+ MaskValue mask_value;
+ Opcode opcode;
+};
+constexpr std::array UNORDERED_ENCODINGS{
+#define INST(name, cute, encode) \
+ InstEncoding{ \
+ .mask_value{MaskValueFromEncoding(encode)}, \
+ .opcode = Opcode::name, \
+ },
+#include "maxwell.inc"
+#undef INST
+};
+
+constexpr auto SortedEncodings() {
+ std::array encodings{UNORDERED_ENCODINGS};
+ std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
+ return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
+ });
+ return encodings;
+}
+constexpr auto ENCODINGS{SortedEncodings()};
+
+constexpr int WidestLeftBits() {
+ int bits{64};
+ for (const InstEncoding& encoding : ENCODINGS) {
+ bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
+ }
+ return 64 - bits;
+}
+constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
+constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
+
+constexpr size_t ToFastLookupIndex(u64 value) {
+ return static_cast<size_t>(value >> MASK_SHIFT);
+}
+
+constexpr size_t FastLookupSize() {
+ size_t max_width{};
+ for (const InstEncoding& encoding : ENCODINGS) {
+ max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
+ }
+ return max_width + 1;
+}
+constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
+
+struct InstInfo {
+ [[nodiscard]] u64 Mask() const noexcept {
+ return static_cast<u64>(high_mask) << MASK_SHIFT;
+ }
+
+ [[nodiscard]] u64 Value() const noexcept {
+ return static_cast<u64>(high_value) << MASK_SHIFT;
+ }
+
+ u16 high_mask;
+ u16 high_value;
+ Opcode opcode;
+};
+
+constexpr auto MakeFastLookupTableIndex(size_t index) {
+ std::array<InstInfo, 2> encodings{};
+ size_t element{};
+ for (const auto& encoding : ENCODINGS) {
+ const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
+ const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
+ if ((index & mask) == value) {
+ encodings.at(element) = InstInfo{
+ .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
+ .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
+ .opcode = encoding.opcode,
+ };
+ ++element;
+ }
+ }
+ return encodings;
+}
+
+/*constexpr*/ auto MakeFastLookupTable() {
+ auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
+ for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
+ (*encodings)[index] = MakeFastLookupTableIndex(index);
+ }
+ return encodings;
+}
+const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
+} // Anonymous namespace
+
+Opcode Decode(u64 insn) {
+ const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
+ const auto it{std::ranges::find_if(
+ table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
+ if (it == table.end()) {
+ throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
+ }
+ return it->opcode;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] Opcode Decode(u64 insn);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+namespace {
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 19, u64> immediate;
+ BitField<56, 1, u64> is_negative;
+ BitField<20, 24, s64> brx_offset;
+};
+
+template <typename Callable>
+std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
+ while (pos >= block_begin) {
+ const u64 insn{env.ReadInstruction(pos.Offset())};
+ --pos;
+ if (func(insn, Decode(insn))) {
+ return insn;
+ }
+ }
+ return std::nullopt;
+}
+
+std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
+ IR::Reg brx_reg) {
+ return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
+ const LDC::Encoding ldc{insn};
+ return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
+ ldc.mode == LDC::Mode::Default;
+ });
+}
+
+std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
+ IR::Reg ldc_reg) {
+ return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
+ const Encoding shl{insn};
+ return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
+ });
+}
+
+std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
+ IR::Reg shl_reg) {
+ return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
+ const Encoding imnmx{insn};
+ return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
+ });
+}
+} // Anonymous namespace
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+ Location block_begin) {
+ const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
+ const Opcode brx_opcode{Decode(brx_insn)};
+ if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
+ throw LogicError("Tracked instruction is not BRX or JMX");
+ }
+ const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
+ const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
+
+ Location pos{brx_pos};
+ const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
+ if (!ldc_insn) {
+ return std::nullopt;
+ }
+ const LDC::Encoding ldc{*ldc_insn};
+ const u32 cbuf_index{static_cast<u32>(ldc.index)};
+ const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
+ const IR::Reg ldc_reg{ldc.src_reg};
+
+ const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
+ if (!shl_insn) {
+ return std::nullopt;
+ }
+ const Encoding shl{*shl_insn};
+ const IR::Reg shl_reg{shl.src_reg};
+
+ const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
+ if (!imnmx_insn) {
+ return std::nullopt;
+ }
+ const Encoding imnmx{*imnmx_insn};
+ if (imnmx.is_negative != 0) {
+ return std::nullopt;
+ }
+ const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
+ return IndirectBranchTableInfo{
+ .cbuf_index = cbuf_index,
+ .cbuf_offset = cbuf_offset,
+ .num_entries = imnmx_immediate + 1,
+ .branch_offset = brx_offset,
+ .branch_reg = brx_reg,
+ };
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell {
+
+struct IndirectBranchTableInfo {
+ u32 cbuf_index{};
+ u32 cbuf_offset{};
+ u32 num_entries{};
+ s32 branch_offset{};
+ IR::Reg branch_reg{};
+};
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+ Location block_begin);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell {
+
+struct Predicate {
+ Predicate() = default;
+ Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
+ Predicate(bool value) : index{7}, negated{!value} {}
+ Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
+
+ unsigned index;
+ bool negated;
+};
+
+inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
+ return lhs.index == rhs.index && lhs.negated == rhs.negated;
+}
+
+inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
+ return !(lhs == rhs);
+}
+
+union Instruction {
+ Instruction(u64 raw_) : raw{raw_} {}
+
+ u64 raw;
+
+ union {
+ BitField<5, 1, u64> is_cbuf;
+ BitField<0, 5, IR::FlowTest> flow_test;
+
+ [[nodiscard]] u32 Absolute() const noexcept {
+ return static_cast<u32>(absolute);
+ }
+
+ [[nodiscard]] s32 Offset() const noexcept {
+ return static_cast<s32>(offset);
+ }
+
+ private:
+ BitField<20, 24, s64> offset;
+ BitField<20, 32, u64> absolute;
+ } branch;
+
+ [[nodiscard]] Predicate Pred() const noexcept {
+ return Predicate{pred};
+ }
+
+private:
+ BitField<16, 4, u64> pred;
+};
+static_assert(std::is_trivially_copyable_v<Instruction>);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <iterator>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::Maxwell {
+
+class Location {
+ static constexpr u32 VIRTUAL_BIAS{4};
+
+public:
+ constexpr Location() = default;
+
+ constexpr Location(u32 initial_offset) : offset{initial_offset} {
+ if (initial_offset % 8 != 0) {
+ throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
+ }
+ Align();
+ }
+
+ constexpr Location Virtual() const noexcept {
+ Location virtual_location;
+ virtual_location.offset = offset - VIRTUAL_BIAS;
+ return virtual_location;
+ }
+
+ [[nodiscard]] constexpr u32 Offset() const noexcept {
+ return offset;
+ }
+
+ [[nodiscard]] constexpr bool IsVirtual() const {
+ return offset % 8 == VIRTUAL_BIAS;
+ }
+
+ constexpr auto operator<=>(const Location&) const noexcept = default;
+
+ constexpr Location operator++() noexcept {
+ const Location copy{*this};
+ Step();
+ return copy;
+ }
+
+ constexpr Location operator++(int) noexcept {
+ Step();
+ return *this;
+ }
+
+ constexpr Location operator--() noexcept {
+ const Location copy{*this};
+ Back();
+ return copy;
+ }
+
+ constexpr Location operator--(int) noexcept {
+ Back();
+ return *this;
+ }
+
+ constexpr Location operator+(int number) const {
+ Location new_pc{*this};
+ while (number > 0) {
+ --number;
+ ++new_pc;
+ }
+ while (number < 0) {
+ ++number;
+ --new_pc;
+ }
+ return new_pc;
+ }
+
+ constexpr Location operator-(int number) const {
+ return operator+(-number);
+ }
+
+private:
+ constexpr void Align() {
+ offset += offset % 32 == 0 ? 8 : 0;
+ }
+
+ constexpr void Step() {
+ offset += 8 + (offset % 32 == 24 ? 8 : 0);
+ }
+
+ constexpr void Back() {
+ offset -= 8 + (offset % 32 == 8 ? 8 : 0);
+ }
+
+ u32 offset{0xcccccccc};
+};
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Location> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
+ }
+};
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+INST(AL2P, "AL2P", "1110 1111 1010 0---")
+INST(ALD, "ALD", "1110 1111 1101 1---")
+INST(AST, "AST", "1110 1111 1111 0---")
+INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
+INST(ATOM, "ATOM", "1110 1101 ---- ----")
+INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
+INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
+INST(B2R, "B2R", "1111 0000 1011 1---")
+INST(BAR, "BAR", "1111 0000 1010 1---")
+INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
+INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
+INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
+INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
+INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
+INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
+INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
+INST(BPT, "BPT", "1110 0011 1010 ----")
+INST(BRA, "BRA", "1110 0010 0100 ----")
+INST(BRK, "BRK", "1110 0011 0100 ----")
+INST(BRX, "BRX", "1110 0010 0101 ----")
+INST(CAL, "CAL", "1110 0010 0110 ----")
+INST(CCTL, "CCTL", "1110 1111 011- ----")
+INST(CCTLL, "CCTLL", "1110 1111 100- ----")
+INST(CONT, "CONT", "1110 0011 0101 ----")
+INST(CS2R, "CS2R", "0101 0000 1100 1---")
+INST(CSET, "CSET", "0101 0000 1001 1---")
+INST(CSETP, "CSETP", "0101 0000 1010 0---")
+INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
+INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
+INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
+INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
+INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
+INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
+INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
+INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
+INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
+INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
+INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
+INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
+INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
+INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
+INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
+INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
+INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
+INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
+INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
+INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
+INST(EXIT, "EXIT", "1110 0011 0000 ----")
+INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
+INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
+INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
+INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
+INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
+INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
+INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
+INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
+INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
+INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
+INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
+INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
+INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
+INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
+INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
+INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
+INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
+INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
+INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
+INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
+INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
+INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
+INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
+INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
+INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
+INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
+INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
+INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
+INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
+INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
+INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
+INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
+INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
+INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
+INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
+INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
+INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
+INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
+INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
+INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
+INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
+INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
+INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
+INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
+INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
+INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
+INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
+INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
+INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
+INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
+INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
+INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
+INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
+INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
+INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
+INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
+INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
+INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
+INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
+INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
+INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
+INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
+INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
+INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
+INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
+INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
+INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
+INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
+INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
+INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
+INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
+INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
+INST(IADD32I, "IADD32I", "0001 110- ---- ----")
+INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
+INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
+INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
+INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
+INST(IDE, "IDE", "1110 0011 1001 ----")
+INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
+INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
+INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
+INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
+INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
+INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
+INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
+INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
+INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
+INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
+INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
+INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
+INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
+INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
+INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
+INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
+INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
+INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
+INST(IPA, "IPA", "1110 0000 ---- ----")
+INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
+INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
+INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
+INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
+INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
+INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
+INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
+INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
+INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
+INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
+INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
+INST(JCAL, "JCAL", "1110 0010 0010 ----")
+INST(JMP, "JMP", "1110 0010 0001 ----")
+INST(JMX, "JMX", "1110 0010 0000 ----")
+INST(KIL, "KIL", "1110 0011 0011 ----")
+INST(LD, "LD", "100- ---- ---- ----")
+INST(LDC, "LDC", "1110 1111 1001 0---")
+INST(LDG, "LDG", "1110 1110 1101 0---")
+INST(LDL, "LDL", "1110 1111 0100 0---")
+INST(LDS, "LDS", "1110 1111 0100 1---")
+INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
+INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
+INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
+INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
+INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
+INST(LEPC, "LEPC", "0101 0000 1101 0---")
+INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
+INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
+INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
+INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
+INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
+INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
+INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
+INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
+INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
+INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
+INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
+INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
+INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
+INST(MUFU, "MUFU", "0101 0000 1000 0---")
+INST(NOP, "NOP", "0101 0000 1011 0---")
+INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
+INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
+INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
+INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
+INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
+INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
+INST(PBK, "PBK", "1110 0010 1010 ----")
+INST(PCNT, "PCNT", "1110 0010 1011 ----")
+INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
+INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
+INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
+INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
+INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
+INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
+INST(PRET, "PRET", "1110 0010 0111 ----")
+INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
+INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
+INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
+INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
+INST(PSET, "PSET", "0101 0000 1000 1---")
+INST(PSETP, "PSETP", "0101 0000 1001 0---")
+INST(R2B, "R2B", "1111 0000 1100 0---")
+INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
+INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
+INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
+INST(RAM, "RAM", "1110 0011 1000 ----")
+INST(RED, "RED", "1110 1011 1111 1---")
+INST(RET, "RET", "1110 0011 0010 ----")
+INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
+INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
+INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
+INST(RTT, "RTT", "1110 0011 0110 ----")
+INST(S2R, "S2R", "1111 0000 1100 1---")
+INST(SAM, "SAM", "1110 0011 0111 ----")
+INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
+INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
+INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
+INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
+INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
+INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
+INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
+INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
+INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
+INST(SHFL, "SHFL", "1110 1111 0001 0---")
+INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
+INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
+INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
+INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
+INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
+INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
+INST(SSY, "SSY", "1110 0010 1001 ----")
+INST(ST, "ST", "101- ---- ---- ----")
+INST(STG, "STG", "1110 1110 1101 1---")
+INST(STL, "STL", "1110 1111 0101 0---")
+INST(STP, "STP", "1110 1110 1010 0---")
+INST(STS, "STS", "1110 1111 0101 1---")
+INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
+INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
+INST(SULD, "SULD", "1110 1011 000- ----")
+INST(SURED, "SURED", "1110 1011 010- ----")
+INST(SUST, "SUST", "1110 1011 001- ----")
+INST(SYNC, "SYNC", "1111 0000 1111 1---")
+INST(TEX, "TEX", "1100 0--- ---- ----")
+INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
+INST(TEXS, "TEXS", "1101 -00- ---- ----")
+INST(TLD, "TLD", "1101 1100 ---- ----")
+INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
+INST(TLD4, "TLD4", "1100 10-- ---- ----")
+INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
+INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
+INST(TLDS, "TLDS", "1101 -01- ---- ----")
+INST(TMML, "TMML", "1101 1111 0101 1---")
+INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
+INST(TXA, "TXA", "1101 1111 0100 0---")
+INST(TXD, "TXD", "1101 1110 00-- ----")
+INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
+INST(TXQ, "TXQ", "1101 1111 0100 1---")
+INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
+INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
+INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
+INST(VADD, "VADD", "0010 00-- ---- ----")
+INST(VMAD, "VMAD", "0101 1111 ---- ----")
+INST(VMNMX, "VMNMX", "0011 101- ---- ----")
+INST(VOTE, "VOTE", "0101 0000 1101 1---")
+INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
+INST(VSET, "VSET", "0100 000- ---- ----")
+INST(VSETP, "VSETP", "0101 0000 1111 0---")
+INST(VSHL, "VSHL", "0101 0111 ---- ----")
+INST(VSHR, "VSHR", "0101 0110 ---- ----")
+INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
+INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
+INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
+INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
+
+// Removed due to its weird formatting making fast tables larger
+// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+namespace {
+constexpr std::array NAME_TABLE{
+#define INST(name, cute, encode) cute,
+#include "maxwell.inc"
+#undef INST
+};
+} // Anonymous namespace
+
+const char* NameOf(Opcode opcode) {
+ if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
+ throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
+ }
+ return NAME_TABLE[static_cast<size_t>(opcode)];
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::Maxwell {
+
+enum class Opcode {
+#define INST(name, cute, encode) name,
+#include "maxwell.inc"
+#undef INST
+};
+
+const char* NameOf(Opcode opcode);
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Opcode> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
+ return format_to(ctx.out(), "{}", NameOf(opcode));
+ }
+};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <version>
+
+#include <fmt/format.h>
+
+#include <boost/intrusive/list.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+namespace {
+struct Statement;
+
+// Use normal_link because we are not guaranteed to destroy the tree in order
+using ListBaseHook =
+ boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
+
+using Tree = boost::intrusive::list<Statement,
+ // Allow using Statement without a definition
+ boost::intrusive::base_hook<ListBaseHook>,
+ // Avoid linear complexity on splice, size is never called
+ boost::intrusive::constant_time_size<false>>;
+using Node = Tree::iterator;
+
+enum class StatementType {
+ Code,
+ Goto,
+ Label,
+ If,
+ Loop,
+ Break,
+ Return,
+ Kill,
+ Unreachable,
+ Function,
+ Identity,
+ Not,
+ Or,
+ SetVariable,
+ SetIndirectBranchVariable,
+ Variable,
+ IndirectBranchCond,
+};
+
+bool HasChildren(StatementType type) {
+ switch (type) {
+ case StatementType::If:
+ case StatementType::Loop:
+ case StatementType::Function:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct Goto {};
+struct Label {};
+struct If {};
+struct Loop {};
+struct Break {};
+struct Return {};
+struct Kill {};
+struct Unreachable {};
+struct FunctionTag {};
+struct Identity {};
+struct Not {};
+struct Or {};
+struct SetVariable {};
+struct SetIndirectBranchVariable {};
+struct Variable {};
+struct IndirectBranchCond {};
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
+#endif
+struct Statement : ListBaseHook {
+ Statement(const Flow::Block* block_, Statement* up_)
+ : block{block_}, up{up_}, type{StatementType::Code} {}
+ Statement(Goto, Statement* cond_, Node label_, Statement* up_)
+ : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
+ Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
+ Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
+ : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
+ Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
+ : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
+ Statement(Break, Statement* cond_, Statement* up_)
+ : cond{cond_}, up{up_}, type{StatementType::Break} {}
+ Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
+ Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
+ Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
+ Statement(FunctionTag) : children{}, type{StatementType::Function} {}
+ Statement(Identity, IR::Condition cond_, Statement* up_)
+ : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
+ Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
+ Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
+ : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
+ Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
+ : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
+ Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
+ : branch_offset{branch_offset_},
+ branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
+ Statement(Variable, u32 id_, Statement* up_)
+ : id{id_}, up{up_}, type{StatementType::Variable} {}
+ Statement(IndirectBranchCond, u32 location_, Statement* up_)
+ : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}
+
+ ~Statement() {
+ if (HasChildren(type)) {
+ std::destroy_at(&children);
+ }
+ }
+
+ union {
+ const Flow::Block* block;
+ Node label;
+ Tree children;
+ IR::Condition guest_cond;
+ Statement* op;
+ Statement* op_a;
+ u32 location;
+ s32 branch_offset;
+ };
+ union {
+ Statement* cond;
+ Statement* op_b;
+ u32 id;
+ IR::Reg branch_reg;
+ };
+ Statement* up{};
+ StatementType type;
+};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+std::string DumpExpr(const Statement* stmt) {
+ switch (stmt->type) {
+ case StatementType::Identity:
+ return fmt::format("{}", stmt->guest_cond);
+ case StatementType::Not:
+ return fmt::format("!{}", DumpExpr(stmt->op));
+ case StatementType::Or:
+ return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
+ case StatementType::Variable:
+ return fmt::format("goto_L{}", stmt->id);
+ case StatementType::IndirectBranchCond:
+ return fmt::format("(indirect_branch == {:x})", stmt->location);
+ default:
+ return "<invalid type>";
+ }
+}
+
+[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
+ std::string ret;
+ std::string indent(indentation, ' ');
+ for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
+ switch (stmt->type) {
+ case StatementType::Code:
+ ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
+ stmt->block->begin.Offset(), stmt->block->end.Offset(),
+ reinterpret_cast<uintptr_t>(stmt->block));
+ break;
+ case StatementType::Goto:
+ ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
+ stmt->label->id);
+ break;
+ case StatementType::Label:
+ ret += fmt::format("{}L{}:\n", indent, stmt->id);
+ break;
+ case StatementType::If:
+ ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
+ ret += DumpTree(stmt->children, indentation + 4);
+ ret += fmt::format("{} }}\n", indent);
+ break;
+ case StatementType::Loop:
+ ret += fmt::format("{} do {{\n", indent);
+ ret += DumpTree(stmt->children, indentation + 4);
+ ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
+ break;
+ case StatementType::Break:
+ ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
+ break;
+ case StatementType::Return:
+ ret += fmt::format("{} return;\n", indent);
+ break;
+ case StatementType::Kill:
+ ret += fmt::format("{} kill;\n", indent);
+ break;
+ case StatementType::Unreachable:
+ ret += fmt::format("{} unreachable;\n", indent);
+ break;
+ case StatementType::SetVariable:
+ ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
+ break;
+ case StatementType::SetIndirectBranchVariable:
+ ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
+ stmt->branch_offset);
+ break;
+ case StatementType::Function:
+ case StatementType::Identity:
+ case StatementType::Not:
+ case StatementType::Or:
+ case StatementType::Variable:
+ case StatementType::IndirectBranchCond:
+ throw LogicError("Statement can't be printed");
+ }
+ }
+ return ret;
+}
+
+void SanitizeNoBreaks(const Tree& tree) {
+ if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
+ throw NotImplementedException("Capturing statement with break nodes");
+ }
+}
+
+size_t Level(Node stmt) {
+ size_t level{0};
+ Statement* node{stmt->up};
+ while (node) {
+ ++level;
+ node = node->up;
+ }
+ return level;
+}
+
+bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
+ const size_t goto_level{Level(goto_stmt)};
+ const size_t label_level{Level(label_stmt)};
+ size_t min_level;
+ size_t max_level;
+ Node min;
+ Node max;
+ if (label_level < goto_level) {
+ min_level = label_level;
+ max_level = goto_level;
+ min = label_stmt;
+ max = goto_stmt;
+ } else { // goto_level < label_level
+ min_level = goto_level;
+ max_level = label_level;
+ min = goto_stmt;
+ max = label_stmt;
+ }
+ while (max_level > min_level) {
+ --max_level;
+ max = max->up;
+ }
+ return min->up == max->up;
+}
+
+bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
+ return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
+}
+
+[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
+ Node it{goto_stmt};
+ do {
+ if (it == label_stmt) {
+ return true;
+ }
+ --it;
+ } while (it != goto_stmt->up->children.begin());
+ while (it != goto_stmt->up->children.end()) {
+ if (it == label_stmt) {
+ return true;
+ }
+ ++it;
+ }
+ return false;
+}
+
+Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
+ Statement* const parent{uncle->up};
+ Statement* it{&*nephew};
+ while (it->up != parent) {
+ it = it->up;
+ }
+ return Tree::s_iterator_to(*it);
+}
+
+bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
+ const Node end{right_sibling->up->children.end()};
+ for (auto it = right_sibling; it != end; ++it) {
+ if (it == left_sibling) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
+ const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
+ return AreOrdered(sibling, goto_stmt);
+}
+
+class GotoPass {
+public:
+ explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
+ std::vector gotos{BuildTree(cfg)};
+ const auto end{gotos.rend()};
+ for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
+ RemoveGoto(*goto_stmt);
+ }
+ }
+
+ Statement& RootStatement() noexcept {
+ return root_stmt;
+ }
+
+private:
+ void RemoveGoto(Node goto_stmt) {
+ // Force goto_stmt and label_stmt to be directly related
+ const Node label_stmt{goto_stmt->label};
+ if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
+ // Move goto_stmt out using outward-movement transformation until it becomes
+ // directly related to label_stmt
+ while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
+ goto_stmt = MoveOutward(goto_stmt);
+ }
+ }
+ // Force goto_stmt and label_stmt to be siblings
+ if (IsDirectlyRelated(goto_stmt, label_stmt)) {
+ const size_t label_level{Level(label_stmt)};
+ size_t goto_level{Level(goto_stmt)};
+ if (goto_level > label_level) {
+ // Move goto_stmt out of its level using outward-movement transformations
+ while (goto_level > label_level) {
+ goto_stmt = MoveOutward(goto_stmt);
+ --goto_level;
+ }
+ } else { // Level(goto_stmt) < Level(label_stmt)
+ if (NeedsLift(goto_stmt, label_stmt)) {
+ // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
+ // transformations
+ goto_stmt = Lift(goto_stmt);
+ }
+ // Move goto_stmt into label_stmt's level using inward-movement transformation
+ while (goto_level < label_level) {
+ goto_stmt = MoveInward(goto_stmt);
+ ++goto_level;
+ }
+ }
+ }
+ // Expensive operation:
+ // if (!AreSiblings(goto_stmt, label_stmt)) {
+ // throw LogicError("Goto is not a sibling with the label");
+ // }
+ // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
+ if (std::next(goto_stmt) == label_stmt) {
+ // Simply eliminate the goto if the label is next to it
+ goto_stmt->up->children.erase(goto_stmt);
+ } else if (AreOrdered(goto_stmt, label_stmt)) {
+ // Eliminate goto_stmt with a conditional
+ EliminateAsConditional(goto_stmt, label_stmt);
+ } else {
+ // Eliminate goto_stmt with a loop
+ EliminateAsLoop(goto_stmt, label_stmt);
+ }
+ }
+
+ std::vector<Node> BuildTree(Flow::CFG& cfg) {
+ u32 label_id{0};
+ std::vector<Node> gotos;
+ Flow::Function& first_function{cfg.Functions().front()};
+ BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
+ return gotos;
+ }
+
+ void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
+ std::vector<Node>& gotos, Node function_insert_point,
+ std::optional<Node> return_label) {
+ Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
+ Tree& root{root_stmt.children};
+ std::unordered_map<Flow::Block*, Node> local_labels;
+ local_labels.reserve(function.blocks.size());
+
+ for (Flow::Block& block : function.blocks) {
+ Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
+ const Node label_it{root.insert(function_insert_point, *label)};
+ local_labels.emplace(&block, label_it);
+ ++label_id;
+ }
+ for (Flow::Block& block : function.blocks) {
+ const Node label{local_labels.at(&block)};
+ // Insertion point
+ const Node ip{std::next(label)};
+
+ // Reset goto variables before the first block and after its respective label
+ const auto make_reset_variable{[&]() -> Statement& {
+ return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
+ }};
+ root.push_front(make_reset_variable());
+ root.insert(ip, make_reset_variable());
+ root.insert(ip, *pool.Create(&block, &root_stmt));
+
+ switch (block.end_class) {
+ case Flow::EndClass::Branch: {
+ Statement* const always_cond{
+ pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
+ if (block.cond == IR::Condition{true}) {
+ const Node true_label{local_labels.at(block.branch_true)};
+ gotos.push_back(
+ root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
+ } else if (block.cond == IR::Condition{false}) {
+ const Node false_label{local_labels.at(block.branch_false)};
+ gotos.push_back(root.insert(
+ ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
+ } else {
+ const Node true_label{local_labels.at(block.branch_true)};
+ const Node false_label{local_labels.at(block.branch_false)};
+ Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
+ gotos.push_back(
+ root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
+ gotos.push_back(root.insert(
+ ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
+ }
+ break;
+ }
+ case Flow::EndClass::IndirectBranch:
+ root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
+ block.branch_offset, &root_stmt));
+ for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
+ const Node indirect_label{local_labels.at(indirect.block)};
+ Statement* cond{
+ pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
+ Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
+ gotos.push_back(root.insert(ip, *goto_stmt));
+ }
+ root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
+ break;
+ case Flow::EndClass::Call: {
+ Flow::Function& call{cfg.Functions()[block.function_call]};
+ const Node call_return_label{local_labels.at(block.return_block)};
+ BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
+ break;
+ }
+ case Flow::EndClass::Exit:
+ root.insert(ip, *pool.Create(Return{}, &root_stmt));
+ break;
+ case Flow::EndClass::Return: {
+ Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
+ auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
+ gotos.push_back(root.insert(ip, *goto_stmt));
+ break;
+ }
+ case Flow::EndClass::Kill:
+ root.insert(ip, *pool.Create(Kill{}, &root_stmt));
+ break;
+ }
+ }
+ }
+
+ void UpdateTreeUp(Statement* tree) {
+ for (Statement& stmt : tree->children) {
+ stmt.up = tree;
+ }
+ }
+
+ void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
+ Tree& body{goto_stmt->up->children};
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
+ Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
+ Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+ body.erase(goto_stmt);
+ }
+
+ void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
+ Tree& body{goto_stmt->up->children};
+ Tree loop_body;
+ loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
+ Statement* const cond{goto_stmt->cond};
+ Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
+ UpdateTreeUp(loop);
+ body.insert(goto_stmt, *loop);
+ body.erase(goto_stmt);
+ }
+
+ [[nodiscard]] Node MoveOutward(Node goto_stmt) {
+ switch (goto_stmt->up->type) {
+ case StatementType::If:
+ return MoveOutwardIf(goto_stmt);
+ case StatementType::Loop:
+ return MoveOutwardLoop(goto_stmt);
+ default:
+ throw LogicError("Invalid outward movement");
+ }
+ }
+
+ [[nodiscard]] Node MoveInward(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const Node label{goto_stmt->label};
+ const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
+ const u32 label_id{label->id};
+
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+ body.insert(goto_stmt, *set_var);
+
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
+ Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
+ if (!if_body.empty()) {
+ Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+ }
+ body.erase(goto_stmt);
+
+ switch (label_nested_stmt->type) {
+ case StatementType::If:
+ // Update nested if condition
+ label_nested_stmt->cond =
+ pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
+ break;
+ case StatementType::Loop:
+ break;
+ default:
+ throw LogicError("Invalid inward movement");
+ }
+ Tree& nested_tree{label_nested_stmt->children};
+ Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
+ return nested_tree.insert(nested_tree.begin(), *new_goto);
+ }
+
+ [[nodiscard]] Node Lift(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const Node label{goto_stmt->label};
+ const u32 label_id{label->id};
+ const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
+
+ Tree loop_body;
+ loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
+ SanitizeNoBreaks(loop_body);
+ Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
+ UpdateTreeUp(loop_stmt);
+ body.insert(goto_stmt, *loop_stmt);
+
+ Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
+ loop_stmt->children.push_front(*new_goto);
+ const Node new_goto_node{loop_stmt->children.begin()};
+
+ Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
+ loop_stmt->children.push_back(*set_var);
+
+ body.erase(goto_stmt);
+ return new_goto_node;
+ }
+
+ Node MoveOutwardIf(Node goto_stmt) {
+ const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
+ Tree& body{parent->children};
+ const u32 label_id{goto_stmt->label->id};
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
+ body.insert(goto_stmt, *set_goto_var);
+
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
+ if_body.pop_front();
+ Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
+ Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+
+ body.erase(goto_stmt);
+
+ Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
+ Tree& parent_tree{parent->up->children};
+ return parent_tree.insert(std::next(parent), *new_goto);
+ }
+
+ Node MoveOutwardLoop(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const u32 label_id{goto_stmt->label->id};
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+ Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
+ body.insert(goto_stmt, *set_goto_var);
+ body.insert(goto_stmt, *break_stmt);
+ body.erase(goto_stmt);
+
+ const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
+ Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
+ Tree& parent_tree{loop->up->children};
+ return parent_tree.insert(std::next(loop), *new_goto);
+ }
+
+ ObjectPool<Statement>& pool;
+ Statement root_stmt{FunctionTag{}};
+};
+
+[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
+ Tree& tree{stmt.up->children};
+ const Node end{tree.end()};
+ Node forward_node{std::next(Tree::s_iterator_to(stmt))};
+ while (forward_node != end && !HasChildren(forward_node->type)) {
+ if (forward_node->type == StatementType::Code) {
+ return &*forward_node;
+ }
+ ++forward_node;
+ }
+ return nullptr;
+}
+
+[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
+ switch (stmt.type) {
+ case StatementType::Identity:
+ return ir.Condition(stmt.guest_cond);
+ case StatementType::Not:
+ return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
+ case StatementType::Or:
+ return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
+ case StatementType::Variable:
+ return ir.GetGotoVariable(stmt.id);
+ case StatementType::IndirectBranchCond:
+ return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
+ default:
+ throw NotImplementedException("Statement type {}", stmt.type);
+ }
+}
+
+class TranslatePass {
+public:
+ TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
+ ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
+ IR::AbstractSyntaxList& syntax_list_)
+ : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
+ syntax_list{syntax_list_} {
+ Visit(root_stmt, nullptr, nullptr);
+
+ IR::Block& first_block{*syntax_list.front().data.block};
+ IR::IREmitter ir(first_block, first_block.begin());
+ ir.Prologue();
+ }
+
+private:
+ void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
+ IR::Block* current_block{};
+ const auto ensure_block{[&] {
+ if (current_block) {
+ return;
+ }
+ current_block = block_pool.Create(inst_pool);
+ auto& node{syntax_list.emplace_back()};
+ node.type = IR::AbstractSyntaxNode::Type::Block;
+ node.data.block = current_block;
+ }};
+ Tree& tree{parent.children};
+ for (auto it = tree.begin(); it != tree.end(); ++it) {
+ Statement& stmt{*it};
+ switch (stmt.type) {
+ case StatementType::Label:
+ // Labels can be ignored
+ break;
+ case StatementType::Code: {
+ ensure_block();
+ Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
+ break;
+ }
+ case StatementType::SetVariable: {
+ ensure_block();
+ IR::IREmitter ir{*current_block};
+ ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
+ break;
+ }
+ case StatementType::SetIndirectBranchVariable: {
+ ensure_block();
+ IR::IREmitter ir{*current_block};
+ IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
+ ir.SetIndirectBranchVariable(address);
+ break;
+ }
+ case StatementType::If: {
+ ensure_block();
+ IR::Block* const merge_block{MergeBlock(parent, stmt)};
+
+ // Implement if header block
+ IR::IREmitter ir{*current_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+
+ const size_t if_node_index{syntax_list.size()};
+ syntax_list.emplace_back();
+
+ // Visit children
+ const size_t then_block_index{syntax_list.size()};
+ Visit(stmt, break_block, merge_block);
+
+ IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
+ current_block->AddBranch(then_block);
+ current_block->AddBranch(merge_block);
+ current_block = merge_block;
+
+ auto& if_node{syntax_list[if_node_index]};
+ if_node.type = IR::AbstractSyntaxNode::Type::If;
+ if_node.data.if_node.cond = cond;
+ if_node.data.if_node.body = then_block;
+ if_node.data.if_node.merge = merge_block;
+
+ auto& endif_node{syntax_list.emplace_back()};
+ endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
+ endif_node.data.end_if.merge = merge_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = merge_block;
+ break;
+ }
+ case StatementType::Loop: {
+ IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
+ if (current_block) {
+ current_block->AddBranch(loop_header_block);
+ }
+ auto& header_node{syntax_list.emplace_back()};
+ header_node.type = IR::AbstractSyntaxNode::Type::Block;
+ header_node.data.block = loop_header_block;
+
+ IR::Block* const continue_block{block_pool.Create(inst_pool)};
+ IR::Block* const merge_block{MergeBlock(parent, stmt)};
+
+ const size_t loop_node_index{syntax_list.size()};
+ syntax_list.emplace_back();
+
+ // Visit children
+ const size_t body_block_index{syntax_list.size()};
+ Visit(stmt, merge_block, continue_block);
+
+ // The continue block is located at the end of the loop
+ IR::IREmitter ir{*continue_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+
+ IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
+ loop_header_block->AddBranch(body_block);
+
+ continue_block->AddBranch(loop_header_block);
+ continue_block->AddBranch(merge_block);
+
+ current_block = merge_block;
+
+ auto& loop{syntax_list[loop_node_index]};
+ loop.type = IR::AbstractSyntaxNode::Type::Loop;
+ loop.data.loop.body = body_block;
+ loop.data.loop.continue_block = continue_block;
+ loop.data.loop.merge = merge_block;
+
+ auto& continue_block_node{syntax_list.emplace_back()};
+ continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
+ continue_block_node.data.block = continue_block;
+
+ auto& repeat{syntax_list.emplace_back()};
+ repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
+ repeat.data.repeat.cond = cond;
+ repeat.data.repeat.loop_header = loop_header_block;
+ repeat.data.repeat.merge = merge_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = merge_block;
+ break;
+ }
+ case StatementType::Break: {
+ ensure_block();
+ IR::Block* const skip_block{MergeBlock(parent, stmt)};
+
+ IR::IREmitter ir{*current_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+ current_block->AddBranch(break_block);
+ current_block->AddBranch(skip_block);
+ current_block = skip_block;
+
+ auto& break_node{syntax_list.emplace_back()};
+ break_node.type = IR::AbstractSyntaxNode::Type::Break;
+ break_node.data.break_node.cond = cond;
+ break_node.data.break_node.merge = break_block;
+ break_node.data.break_node.skip = skip_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = skip_block;
+ break;
+ }
+ case StatementType::Return: {
+ ensure_block();
+ IR::IREmitter{*current_block}.Epilogue();
+ current_block = nullptr;
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
+ break;
+ }
+ case StatementType::Kill: {
+ ensure_block();
+ IR::Block* demote_block{MergeBlock(parent, stmt)};
+ IR::IREmitter{*current_block}.DemoteToHelperInvocation();
+ current_block->AddBranch(demote_block);
+ current_block = demote_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = demote_block;
+ break;
+ }
+ case StatementType::Unreachable: {
+ ensure_block();
+ current_block = nullptr;
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
+ break;
+ }
+ default:
+ throw NotImplementedException("Statement type {}", stmt.type);
+ }
+ }
+ if (current_block) {
+ if (fallthrough_block) {
+ current_block->AddBranch(fallthrough_block);
+ } else {
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
+ }
+ }
+ }
+
+ IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
+ Statement* merge_stmt{TryFindForwardBlock(stmt)};
+ if (!merge_stmt) {
+ // Create a merge block we can visit later
+ merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
+ parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
+ }
+ return block_pool.Create(inst_pool);
+ }
+
+ ObjectPool<Statement>& stmt_pool;
+ ObjectPool<IR::Inst>& inst_pool;
+ ObjectPool<IR::Block>& block_pool;
+ Environment& env;
+ IR::AbstractSyntaxList& syntax_list;
+
+// TODO: C++20 Remove this when all compilers support constexpr std::vector
+#if __cpp_lib_constexpr_vector >= 201907
+ static constexpr Flow::Block dummy_flow_block;
+#else
+ const Flow::Block dummy_flow_block;
+#endif
+};
+} // Anonymous namespace
+
+IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg) {
+ ObjectPool<Statement> stmt_pool{64};
+ GotoPass goto_pass{cfg, stmt_pool};
+ Statement& root{goto_pass.RootStatement()};
+ IR::AbstractSyntaxList syntax_list;
+ TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+ return syntax_list;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+ SAFEADD,
+};
+
+enum class AtomSize : u64 {
+ U32,
+ S32,
+ U64,
+ F32,
+ F16x2,
+ S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+ AtomOp op, bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.GlobalAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.GlobalAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.GlobalAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.GlobalAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.GlobalAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.GlobalAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.GlobalAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atom Operation {}", op);
+ }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+ AtomSize size) {
+ static constexpr IR::FpControl f16_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ static constexpr IR::FpControl f32_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::FTZ,
+ };
+ switch (op) {
+ case AtomOp::ADD:
+ return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+ : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+ default:
+ throw NotImplementedException("FP Atom Operation {}", op);
+ }
+}
+
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<28, 20, s64> addr_offset;
+ BitField<28, 20, u64> rz_addr_offset;
+ BitField<48, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ return v.L(mem.addr_reg);
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+ // TODO: SAFEADD
+ switch (size) {
+ case AtomSize::S32:
+ case AtomSize::U64:
+ return (op == AtomOp::INC || op == AtomOp::DEC);
+ case AtomSize::S64:
+ return !(op == AtomOp::MIN || op == AtomOp::MAX);
+ case AtomSize::F32:
+ return op != AtomOp::ADD;
+ case AtomSize::F16x2:
+ return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+ default:
+ return false;
+ }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F32:
+ case AtomSize::F16x2:
+ return ir.LoadGlobal32(offset);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ir.PackUint2x32(ir.LoadGlobal64(offset));
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F16x2:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return v.L(dest_reg, IR::U64{result});
+ case AtomSize::F32:
+ return v.F(dest_reg, IR::F32{result});
+ default:
+ break;
+ }
+}
+
+IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
+ AtomSize size, AtomOp op) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
+ case AtomSize::F32:
+ return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
+ case AtomSize::F16x2: {
+ return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
+ }
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
+ const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
+ IR::Value result;
+ if (AtomOpNotApplicable(size, op)) {
+ result = LoadGlobal(v.ir, offset, size);
+ } else {
+ result = ApplyAtomOp(v, operand_reg, offset, size, op);
+ }
+ if (write_dest) {
+ StoreResult(v, dest_reg, result, size);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<49, 3, AtomSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atom{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<20, 3, AtomSize> size;
+ BitField<23, 3, AtomOp> op;
+ } const red{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class AtomsSize : u64 {
+ U32,
+ S32,
+ U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.SharedAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.SharedAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.SharedAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.SharedAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.SharedAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.SharedAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.SharedAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.SharedAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.SharedAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atoms Operation {}", op);
+ }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<30, 22, u64> absolute_offset;
+ BitField<30, 22, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+ switch (size) {
+ case AtomsSize::U32:
+ case AtomsSize::S32:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomsSize::U64:
+ return v.L(dest_reg, IR::U64{result});
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<28, 2, AtomsSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atoms{insn};
+
+ const bool size_64{atoms.size == AtomsSize::U64};
+ if (size_64 && atoms.op != AtomOp::EXCH) {
+ throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+ }
+ const bool is_signed{atoms.size == AtomsSize::S32};
+ const IR::U32 offset{AtomsOffset(*this, insn)};
+
+ IR::Value result;
+ if (size_64) {
+ result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+ } else {
+ result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+ }
+ StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class BitSize : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+void TranslatorVisitor::AL2P(u64 inst) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> result_register;
+ BitField<8, 8, IR::Reg> indexing_register;
+ BitField<20, 11, s64> offset;
+ BitField<47, 2, BitSize> bitsize;
+ } al2p{inst};
+ if (al2p.bitsize != BitSize::B32) {
+ throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
+ }
+ const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
+ const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
+ X(al2p.result_register, result);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Seems to be in CUDA terminology.
+enum class LocalScope : u64 {
+ CTA,
+ GL,
+ SYS,
+ VC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MEMBAR(u64 inst) {
+ union {
+ u64 raw;
+ BitField<8, 2, LocalScope> scope;
+ } const membar{inst};
+
+ if (membar.scope == LocalScope::CTA) {
+ ir.WorkgroupMemoryBarrier();
+ } else {
+ ir.DeviceMemoryBarrier();
+ }
+}
+
+void TranslatorVisitor::DEPBAR() {
+ // DEPBAR is a no-op
+}
+
+void TranslatorVisitor::BAR(u64 insn) {
+ enum class Mode {
+ RedPopc,
+ Scan,
+ RedAnd,
+ RedOr,
+ Sync,
+ Arrive,
+ };
+ union {
+ u64 raw;
+ BitField<43, 1, u64> is_a_imm;
+ BitField<44, 1, u64> is_b_imm;
+ BitField<8, 8, u64> imm_a;
+ BitField<20, 12, u64> imm_b;
+ BitField<42, 1, u64> neg_pred;
+ BitField<39, 3, IR::Pred> pred;
+ } const bar{insn};
+
+ const Mode mode{[insn] {
+ switch (insn & 0x0000009B00000000ULL) {
+ case 0x0000000200000000ULL:
+ return Mode::RedPopc;
+ case 0x0000000300000000ULL:
+ return Mode::Scan;
+ case 0x0000000A00000000ULL:
+ return Mode::RedAnd;
+ case 0x0000001200000000ULL:
+ return Mode::RedOr;
+ case 0x0000008000000000ULL:
+ return Mode::Sync;
+ case 0x0000008100000000ULL:
+ return Mode::Arrive;
+ }
+ throw NotImplementedException("Invalid encoding");
+ }()};
+ if (mode != Mode::Sync) {
+ throw NotImplementedException("BAR mode {}", mode);
+ }
+ if (bar.is_a_imm == 0) {
+ throw NotImplementedException("Non-immediate input A");
+ }
+ if (bar.imm_a != 0) {
+ throw NotImplementedException("Non-zero input A");
+ }
+ if (bar.is_b_imm == 0) {
+ throw NotImplementedException("Non-immediate input B");
+ }
+ if (bar.imm_b != 0) {
+ throw NotImplementedException("Non-zero input B");
+ }
+ if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
+ throw NotImplementedException("Non-true input predicate");
+ }
+ ir.Barrier();
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<40, 1, u64> brev;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const bfe{insn};
+
+ const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
+ const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+
+ // Common constants
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 one{v.ir.Imm32(1)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+ // Edge case conditions
+ const IR::U1 zero_count{v.ir.IEqual(count, zero)};
+ const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
+ const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
+
+ IR::U32 base{v.X(bfe.offset_reg)};
+ if (bfe.brev != 0) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
+ if (bfe.is_signed != 0) {
+ const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
+ const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
+ // Replicate condition
+ result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
+ // Exceeding condition
+ const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
+ result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
+ }
+ // Zero count condition
+ result = IR::U32{v.ir.Select(zero_count, zero, result)};
+
+ v.X(bfe.dest_reg, result);
+
+ if (bfe.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFE_reg(u64 insn) {
+ BFE(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::BFE_cbuf(u64 insn) {
+ BFE(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFE_imm(u64 insn) {
+ BFE(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> insert_reg;
+ BitField<47, 1, u64> cc;
+ } const bfi{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
+ const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+
+ // Edge case conditions
+ const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
+ const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
+
+ const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
+ const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
+
+ const IR::U32 insert{v.X(bfi.insert_reg)};
+ IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
+
+ result = IR::U32{v.ir.Select(exceed_offset, base, result)};
+
+ v.X(bfi.dest_reg, result);
+ if (bfi.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFI_reg(u64 insn) {
+ BFI(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_rc(u64 insn) {
+ BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFI_cr(u64 insn) {
+ BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_imm(u64 insn) {
+ BFI(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void Check(u64 insn) {
+ union {
+ u64 raw;
+ BitField<5, 1, u64> cbuf_mode;
+ BitField<6, 1, u64> lmt;
+ } const encoding{insn};
+
+ if (encoding.cbuf_mode != 0) {
+ throw NotImplementedException("Constant buffer mode");
+ }
+ if (encoding.lmt != 0) {
+ throw NotImplementedException("LMT");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BRX(u64 insn) {
+ Check(insn);
+}
+
+void TranslatorVisitor::JMX(u64 insn) {
+ Check(insn);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+
+namespace Shader::Maxwell {
+
+enum class FpRounding : u64 {
+ RN,
+ RM,
+ RP,
+ RZ,
+};
+
+enum class FmzMode : u64 {
+ None,
+ FTZ,
+ FMZ,
+ INVALIDFMZ3,
+};
+
+inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case FpRounding::RN:
+ return IR::FpRounding::RN;
+ case FpRounding::RM:
+ return IR::FpRounding::RM;
+ case FpRounding::RP:
+ return IR::FpRounding::RP;
+ case FpRounding::RZ:
+ return IR::FpRounding::RZ;
+ }
+ throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
+}
+
+inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
+ switch (fmz_mode) {
+ case FmzMode::None:
+ return IR::FmzMode::None;
+ case FmzMode::FTZ:
+ return IR::FmzMode::FTZ;
+ case FmzMode::FMZ:
+ // FMZ is manually handled in the instruction
+ return IR::FmzMode::FTZ;
+ case FmzMode::INVALIDFMZ3:
+ break;
+ }
+ throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+
+namespace Shader::Maxwell {
+IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return ir.ILessThan(operand_1, operand_2, is_signed);
+ case CompareOp::Equal:
+ return ir.IEqual(operand_1, operand_2);
+ case CompareOp::LessThanEqual:
+ return ir.ILessThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::GreaterThan:
+ return ir.IGreaterThan(operand_1, operand_2, is_signed);
+ case CompareOp::NotEqual:
+ return ir.INotEqual(operand_1, operand_2);
+ case CompareOp::GreaterThanEqual:
+ return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
+ const IR::U1 z_flag{ir.GetZFlag()};
+ const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
+ const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
+ : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
+ ir.ILessThan(operand_2, zero, true))};
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ case CompareOp::Equal:
+ return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
+ case CompareOp::LessThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::GreaterThan: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
+ ir.IGreaterThan(intermediate, zero, true))};
+ const IR::U1 not_z{ir.LogicalNot(z_flag)};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
+ }
+ case CompareOp::NotEqual:
+ return ir.LogicalOr(ir.INotEqual(intermediate, zero),
+ ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
+ case CompareOp::GreaterThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
+ ir.IGreaterThanEqual(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
+ BooleanOp bop) {
+ switch (bop) {
+ case BooleanOp::AND:
+ return ir.LogicalAnd(predicate_1, predicate_2);
+ case BooleanOp::OR:
+ return ir.LogicalOr(predicate_1, predicate_2);
+ case BooleanOp::XOR:
+ return ir.LogicalXor(predicate_1, predicate_2);
+ default:
+ throw NotImplementedException("Invalid bop {}", bop);
+ }
+}
+
+IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
+ switch (op) {
+ case PredicateOp::False:
+ return ir.Imm1(false);
+ case PredicateOp::True:
+ return ir.Imm1(true);
+ case PredicateOp::Zero:
+ return ir.IEqual(result, ir.Imm32(0));
+ case PredicateOp::NonZero:
+ return ir.INotEqual(result, ir.Imm32(0));
+ default:
+ throw NotImplementedException("Invalid Predicate operation {}", op);
+ }
+}
+
+bool IsCompareOpOrdered(FPCompareOp op) {
+ switch (op) {
+ case FPCompareOp::LTU:
+ case FPCompareOp::EQU:
+ case FPCompareOp::LEU:
+ case FPCompareOp::GTU:
+ case FPCompareOp::NEU:
+ case FPCompareOp::GEU:
+ return false;
+ default:
+ return true;
+ }
+}
+
+IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control) {
+ const bool ordered{IsCompareOpOrdered(compare_op)};
+ switch (compare_op) {
+ case FPCompareOp::F:
+ return ir.Imm1(false);
+ case FPCompareOp::LT:
+ case FPCompareOp::LTU:
+ return ir.FPLessThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::EQ:
+ case FPCompareOp::EQU:
+ return ir.FPEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::LE:
+ case FPCompareOp::LEU:
+ return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GT:
+ case FPCompareOp::GTU:
+ return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NE:
+ case FPCompareOp::NEU:
+ return ir.FPNotEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GE:
+ case FPCompareOp::GEU:
+ return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NUM:
+ return ir.FPOrdered(operand_1, operand_2);
+ case FPCompareOp::Nan:
+ return ir.FPUnordered(operand_1, operand_2);
+ case FPCompareOp::T:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid FP compare op {}", compare_op);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
+
+[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op,
+ bool is_signed);
+
+[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
+ const IR::U1& predicate_2, BooleanOp bop);
+
+[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
+
+[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
+
+[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control = {});
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+void TranslatorVisitor::CSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ } const cset{insn};
+
+ const IR::U32 one_mask{ir.Imm32(-1)};
+ const IR::U32 fp_one{ir.Imm32(0x3f800000)};
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
+ const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
+ const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
+ const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
+ X(cset.dest_reg, result);
+ if (cset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (cset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+void TranslatorVisitor::CSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<45, 2, BooleanOp> bop;
+ } const csetp{insn};
+
+ const BooleanOp bop{csetp.bop};
+ const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
+ const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
+ ir.SetPred(csetp.dest_pred_a, result_a);
+ ir.SetPred(csetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ } const dadd{insn};
+ if (dadd.cc != 0) {
+ throw NotImplementedException("DADD CC");
+ }
+
+ const IR::F64 src_a{v.D(dadd.src_a_reg)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dadd.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DADD_reg(u64 insn) {
+ DADD(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DADD_cbuf(u64 insn) {
+ DADD(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DADD_imm(u64 insn) {
+ DADD(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ } const dset{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
+
+ IR::U1 pred{v.ir.GetPred(dset.pred)};
+ if (dset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(dset.dest_reg, result);
+ if (dset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (dset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSET_reg(u64 insn) {
+ DSET(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSET_cbuf(u64 insn) {
+ DSET(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSET_imm(u64 insn) {
+ DSET(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<50, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ } const dfma{insn};
+
+ if (dfma.cc != 0) {
+ throw NotImplementedException("DFMA CC");
+ }
+
+ const IR::F64 src_a{v.D(dfma.src_a_reg)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
+ const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dfma.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DFMA_reg(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_cr(u64 insn) {
+ DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_rc(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DFMA_imm(u64 insn) {
+ DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const dmnmx{insn};
+
+ if (dmnmx.cc != 0) {
+ throw NotImplementedException("DMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
+
+ IR::F64 max{v.ir.FPMax(op_a, op_b)};
+ IR::F64 min{v.ir.FPMin(op_a, op_b)};
+
+ if (dmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+ v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMNMX_reg(u64 insn) {
+ DMNMX(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
+ DMNMX(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMNMX_imm(u64 insn) {
+ DMNMX(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg;
+ } const dmul{insn};
+
+ if (dmul.cc != 0) {
+ throw NotImplementedException("DMUL CC");
+ }
+
+ const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dmul.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMUL_reg(u64 insn) {
+ DMUL(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMUL_cbuf(u64 insn) {
+ DMUL(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMUL_imm(u64 insn) {
+ DMUL(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const dsetp{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
+
+ const BooleanOp bop{dsetp.bop};
+ const FPCompareOp compare_op{dsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
+ const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(dsetp.dest_pred_a, result_a);
+ v.ir.SetPred(dsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSETP_reg(u64 insn) {
+ DSETP(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSETP_cbuf(u64 insn) {
+ DSETP(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSETP_imm(u64 insn) {
+ DSETP(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+ const ProgramHeader sph{v.env.SPH()};
+ IR::Reg src_reg{IR::Reg::R0};
+ for (u32 render_target = 0; render_target < 8; ++render_target) {
+ const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+ for (u32 component = 0; component < 4; ++component) {
+ if (!mask[component]) {
+ continue;
+ }
+ v.ir.SetFragColor(render_target, component, v.F(src_reg));
+ ++src_reg;
+ }
+ }
+ if (sph.ps.omap.sample_mask != 0) {
+ v.ir.SetSampleMask(v.X(src_reg));
+ }
+ if (sph.ps.omap.depth != 0) {
+ v.ir.SetFragDepth(v.F(src_reg + 1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+ switch (env.ShaderStage()) {
+ case Stage::Fragment:
+ ExitFragment(*this);
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ BitField<41, 1, u64> shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const flo{insn};
+
+ if (flo.cc != 0) {
+ throw NotImplementedException("CC");
+ }
+ if (flo.tilde != 0) {
+ src = v.ir.BitwiseNot(src);
+ }
+ IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
+ if (flo.shift != 0) {
+ const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
+ result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
+ }
+ v.X(flo.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FLO_reg(u64 insn) {
+ FLO(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::FLO_cbuf(u64 insn) {
+ FLO(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::FLO_imm(u64 insn) {
+ FLO(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
+ const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fadd{insn};
+
+ if (cc) {
+ throw NotImplementedException("FADD CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
+ IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fadd.dest_reg, value);
+}
+
+void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ BitField<50, 1, u64> sat;
+ } const fadd{insn};
+
+ FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
+ fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FADD_reg(u64 insn) {
+ FADD(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FADD_cbuf(u64 insn) {
+ FADD(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FADD_imm(u64 insn) {
+ FADD(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<56, 1, u64> neg_a;
+ BitField<54, 1, u64> abs_a;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> neg_b;
+ BitField<57, 1, u64> abs_b;
+ } const fadd32i{insn};
+
+ FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
+ fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fcmp{insn};
+
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
+ const IR::U32 src_reg{v.X(fcmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(fcmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FCMP_reg(u64 insn) {
+ FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_rc(u64 insn) {
+ FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FCMP_cr(u64 insn) {
+ FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const fcmp{insn};
+ const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
+ const u32 value{static_cast<u32>(fcmp.value) << 12};
+
+ FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ BitField<55, 1, u64> ftz;
+ } const fset{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
+ const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(fset.pred)};
+ if (fset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(fset.dest_reg, result);
+ if (fset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (fset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSET_reg(u64 insn) {
+ FSET(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSET_cbuf(u64 insn) {
+ FSET(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSET_imm(u64 insn) {
+ FSET(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class RoundingOp : u64 {
+ None = 0,
+ Pass = 3,
+ Round = 8,
+ Floor = 9,
+ Ceil = 10,
+ Trunc = 11,
+};
+
+[[nodiscard]] u32 WidthSize(FloatFormat width) {
+ switch (width) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<50, 1, u64> sat;
+ BitField<39, 4, u64> rounding_op;
+ BitField<39, 2, FpRounding> rounding;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<8, 2, FloatFormat> dst_size;
+
+ [[nodiscard]] RoundingOp RoundingOperation() const {
+ constexpr u64 rounding_mask = 0x0B;
+ return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
+ }
+ } const f2f{insn};
+
+ if (f2f.cc != 0) {
+ throw NotImplementedException("F2F CC");
+ }
+
+ IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
+
+ const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
+ IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ if (f2f.src_size != f2f.dst_size) {
+ fp_control.rounding = CastFpRounding(f2f.rounding);
+ input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
+ } else {
+ switch (f2f.RoundingOperation()) {
+ case RoundingOp::None:
+ case RoundingOp::Pass:
+ // Make sure NANs are handled properly
+ switch (f2f.src_size) {
+ case FloatFormat::F16:
+ input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
+ break;
+ case FloatFormat::F32:
+ input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
+ break;
+ case FloatFormat::F64:
+ input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
+ break;
+ }
+ break;
+ case RoundingOp::Round:
+ input = v.ir.FPRoundEven(input, fp_control);
+ break;
+ case RoundingOp::Floor:
+ input = v.ir.FPFloor(input, fp_control);
+ break;
+ case RoundingOp::Ceil:
+ input = v.ir.FPCeil(input, fp_control);
+ break;
+ case RoundingOp::Trunc:
+ input = v.ir.FPTrunc(input, fp_control);
+ break;
+ default:
+ throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
+ }
+ }
+ if (f2f.sat != 0 && !any_fp64) {
+ input = v.ir.FPSaturate(input);
+ }
+
+ switch (f2f.dst_size) {
+ case FloatFormat::F16: {
+ const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(f2f.dest_reg, input);
+ break;
+ case FloatFormat::F64:
+ v.D(f2f.dest_reg, input);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2F_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatReg20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleReg20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatCbuf(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleCbuf(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ BitField<20, 19, u64> imm;
+ BitField<56, 1, u64> imm_neg;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
+ const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
+ src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
+ if (f2f.imm_neg != 0) {
+ throw NotImplementedException("Neg bit on F16");
+ }
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatImm20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleImm20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class DestFormat : u64 {
+ Invalid,
+ I16,
+ I32,
+ I64,
+};
+enum class SrcFormat : u64 {
+ Invalid,
+ F16,
+ F32,
+ F64,
+};
+enum class Rounding : u64 {
+ Round,
+ Floor,
+ Ceil,
+ Trunc,
+};
+
+union F2I {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, DestFormat> dest_format;
+ BitField<10, 2, SrcFormat> src_format;
+ BitField<12, 1, u64> is_signed;
+ BitField<39, 2, Rounding> rounding;
+ BitField<41, 1, u64> half;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> abs;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> neg;
+};
+
+size_t BitSize(DestFormat dest_format) {
+ switch (dest_format) {
+ case DestFormat::I16:
+ return 16;
+ case DestFormat::I32:
+ return 32;
+ case DestFormat::I64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid destination format {}", dest_format);
+ }
+}
+
+std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
+ if (is_signed) {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<s16>::max()),
+ static_cast<f64>(std::numeric_limits<s16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<s32>::max()),
+ static_cast<f64>(std::numeric_limits<s32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<s64>::max()),
+ static_cast<f64>(std::numeric_limits<s64>::min())};
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<u16>::max()),
+ static_cast<f64>(std::numeric_limits<u16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<u32>::max()),
+ static_cast<f64>(std::numeric_limits<u32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<u64>::max()),
+ static_cast<f64>(std::numeric_limits<u64>::min())};
+ default:
+ break;
+ }
+ }
+ throw NotImplementedException("Invalid destination format {}", format);
+}
+
+IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, s64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
+ }
+ if (cbuf.offset % 2 != 0) {
+ throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
+ }
+ const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
+ const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
+ const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
+ const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
+ return v.ir.PackDouble2x32(vector);
+}
+
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
+ // F2I is used to convert from a floating point value to an integer
+ const F2I f2i{insn};
+
+ const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
+ f2i.dest_format != DestFormat::I64};
+ IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
+ if (denorm_cares) {
+ fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
+ }
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = fmz_mode,
+ };
+ const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
+ const IR::F16F32F64 rounded_value{[&] {
+ switch (f2i.rounding) {
+ case Rounding::Round:
+ return v.ir.FPRoundEven(op_a, fp_control);
+ case Rounding::Floor:
+ return v.ir.FPFloor(op_a, fp_control);
+ case Rounding::Ceil:
+ return v.ir.FPCeil(op_a, fp_control);
+ case Rounding::Trunc:
+ return v.ir.FPTrunc(op_a, fp_control);
+ default:
+ throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
+ }
+ }()};
+ const bool is_signed{f2i.is_signed != 0};
+ const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
+
+ IR::F16F32F64 intermediate;
+ switch (f2i.src_format) {
+ case SrcFormat::F16: {
+ const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
+ const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F32: {
+ const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
+ const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F64: {
+ const IR::F64 max_val{v.ir.Imm64(max_bound)};
+ const IR::F64 min_val{v.ir.Imm64(min_bound)};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
+ }
+
+ const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
+ IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
+
+ bool handled_special_case = false;
+ const bool special_nan_cases =
+ (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
+ if (special_nan_cases) {
+ if (f2i.dest_format == DestFormat::I32) {
+ handled_special_case = true;
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
+ } else if (f2i.dest_format == DestFormat::I64) {
+ handled_special_case = true;
+ result = IR::U64{
+ v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
+ }
+ }
+ if (!handled_special_case && is_signed) {
+ if (bitsize != 64) {
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
+ } else {
+ result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
+ }
+ }
+
+ if (bitsize == 64) {
+ v.L(f2i.dest_reg, result);
+ } else {
+ v.X(f2i.dest_reg, result);
+ }
+
+ if (f2i.cc != 0) {
+ throw NotImplementedException("F2I CC");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2I_reg(u64 insn) {
+ union {
+ u64 raw;
+ F2I base;
+ BitField<20, 8, IR::Reg> src_reg;
+ } const f2i{insn};
+
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.base.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
+ case SrcFormat::F32:
+ return F(f2i.src_reg);
+ case SrcFormat::F64:
+ return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
+ default:
+ throw NotImplementedException("Invalid F2I source format {}",
+ f2i.base.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_cbuf(u64 insn) {
+ const F2I f2i{insn};
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
+ case SrcFormat::F32:
+ return GetFloatCbuf(insn);
+ case SrcFormat::F64: {
+ return UnpackCbuf(*this, insn);
+ }
+ default:
+ throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_imm(u64) {
+ throw NotImplementedException("{}", Opcode::F2I_imm);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
+ bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const ffma{insn};
+
+ if (cc) {
+ throw NotImplementedException("FFMA CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, op_c, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(ffma.dest_reg, value);
+}
+
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> sat;
+ BitField<51, 2, FpRounding> fp_rounding;
+ BitField<53, 2, FmzMode> fmz_mode;
+ } const ffma{insn};
+
+ FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
+ ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FFMA_reg(u64 insn) {
+ FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_rc(u64 insn) {
+ FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FFMA_cr(u64 insn) {
+ FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_imm(u64 insn) {
+ FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz_mode;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<57, 1, u64> neg_c;
+ } const ffma32i{insn};
+
+ FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
+ ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const fmnmx{insn};
+
+ if (fmnmx.cc) {
+ throw NotImplementedException("FMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
+ IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
+
+ if (fmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMNMX_reg(u64 insn) {
+ FMNMX(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
+ FMNMX(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMNMX_imm(u64 insn) {
+ FMNMX(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Operation : u64 {
+ Cos = 0,
+ Sin = 1,
+ Ex2 = 2, // Base 2 exponent
+ Lg2 = 3, // Base 2 logarithm
+ Rcp = 4, // Reciprocal
+ Rsq = 5, // Reciprocal square root
+ Rcp64H = 6, // 64-bit reciprocal
+ Rsq64H = 7, // 64-bit reciprocal square root
+ Sqrt = 8,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MUFU(u64 insn) {
+ // MUFU is used to implement a bunch of special functions. See Operation.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 4, Operation> operation;
+ BitField<46, 1, u64> abs;
+ BitField<48, 1, u64> neg;
+ BitField<50, 1, u64> sat;
+ } const mufu{insn};
+
+ const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+ IR::F32 value{[&]() -> IR::F32 {
+ switch (mufu.operation) {
+ case Operation::Cos:
+ return ir.FPCos(op_a);
+ case Operation::Sin:
+ return ir.FPSin(op_a);
+ case Operation::Ex2:
+ return ir.FPExp2(op_a);
+ case Operation::Lg2:
+ return ir.FPLog2(op_a);
+ case Operation::Rcp:
+ return ir.FPRecip(op_a);
+ case Operation::Rsq:
+ return ir.FPRecipSqrt(op_a);
+ case Operation::Rcp64H:
+ throw NotImplementedException("MUFU.RCP64H");
+ case Operation::Rsq64H:
+ throw NotImplementedException("MUFU.RSQ64H");
+ case Operation::Sqrt:
+ return ir.FPSqrt(op_a);
+ default:
+ throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
+ }
+ }()};
+
+ if (mufu.sat) {
+ value = ir.FPSaturate(value);
+ }
+
+ F(mufu.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Scale : u64 {
+ None,
+ D2,
+ D4,
+ D8,
+ M8,
+ M4,
+ M2,
+ INVALIDSCALE37,
+};
+
+float ScaleFactor(Scale scale) {
+ switch (scale) {
+ case Scale::None:
+ return 1.0f;
+ case Scale::D2:
+ return 1.0f / 2.0f;
+ case Scale::D4:
+ return 1.0f / 4.0f;
+ case Scale::D8:
+ return 1.0f / 8.0f;
+ case Scale::M8:
+ return 8.0f;
+ case Scale::M4:
+ return 4.0f;
+ case Scale::M2:
+ return 2.0f;
+ case Scale::INVALIDSCALE37:
+ break;
+ }
+ throw NotImplementedException("Invalid FMUL scale {}", scale);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
+ FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fmul{insn};
+
+ if (cc) {
+ throw NotImplementedException("FMUL CC");
+ }
+ IR::F32 op_a{v.F(fmul.src_a)};
+ if (scale != Scale::None) {
+ if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
+ throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
+ }
+ op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
+ }
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, zero, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fmul.dest_reg, value);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 3, Scale> scale;
+ BitField<44, 2, FmzMode> fmz;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<50, 1, u64> sat;
+ } const fmul{insn};
+
+ FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
+ fmul.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMUL_reg(u64 insn) {
+ return FMUL(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMUL_cbuf(u64 insn) {
+ return FMUL(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMUL_imm(u64 insn) {
+ return FMUL(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FMUL32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz;
+ BitField<55, 1, u64> sat;
+ } const fmul32i{insn};
+
+ FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
+ fmul32i.sat != 0, fmul32i.cc != 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ SINCOS,
+ EX2,
+};
+
+void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 1, Mode> mode;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+ } const rro{insn};
+
+ v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::RRO_reg(u64 insn) {
+ RRO(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::RRO_cbuf(u64 insn) {
+ RRO(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::RRO_imm(u64) {
+ throw NotImplementedException("RRO (imm)");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fsetp{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
+ const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const BooleanOp bop{fsetp.bop};
+ const FPCompareOp compare_op{fsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
+ const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(fsetp.dest_pred_a, result_a);
+ v.ir.SetPred(fsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSETP_reg(u64 insn) {
+ FSETP(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSETP_cbuf(u64 insn) {
+ FSETP(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSETP_imm(u64 insn) {
+ FSETP(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::FSWZADD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<28, 8, u64> swizzle;
+ BitField<38, 1, u64> ndv;
+ BitField<39, 2, FpRounding> round;
+ BitField<44, 1, u64> ftz;
+ BitField<47, 1, u64> cc;
+ } const fswzadd{insn};
+
+ if (fswzadd.ndv != 0) {
+ throw NotImplementedException("FSWZADD NDV");
+ }
+
+ const IR::F32 src_a{GetFloatReg8(insn)};
+ const IR::F32 src_b{GetFloatReg20(insn)};
+ const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
+
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(fswzadd.round),
+ .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
+ F(fswzadd.dest_reg, result);
+
+ if (fswzadd.cc != 0) {
+ throw NotImplementedException("FSWZADD CC");
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hadd2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
+}
+
+void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
+ const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<39, 1, u64> ftz;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hadd2{insn};
+
+ HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
+ hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HADD2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+ GetReg20(insn));
+}
+
+void TranslatorVisitor::HADD2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> abs_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+ GetCbuf(insn));
+}
+
+void TranslatorVisitor::HADD2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hadd2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+ HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HADD2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hadd2{insn};
+
+ const u32 imm{static_cast<u32>(hadd2.imm32)};
+ HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
+ hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
+ Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
+ bool sat, HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hfma2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ if (lhs_c.Type() == IR::Type::F16) {
+ lhs_c = v.ir.FPConvert(32, lhs_c);
+ rhs_c = v.ir.FPConvert(32, rhs_c);
+ }
+ }
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
+
+ lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
+ rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
+}
+
+void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
+ Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<49, 2, Merge> merge;
+ } const hfma2{insn};
+
+ HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
+ sat, precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HFMA2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<28, 2, Swizzle> swizzle_b;
+ BitField<32, 1, u64> saturate;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> neg_c;
+ BitField<35, 2, Swizzle> swizzle_c;
+ BitField<37, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
+ GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
+ GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
+ GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
+ GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c;
+ BitField<20, 32, u64> imm32;
+ BitField<52, 1, u64> neg_c;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<55, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{static_cast<u32>(hfma2.imm32)};
+ HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
+ Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
+ switch (precision) {
+ case HalfPrecision::None:
+ return IR::FmzMode::None;
+ case HalfPrecision::FTZ:
+ return IR::FmzMode::FTZ;
+ case HalfPrecision::FMZ:
+ return IR::FmzMode::FMZ;
+ default:
+ return IR::FmzMode::DontCare;
+ }
+}
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
+ switch (swizzle) {
+ case Swizzle::H1_H0: {
+ const IR::Value vector{ir.UnpackFloat2x16(value)};
+ return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
+ }
+ case Swizzle::H0_H0: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
+ return {scalar, scalar};
+ }
+ case Swizzle::H1_H1: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
+ return {scalar, scalar};
+ }
+ case Swizzle::F32: {
+ const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
+ return {scalar, scalar};
+ }
+ }
+ throw InvalidArgument("Invalid swizzle {}", swizzle);
+}
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge) {
+ switch (merge) {
+ case Merge::H1_H0:
+ return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
+ case Merge::F32:
+ return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
+ case Merge::MRG_H0:
+ case Merge::MRG_H1: {
+ const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
+ const bool is_h0{merge == Merge::MRG_H0};
+ const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
+ return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
+ }
+ }
+ throw InvalidArgument("Invalid merge {}", merge);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class Merge : u64 {
+ H1_H0,
+ F32,
+ MRG_H0,
+ MRG_H1,
+};
+
+enum class Swizzle : u64 {
+ H1_H0,
+ F32,
+ H0_H0,
+ H1_H1,
+};
+
+enum class HalfPrecision : u64 {
+ None = 0,
+ FTZ = 1,
+ FMZ = 2,
+};
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hmul2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
+}
+
+void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
+ Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<39, 2, HalfPrecision> precision;
+ } const hmul2{insn};
+
+ HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
+ hmul2.precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HMUL2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<44, 1, u64> abs_a;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
+ hmul2.swizzle_b, GetReg20(insn));
+}
+
+void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<54, 1, u64> abs_b;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
+ Swizzle::F32, GetCbuf(insn));
+}
+
+void TranslatorVisitor::HMUL2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HMUL2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 2, HalfPrecision> precision;
+ BitField<52, 1, u64> sat;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hmul2{insn};
+
+ const u32 imm{static_cast<u32>(hmul2.imm32)};
+ HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
+ bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hset2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hset2.pred)};
+ if (hset2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
+
+ const u32 true_value = bf ? 0x3c00 : 0xffff;
+ const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
+ const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
+ const IR::U32 fail_result{v.ir.Imm32(0)};
+ const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
+ const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
+
+ v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSET2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> bf;
+ BitField<31, 1, u64> neg_b;
+ BitField<50, 1, u64> ftz;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
+ hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
+}
+
+void TranslatorVisitor::HSET2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
+ hset2.compare_op, Swizzle::F32);
+}
+
+void TranslatorVisitor::HSET2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hset2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
+ Swizzle::H1_H0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
+ Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
+ union {
+ u64 insn;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<6, 1, u64> ftz;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hsetp2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
+ if (hsetp2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
+
+ if (h_and) {
+ auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
+ v.ir.SetPred(hsetp2.dest_pred_a, result);
+ v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
+ } else {
+ v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
+ v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSETP2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> h_and;
+ BitField<31, 1, u64> neg_b;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hsetp2{insn};
+ HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> abs_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hsetp2{insn};
+
+ HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hsetp2{insn};
+
+ const u32 imm{static_cast<u32>(hsetp2.low << 6) |
+ static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hsetp2.high << 22) |
+ static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
+ hsetp2.h_and != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
+ u32 offset) {
+ if (unaligned) {
+ return ir.Imm32(0);
+ }
+ return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
+}
+} // Anonymous namespace
+
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+ return ir.GetReg(reg);
+}
+
+IR::U64 TranslatorVisitor::L(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+IR::F32 TranslatorVisitor::F(IR::Reg reg) {
+ return ir.BitCast<IR::F32>(X(reg));
+}
+
+IR::F64 TranslatorVisitor::D(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+ ir.SetReg(dest_reg, value);
+}
+
+void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackUint2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
+ X(dest_reg, ir.BitCast<IR::U32>(value));
+}
+
+void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg8(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg20(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg39(insn));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, u64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x10'000) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+ }
+ const IR::Value binding{static_cast<u32>(cbuf.binding)};
+ const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
+ return {IR::U32{binding}, IR::U32{byte_offset}};
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetCbuf(binding, byte_offset);
+}
+
+IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetFloatCbuf(binding, byte_offset);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ const auto [binding, offset_value]{CbufAddr(insn)};
+ const bool unaligned{cbuf.unaligned != 0};
+ const u32 offset{offset_value.U32()};
+ const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
+
+ const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
+ const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
+ return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
+}
+
+IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ if (cbuf.unaligned != 0) {
+ throw NotImplementedException("Unaligned packed constant buffer read");
+ }
+ const auto [binding, lower_offset]{CbufAddr(insn)};
+ const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
+ const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
+ const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
+ return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
+}
+
+IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+
+ if (imm.is_negative != 0) {
+ const s64 raw{static_cast<s64>(imm.value)};
+ return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
+ } else {
+ return ir.Imm32(static_cast<u32>(imm.value));
+ }
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+ const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
+ const u32 value{static_cast<u32>(imm.value) << 12};
+ return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+ const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
+ const u64 value{imm.value << 44};
+ return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
+}
+
+IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
+ const s64 value{GetImm20(insn).U32()};
+ return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
+}
+
+IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(static_cast<u32>(imm.value));
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+ ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+ ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+ ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+ ir.SetOFlag(value);
+}
+
+void TranslatorVisitor::ResetZero() {
+ SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+ SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+ SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+ SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+
+namespace Shader::Maxwell {
+
+enum class CompareOp : u64 {
+ False,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+enum class BooleanOp : u64 {
+ AND,
+ OR,
+ XOR,
+};
+
+enum class PredicateOp : u64 {
+ False,
+ True,
+ Zero,
+ NonZero,
+};
+
+enum class FPCompareOp : u64 {
+ F,
+ LT,
+ EQ,
+ LE,
+ GT,
+ NE,
+ GE,
+ NUM,
+ Nan,
+ LTU,
+ EQU,
+ LEU,
+ GTU,
+ NEU,
+ GEU,
+ T,
+};
+
+class TranslatorVisitor {
+public:
+ explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
+
+ Environment& env;
+ IR::IREmitter ir;
+
+ void AL2P(u64 insn);
+ void ALD(u64 insn);
+ void AST(u64 insn);
+ void ATOM_cas(u64 insn);
+ void ATOM(u64 insn);
+ void ATOMS_cas(u64 insn);
+ void ATOMS(u64 insn);
+ void B2R(u64 insn);
+ void BAR(u64 insn);
+ void BFE_reg(u64 insn);
+ void BFE_cbuf(u64 insn);
+ void BFE_imm(u64 insn);
+ void BFI_reg(u64 insn);
+ void BFI_rc(u64 insn);
+ void BFI_cr(u64 insn);
+ void BFI_imm(u64 insn);
+ void BPT(u64 insn);
+ void BRA(u64 insn);
+ void BRK(u64 insn);
+ void BRX(u64 insn);
+ void CAL();
+ void CCTL(u64 insn);
+ void CCTLL(u64 insn);
+ void CONT(u64 insn);
+ void CS2R(u64 insn);
+ void CSET(u64 insn);
+ void CSETP(u64 insn);
+ void DADD_reg(u64 insn);
+ void DADD_cbuf(u64 insn);
+ void DADD_imm(u64 insn);
+ void DEPBAR();
+ void DFMA_reg(u64 insn);
+ void DFMA_rc(u64 insn);
+ void DFMA_cr(u64 insn);
+ void DFMA_imm(u64 insn);
+ void DMNMX_reg(u64 insn);
+ void DMNMX_cbuf(u64 insn);
+ void DMNMX_imm(u64 insn);
+ void DMUL_reg(u64 insn);
+ void DMUL_cbuf(u64 insn);
+ void DMUL_imm(u64 insn);
+ void DSET_reg(u64 insn);
+ void DSET_cbuf(u64 insn);
+ void DSET_imm(u64 insn);
+ void DSETP_reg(u64 insn);
+ void DSETP_cbuf(u64 insn);
+ void DSETP_imm(u64 insn);
+ void EXIT();
+ void F2F_reg(u64 insn);
+ void F2F_cbuf(u64 insn);
+ void F2F_imm(u64 insn);
+ void F2I_reg(u64 insn);
+ void F2I_cbuf(u64 insn);
+ void F2I_imm(u64 insn);
+ void FADD_reg(u64 insn);
+ void FADD_cbuf(u64 insn);
+ void FADD_imm(u64 insn);
+ void FADD32I(u64 insn);
+ void FCHK_reg(u64 insn);
+ void FCHK_cbuf(u64 insn);
+ void FCHK_imm(u64 insn);
+ void FCMP_reg(u64 insn);
+ void FCMP_rc(u64 insn);
+ void FCMP_cr(u64 insn);
+ void FCMP_imm(u64 insn);
+ void FFMA_reg(u64 insn);
+ void FFMA_rc(u64 insn);
+ void FFMA_cr(u64 insn);
+ void FFMA_imm(u64 insn);
+ void FFMA32I(u64 insn);
+ void FLO_reg(u64 insn);
+ void FLO_cbuf(u64 insn);
+ void FLO_imm(u64 insn);
+ void FMNMX_reg(u64 insn);
+ void FMNMX_cbuf(u64 insn);
+ void FMNMX_imm(u64 insn);
+ void FMUL_reg(u64 insn);
+ void FMUL_cbuf(u64 insn);
+ void FMUL_imm(u64 insn);
+ void FMUL32I(u64 insn);
+ void FSET_reg(u64 insn);
+ void FSET_cbuf(u64 insn);
+ void FSET_imm(u64 insn);
+ void FSETP_reg(u64 insn);
+ void FSETP_cbuf(u64 insn);
+ void FSETP_imm(u64 insn);
+ void FSWZADD(u64 insn);
+ void GETCRSPTR(u64 insn);
+ void GETLMEMBASE(u64 insn);
+ void HADD2_reg(u64 insn);
+ void HADD2_cbuf(u64 insn);
+ void HADD2_imm(u64 insn);
+ void HADD2_32I(u64 insn);
+ void HFMA2_reg(u64 insn);
+ void HFMA2_rc(u64 insn);
+ void HFMA2_cr(u64 insn);
+ void HFMA2_imm(u64 insn);
+ void HFMA2_32I(u64 insn);
+ void HMUL2_reg(u64 insn);
+ void HMUL2_cbuf(u64 insn);
+ void HMUL2_imm(u64 insn);
+ void HMUL2_32I(u64 insn);
+ void HSET2_reg(u64 insn);
+ void HSET2_cbuf(u64 insn);
+ void HSET2_imm(u64 insn);
+ void HSETP2_reg(u64 insn);
+ void HSETP2_cbuf(u64 insn);
+ void HSETP2_imm(u64 insn);
+ void I2F_reg(u64 insn);
+ void I2F_cbuf(u64 insn);
+ void I2F_imm(u64 insn);
+ void I2I_reg(u64 insn);
+ void I2I_cbuf(u64 insn);
+ void I2I_imm(u64 insn);
+ void IADD_reg(u64 insn);
+ void IADD_cbuf(u64 insn);
+ void IADD_imm(u64 insn);
+ void IADD3_reg(u64 insn);
+ void IADD3_cbuf(u64 insn);
+ void IADD3_imm(u64 insn);
+ void IADD32I(u64 insn);
+ void ICMP_reg(u64 insn);
+ void ICMP_rc(u64 insn);
+ void ICMP_cr(u64 insn);
+ void ICMP_imm(u64 insn);
+ void IDE(u64 insn);
+ void IDP_reg(u64 insn);
+ void IDP_imm(u64 insn);
+ void IMAD_reg(u64 insn);
+ void IMAD_rc(u64 insn);
+ void IMAD_cr(u64 insn);
+ void IMAD_imm(u64 insn);
+ void IMAD32I(u64 insn);
+ void IMADSP_reg(u64 insn);
+ void IMADSP_rc(u64 insn);
+ void IMADSP_cr(u64 insn);
+ void IMADSP_imm(u64 insn);
+ void IMNMX_reg(u64 insn);
+ void IMNMX_cbuf(u64 insn);
+ void IMNMX_imm(u64 insn);
+ void IMUL_reg(u64 insn);
+ void IMUL_cbuf(u64 insn);
+ void IMUL_imm(u64 insn);
+ void IMUL32I(u64 insn);
+ void IPA(u64 insn);
+ void ISBERD(u64 insn);
+ void ISCADD_reg(u64 insn);
+ void ISCADD_cbuf(u64 insn);
+ void ISCADD_imm(u64 insn);
+ void ISCADD32I(u64 insn);
+ void ISET_reg(u64 insn);
+ void ISET_cbuf(u64 insn);
+ void ISET_imm(u64 insn);
+ void ISETP_reg(u64 insn);
+ void ISETP_cbuf(u64 insn);
+ void ISETP_imm(u64 insn);
+ void JCAL(u64 insn);
+ void JMP(u64 insn);
+ void JMX(u64 insn);
+ void KIL();
+ void LD(u64 insn);
+ void LDC(u64 insn);
+ void LDG(u64 insn);
+ void LDL(u64 insn);
+ void LDS(u64 insn);
+ void LEA_hi_reg(u64 insn);
+ void LEA_hi_cbuf(u64 insn);
+ void LEA_lo_reg(u64 insn);
+ void LEA_lo_cbuf(u64 insn);
+ void LEA_lo_imm(u64 insn);
+ void LEPC(u64 insn);
+ void LONGJMP(u64 insn);
+ void LOP_reg(u64 insn);
+ void LOP_cbuf(u64 insn);
+ void LOP_imm(u64 insn);
+ void LOP3_reg(u64 insn);
+ void LOP3_cbuf(u64 insn);
+ void LOP3_imm(u64 insn);
+ void LOP32I(u64 insn);
+ void MEMBAR(u64 insn);
+ void MOV_reg(u64 insn);
+ void MOV_cbuf(u64 insn);
+ void MOV_imm(u64 insn);
+ void MOV32I(u64 insn);
+ void MUFU(u64 insn);
+ void NOP(u64 insn);
+ void OUT_reg(u64 insn);
+ void OUT_cbuf(u64 insn);
+ void OUT_imm(u64 insn);
+ void P2R_reg(u64 insn);
+ void P2R_cbuf(u64 insn);
+ void P2R_imm(u64 insn);
+ void PBK();
+ void PCNT();
+ void PEXIT(u64 insn);
+ void PIXLD(u64 insn);
+ void PLONGJMP(u64 insn);
+ void POPC_reg(u64 insn);
+ void POPC_cbuf(u64 insn);
+ void POPC_imm(u64 insn);
+ void PRET(u64 insn);
+ void PRMT_reg(u64 insn);
+ void PRMT_rc(u64 insn);
+ void PRMT_cr(u64 insn);
+ void PRMT_imm(u64 insn);
+ void PSET(u64 insn);
+ void PSETP(u64 insn);
+ void R2B(u64 insn);
+ void R2P_reg(u64 insn);
+ void R2P_cbuf(u64 insn);
+ void R2P_imm(u64 insn);
+ void RAM(u64 insn);
+ void RED(u64 insn);
+ void RET(u64 insn);
+ void RRO_reg(u64 insn);
+ void RRO_cbuf(u64 insn);
+ void RRO_imm(u64 insn);
+ void RTT(u64 insn);
+ void S2R(u64 insn);
+ void SAM(u64 insn);
+ void SEL_reg(u64 insn);
+ void SEL_cbuf(u64 insn);
+ void SEL_imm(u64 insn);
+ void SETCRSPTR(u64 insn);
+ void SETLMEMBASE(u64 insn);
+ void SHF_l_reg(u64 insn);
+ void SHF_l_imm(u64 insn);
+ void SHF_r_reg(u64 insn);
+ void SHF_r_imm(u64 insn);
+ void SHFL(u64 insn);
+ void SHL_reg(u64 insn);
+ void SHL_cbuf(u64 insn);
+ void SHL_imm(u64 insn);
+ void SHR_reg(u64 insn);
+ void SHR_cbuf(u64 insn);
+ void SHR_imm(u64 insn);
+ void SSY();
+ void ST(u64 insn);
+ void STG(u64 insn);
+ void STL(u64 insn);
+ void STP(u64 insn);
+ void STS(u64 insn);
+ void SUATOM(u64 insn);
+ void SUATOM_cas(u64 insn);
+ void SULD(u64 insn);
+ void SURED(u64 insn);
+ void SUST(u64 insn);
+ void SYNC(u64 insn);
+ void TEX(u64 insn);
+ void TEX_b(u64 insn);
+ void TEXS(u64 insn);
+ void TLD(u64 insn);
+ void TLD_b(u64 insn);
+ void TLD4(u64 insn);
+ void TLD4_b(u64 insn);
+ void TLD4S(u64 insn);
+ void TLDS(u64 insn);
+ void TMML(u64 insn);
+ void TMML_b(u64 insn);
+ void TXA(u64 insn);
+ void TXD(u64 insn);
+ void TXD_b(u64 insn);
+ void TXQ(u64 insn);
+ void TXQ_b(u64 insn);
+ void VABSDIFF(u64 insn);
+ void VABSDIFF4(u64 insn);
+ void VADD(u64 insn);
+ void VMAD(u64 insn);
+ void VMNMX(u64 insn);
+ void VOTE(u64 insn);
+ void VOTE_vtg(u64 insn);
+ void VSET(u64 insn);
+ void VSETP(u64 insn);
+ void VSHL(u64 insn);
+ void VSHR(u64 insn);
+ void XMAD_reg(u64 insn);
+ void XMAD_rc(u64 insn);
+ void XMAD_cr(u64 insn);
+ void XMAD_imm(u64 insn);
+
+ [[nodiscard]] IR::U32 X(IR::Reg reg);
+ [[nodiscard]] IR::U64 L(IR::Reg reg);
+ [[nodiscard]] IR::F32 F(IR::Reg reg);
+ [[nodiscard]] IR::F64 D(IR::Reg reg);
+
+ void X(IR::Reg dest_reg, const IR::U32& value);
+ void L(IR::Reg dest_reg, const IR::U64& value);
+ void F(IR::Reg dest_reg, const IR::F32& value);
+ void D(IR::Reg dest_reg, const IR::F64& value);
+
+ [[nodiscard]] IR::U32 GetReg8(u64 insn);
+ [[nodiscard]] IR::U32 GetReg20(u64 insn);
+ [[nodiscard]] IR::U32 GetReg39(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
+
+ [[nodiscard]] IR::U32 GetCbuf(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm32(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
+
+ void SetZFlag(const IR::U1& value);
+ void SetSFlag(const IR::U1& value);
+ void SetCFlag(const IR::U1& value);
+ void SetOFlag(const IR::U1& value);
+
+ void ResetZero();
+ void ResetSFlag();
+ void ResetCFlag();
+ void ResetOFlag();
+};
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
+ bool cc) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const iadd{insn};
+
+ if (sat) {
+ throw NotImplementedException("IADD SAT");
+ }
+ if (x && po) {
+ throw NotImplementedException("IADD X+PO");
+ }
+ // Operand A is always read from here, negated if needed
+ IR::U32 op_a{v.X(iadd.src_a)};
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ // Add both operands
+ IR::U32 result{v.ir.IAdd(op_a, op_b)};
+ if (x) {
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ result = v.ir.IAdd(result, carry);
+ }
+ if (po) {
+ // .PO adds one to the result
+ result = v.ir.IAdd(result, v.ir.Imm32(1));
+ }
+ if (cc) {
+ // Store flags
+ // TODO: Does this grab the result pre-PO or after?
+ if (po) {
+ throw NotImplementedException("IADD CC+PO");
+ }
+ // TODO: How does CC behave when X is set?
+ if (x) {
+ throw NotImplementedException("IADD X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ v.SetOFlag(v.ir.GetOverflowFromOp(result));
+ }
+ // Store result
+ v.X(iadd.dest_reg, result);
+}
+
+void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 insn;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> three_for_po;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<50, 1, u64> sat;
+ } const iadd{insn};
+
+ const bool po{iadd.three_for_po == 3};
+ if (!po && iadd.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+ IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD_reg(u64 insn) {
+ IADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IADD_cbuf(u64 insn) {
+ IADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IADD_imm(u64 insn) {
+ IADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::IADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> x;
+ BitField<54, 1, u64> sat;
+ BitField<55, 2, u64> three_for_po;
+ BitField<56, 1, u64> neg_a;
+ } const iadd32i{insn};
+
+ const bool po{iadd32i.three_for_po == 3};
+ const bool neg_a{!po && iadd32i.neg_a != 0};
+ IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Shift : u64 {
+ None,
+ Right,
+ Left,
+};
+enum class Half : u64 {
+ All,
+ Lower,
+ Upper,
+};
+
+[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
+ constexpr bool is_signed{false};
+ switch (half) {
+ case Half::All:
+ return value;
+ case Half::Lower:
+ return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
+ case Half::Upper:
+ return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
+ }
+ throw NotImplementedException("Invalid half");
+}
+
+[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
+ switch (shift) {
+ case Shift::None:
+ return value;
+ case Shift::Right: {
+ // 33-bit RS IADD3 edge case
+ const IR::U1 edge_case{ir.GetCarryFromOp(value)};
+ const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
+ return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
+ }
+ case Shift::Left:
+ return ir.ShiftLeftLogical(value, ir.Imm32(16));
+ }
+ throw NotImplementedException("Invalid shift");
+}
+
+void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
+ Shift shift = Shift::None) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> x;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> neg_b;
+ BitField<51, 1, u64> neg_a;
+ } iadd3{insn};
+
+ if (iadd3.neg_a != 0) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (iadd3.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+ if (iadd3.neg_c != 0) {
+ op_c = v.ir.INeg(op_c);
+ }
+ IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
+ if (iadd3.x != 0) {
+ // TODO: How does RS behave when X is set?
+ if (shift == Shift::Right) {
+ throw NotImplementedException("IADD3 X+RS");
+ }
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ lhs_1 = v.ir.IAdd(lhs_1, carry);
+ }
+ const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
+ const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
+
+ v.X(iadd3.dest_reg, result);
+ if (iadd3.cc != 0) {
+ // TODO: How does CC behave when X is set?
+ if (iadd3.x != 0) {
+ throw NotImplementedException("IADD3 X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
+ v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<37, 2, Shift> shift;
+ BitField<35, 2, Half> half_a;
+ BitField<33, 2, Half> half_b;
+ BitField<31, 2, Half> half_c;
+ } const iadd3{insn};
+
+ const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
+ const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
+ const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
+ IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
+}
+
+void TranslatorVisitor::IADD3_cbuf(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::IADD3_imm(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const icmp{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool is_signed{icmp.is_signed != 0};
+ const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
+
+ const IR::U32 src_reg{v.X(icmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(icmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ICMP_reg(u64 insn) {
+ ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_rc(u64 insn) {
+ ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::ICMP_cr(u64 insn) {
+ ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_imm(u64 insn) {
+ ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> x;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const iset{insn};
+
+ const IR::U32 src_a{v.X(iset.src_reg)};
+ const bool is_signed{iset.is_signed != 0};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool x{iset.x != 0};
+ const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
+
+ IR::U1 pred{v.ir.GetPred(iset.pred)};
+ if (iset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(iset.dest_reg, result);
+ if (iset.cc != 0) {
+ if (x) {
+ throw NotImplementedException("ISET.CC + X");
+ }
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (iset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISET_reg(u64 insn) {
+ ISET(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISET_cbuf(u64 insn) {
+ ISET(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISET_imm(u64 insn) {
+ ISET(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class IntFormat : u64 {
+ U8 = 0,
+ U16 = 1,
+ U32 = 2,
+ U64 = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, FloatFormat> float_format;
+ BitField<10, 2, IntFormat> int_format;
+ BitField<13, 1, u64> is_signed;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 2, u64> selector;
+ BitField<47, 1, u64> cc;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+};
+
+bool Is64(u64 insn) {
+ return Encoding{insn}.int_format == IntFormat::U64;
+}
+
+int BitSize(FloatFormat format) {
+ switch (format) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ }
+ throw NotImplementedException("Invalid float format {}", format);
+}
+
+IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
+ const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
+ const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
+ const IR::U1 is_least{v.ir.IEqual(value, least_value)};
+ return IR::U32{v.ir.Select(is_least, value, absolute)};
+}
+
+void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
+ const Encoding i2f{insn};
+ if (i2f.cc != 0) {
+ throw NotImplementedException("I2F CC");
+ }
+ const bool is_signed{i2f.is_signed != 0};
+ int src_bitsize{};
+ switch (i2f.int_format) {
+ case IntFormat::U8:
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(8), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 8);
+ }
+ src_bitsize = 8;
+ break;
+ case IntFormat::U16:
+ if (i2f.selector == 1 || i2f.selector == 3) {
+ throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
+ }
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(16), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 16);
+ }
+ src_bitsize = 16;
+ break;
+ case IntFormat::U32:
+ case IntFormat::U64:
+ if (i2f.selector != 0) {
+ throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
+ }
+ if (i2f.abs != 0 && is_signed) {
+ src = v.ir.IAbs(src);
+ }
+ src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
+ break;
+ }
+ const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
+ const int dst_bitsize{BitSize(i2f.float_format)};
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(i2f.fp_rounding),
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
+ static_cast<size_t>(conversion_src_bitsize), is_signed, src,
+ fp_control)};
+ if (i2f.neg != 0) {
+ if (i2f.abs != 0 || !is_signed) {
+ // We know the value is positive
+ value = v.ir.FPNeg(value);
+ } else {
+ // Only negate if the input isn't the lowest value
+ IR::U1 is_least;
+ if (src_bitsize == 64) {
+ is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
+ } else if (src_bitsize == 32) {
+ is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
+ } else {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
+ is_least = v.ir.IEqual(src, least_value);
+ }
+ value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
+ }
+ }
+ switch (i2f.float_format) {
+ case FloatFormat::F16: {
+ const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(i2f.dest_reg, value);
+ break;
+ case FloatFormat::F64: {
+ if (!IR::IsAligned(i2f.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
+ }
+ const IR::Value vector{v.ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; ++i) {
+ v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2F_reg(u64 insn) {
+ if (Is64(insn)) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> reg;
+ } const value{insn};
+ const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
+ I2F(*this, insn, ir.PackUint2x32(regs));
+ } else {
+ I2F(*this, insn, GetReg20(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_cbuf(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedCbuf(insn));
+ } else {
+ I2F(*this, insn, GetCbuf(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_imm(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedImm20(insn));
+ } else {
+ I2F(*this, insn, GetImm20(insn));
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class MaxShift : u64 {
+ U32,
+ Undefined,
+ U64,
+ S64,
+};
+
+IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
+ bool right_shift, bool is_signed) {
+ if (!right_shift) {
+ return ir.ShiftLeftLogical(packed_int, safe_shift);
+ }
+ if (is_signed) {
+ return ir.ShiftRightArithmetic(packed_int, safe_shift);
+ }
+ return ir.ShiftRightLogical(packed_int, safe_shift);
+}
+
+void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
+ bool right_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<0, 8, IR::Reg> lo_bits_reg;
+ BitField<37, 2, MaxShift> max_shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> x_mode;
+ BitField<50, 1, u64> wrap;
+ } const shf{insn};
+
+ if (shf.cc != 0) {
+ throw NotImplementedException("SHF CC");
+ }
+ if (shf.x_mode != 0) {
+ throw NotImplementedException("SHF X Mode");
+ }
+ if (shf.max_shift == MaxShift::Undefined) {
+ throw NotImplementedException("SHF Use of undefined MaxShift value");
+ }
+ const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
+ const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
+ const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
+ const IR::U32 safe_shift{shf.wrap != 0
+ ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
+ : v.ir.UMin(shift, max_shift)};
+
+ const bool is_signed{shf.max_shift == MaxShift::S64};
+ const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
+ const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
+
+ const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
+ v.X(shf.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHF_l_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_l_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_r_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
+}
+
+void TranslatorVisitor::SHF_r_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 2, u64> mode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const imnmx{insn};
+
+ if (imnmx.cc != 0) {
+ throw NotImplementedException("IMNMX CC");
+ }
+
+ if (imnmx.mode != 0) {
+ throw NotImplementedException("IMNMX.MODE");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
+ const IR::U32 op_a{v.X(imnmx.src_reg)};
+ IR::U32 min;
+ IR::U32 max;
+
+ if (imnmx.is_signed != 0) {
+ min = IR::U32{v.ir.SMin(op_a, op_b)};
+ max = IR::U32{v.ir.SMax(op_a, op_b)};
+ } else {
+ min = IR::U32{v.ir.UMin(op_a, op_b)};
+ max = IR::U32{v.ir.UMax(op_a, op_b)};
+ }
+ if (imnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ const IR::U32 result{v.ir.Select(pred, min, max)};
+ v.X(imnmx.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IMNMX_reg(u64 insn) {
+ IMNMX(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
+ IMNMX(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IMNMX_imm(u64 insn) {
+ IMNMX(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ } const popc{insn};
+
+ const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
+ const IR::U32 result = v.ir.BitCount(operand);
+ v.X(popc.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::POPC_reg(u64 insn) {
+ POPC(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::POPC_cbuf(u64 insn) {
+ POPC(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::POPC_imm(u64 insn) {
+ POPC(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
+ u64 scale_imm) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> op_a;
+ } const iscadd{insn};
+
+ const bool po{neg_a && neg_b};
+ IR::U32 op_a{v.X(iscadd.op_a)};
+ if (po) {
+ // When PO is present, add one
+ op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
+ } else {
+ // When PO is not present, the bits are interpreted as negation
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (neg_b) {
+ op_b = v.ir.INeg(op_b);
+ }
+ }
+ // With the operands already processed, scale A
+ const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
+ const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
+
+ const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
+ v.X(iscadd.dest_reg, result);
+
+ if (cc) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ const IR::U1 carry{v.ir.GetCarryFromOp(result)};
+ const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
+ v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
+ v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
+ }
+}
+
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<39, 5, u64> scale;
+ } const iscadd{insn};
+
+ ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISCADD_reg(u64 insn) {
+ ISCADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
+ ISCADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISCADD_imm(u64 insn) {
+ ISCADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::ISCADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 5, u64> scale;
+ } const iscadd{insn};
+
+ return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> x;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const isetp{insn};
+
+ const bool is_signed{isetp.is_signed != 0};
+ const bool x{isetp.x != 0};
+ const BooleanOp bop{isetp.bop};
+ const CompareOp compare_op{isetp.compare_op};
+ const IR::U32 op_a{v.X(isetp.src_reg_a)};
+ const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
+ const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(isetp.dest_pred_a, result_a);
+ v.ir.SetPred(isetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISETP_reg(u64 insn) {
+ ISETP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISETP_cbuf(u64 insn) {
+ ISETP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISETP_imm(u64 insn) {
+ ISETP(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> w;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ } const shl{insn};
+
+ if (shl.x != 0) {
+ throw NotImplementedException("SHL.X");
+ }
+ if (shl.cc != 0) {
+ throw NotImplementedException("SHL.CC");
+ }
+ const IR::U32 base{v.X(shl.src_reg_a)};
+ IR::U32 result;
+ if (shl.w != 0) {
+ // When .W is set, the shift value is wrapped
+ // To emulate this we just have to wrap it ourselves.
+ const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
+ result = v.ir.ShiftLeftLogical(base, shift);
+ } else {
+ // When .W is not set, the shift value is clamped between 0 and 32.
+ // To emulate this we have to have in mind the special shift of 32, that evaluates as 0.
+ // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
+ //
+ // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
+ // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
+ // or equal to the bit width of the components of Base."
+ //
+ // And on the GLASM specification it is also safe to evaluate out of bounds:
+ //
+ // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
+ // "The results of a shift operation ("<<") are undefined if the value of the second operand
+ // is negative, or greater than or equal to the number of bits in the first operand."
+ //
+ // Emphasis on undefined results in contrast to undefined behavior.
+ //
+ const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
+ const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
+ result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
+ }
+ v.X(shl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHL_reg(u64 insn) {
+ SHL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHL_cbuf(u64 insn) {
+ SHL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHL_imm(u64 insn) {
+ SHL(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> is_wrapped;
+ BitField<40, 1, u64> brev;
+ BitField<43, 1, u64> xmode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const shr{insn};
+
+ if (shr.xmode != 0) {
+ throw NotImplementedException("SHR.XMODE");
+ }
+ if (shr.cc != 0) {
+ throw NotImplementedException("SHR.CC");
+ }
+
+ IR::U32 base{v.X(shr.src_reg_a)};
+ if (shr.brev == 1) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result;
+ const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
+ if (shr.is_signed == 1) {
+ result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
+ } else {
+ result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
+ }
+
+ if (shr.is_wrapped == 0) {
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 safe_bits{v.ir.Imm32(32)};
+
+ const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
+ const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
+ const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
+ }
+ v.X(shr.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHR_reg(u64 insn) {
+ SHR(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHR_cbuf(u64 insn) {
+ SHR(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHR_imm(u64 insn) {
+ SHR(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SelectMode : u64 {
+ Default,
+ CLO,
+ CHI,
+ CSFU,
+ CBCC,
+};
+
+enum class Half : u64 {
+ H0, // Least-significant bits (15:0)
+ H1, // Most-significant bits (31:16)
+};
+
+IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
+ const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
+ return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
+}
+
+void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
+ SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_a_signed;
+ BitField<49, 1, u64> is_b_signed;
+ BitField<53, 1, Half> half_a;
+ } const xmad{insn};
+
+ if (x) {
+ throw NotImplementedException("XMAD X");
+ }
+ const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
+ const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
+
+ IR::U32 product{v.ir.IMul(op_a, op_b)};
+ if (psl) {
+ // .PSL shifts the product 16 bits
+ product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
+ }
+ const IR::U32 op_c{[&]() -> IR::U32 {
+ switch (select_mode) {
+ case SelectMode::Default:
+ return src_c;
+ case SelectMode::CLO:
+ return ExtractHalf(v, src_c, Half::H0, false);
+ case SelectMode::CHI:
+ return ExtractHalf(v, src_c, Half::H1, false);
+ case SelectMode::CBCC:
+ return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
+ case SelectMode::CSFU:
+ throw NotImplementedException("XMAD CSFU");
+ }
+ throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
+ }()};
+ IR::U32 result{v.ir.IAdd(product, op_c)};
+ if (mrg) {
+ // .MRG inserts src_b [15:0] into result's [31:16].
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
+ result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
+ }
+ if (xmad.cc) {
+ throw NotImplementedException("XMAD CC");
+ }
+ // Store result
+ v.X(xmad.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::XMAD_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<35, 1, Half> half_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
+ xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ BitField<55, 1, u64> psl;
+ BitField<56, 1, u64> mrg;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 16, u64> src_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
+ Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class IntegerWidth : u64 {
+ Byte,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
+ switch (width) {
+ case IntegerWidth::Byte:
+ return ir.Imm32(8);
+ case IntegerWidth::Short:
+ return ir.Imm32(16);
+ case IntegerWidth::Word:
+ return ir.Imm32(32);
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
+ IntegerWidth dst_width) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 count{WidthSize(ir, dst_width)};
+ return ir.BitFieldExtract(src, zero, count, false);
+}
+
+[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
+ bool dst_signed, bool src_signed) {
+ IR::U32 min{};
+ IR::U32 max{};
+ const IR::U32 zero{ir.Imm32(0)};
+ switch (dst_width) {
+ case IntegerWidth::Byte:
+ min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
+ max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
+ break;
+ case IntegerWidth::Short:
+ min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
+ break;
+ case IntegerWidth::Word:
+ min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
+ break;
+ default:
+ throw NotImplementedException("Invalid width {}", dst_width);
+ }
+ const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
+ return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
+}
+
+void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, IntegerWidth> dst_fmt;
+ BitField<12, 1, u64> dst_fmt_sign;
+ BitField<10, 2, IntegerWidth> src_fmt;
+ BitField<13, 1, u64> src_fmt_sign;
+ BitField<41, 3, u64> selector;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> abs;
+ BitField<50, 1, u64> sat;
+ } const i2i{insn};
+
+ if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
+ throw NotImplementedException("16-bit source format incompatible with selector {}",
+ i2i.selector);
+ }
+ if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
+ throw NotImplementedException("32-bit source format incompatible with selector {}",
+ i2i.selector);
+ }
+
+ const s32 selector{static_cast<s32>(i2i.selector)};
+ const IR::U32 offset{v.ir.Imm32(selector * 8)};
+ const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
+ const bool src_signed{i2i.src_fmt_sign != 0};
+ const bool dst_signed{i2i.dst_fmt_sign != 0};
+ const bool sat{i2i.sat != 0};
+
+ IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
+ if (i2i.abs != 0) {
+ src_values = v.ir.IAbs(src_values);
+ }
+ if (i2i.neg != 0) {
+ src_values = v.ir.INeg(src_values);
+ }
+ const IR::U32 result{
+ sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
+ : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
+
+ v.X(i2i.dest_reg, result);
+ if (i2i.cc != 0) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2I_reg(u64 insn) {
+ I2I(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::I2I_cbuf(u64 insn) {
+ I2I(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::I2I_imm(u64 insn) {
+ I2I(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ Patch,
+ Prim,
+ Attr,
+};
+
+enum class Shift : u64 {
+ Default,
+ U16,
+ B32,
+};
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISBERD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<31, 1, u64> skew;
+ BitField<32, 1, u64> o;
+ BitField<33, 2, Mode> mode;
+ BitField<47, 2, Shift> shift;
+ } const isberd{insn};
+
+ if (isberd.skew != 0) {
+ throw NotImplementedException("SKEW");
+ }
+ if (isberd.o != 0) {
+ throw NotImplementedException("O");
+ }
+ if (isberd.mode != Mode::Default) {
+ throw NotImplementedException("Mode {}", isberd.mode.Value());
+ }
+ if (isberd.shift != Shift::Default) {
+ throw NotImplementedException("Shift {}", isberd.shift.Value());
+ }
+ LOG_WARNING(Shader, "(STUBBED) called");
+ X(isberd.dest_reg, X(isberd.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+using namespace LDC;
+namespace {
+std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
+ const IR::U32& reg, const IR::U32& imm) {
+ switch (mode) {
+ case Mode::Default:
+ return {imm_index, ir.IAdd(reg, imm)};
+ default:
+ break;
+ }
+ throw NotImplementedException("Mode {}", mode);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDC(u64 insn) {
+ const Encoding ldc{insn};
+ const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
+ const IR::U32 reg{X(ldc.src_reg)};
+ const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
+ const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
+ switch (ldc.size) {
+ case Size::U8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
+ break;
+ case Size::S8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
+ break;
+ case Size::U16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
+ break;
+ case Size::S16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
+ break;
+ case Size::B32:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
+ break;
+ case Size::B64: {
+ if (!IR::IsAligned(ldc.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
+ for (int i = 0; i < 2; ++i) {
+ X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid size {}", ldc.size.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell::LDC {
+
+enum class Mode : u64 {
+ Default,
+ IL,
+ IS,
+ ISL,
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 16, s64> offset;
+ BitField<36, 5, u64> index;
+ BitField<44, 2, Mode> mode;
+ BitField<48, 3, Size> size;
+};
+
+} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
+ bool neg, bool x) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+
+ if (x) {
+ throw NotImplementedException("LEA.HI X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.HI Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.HI CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
+ const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
+
+ const s32 hi_scale{32 - static_cast<s32>(scale)};
+ const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
+ const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
+ v.X(lea.dest_reg, result);
+}
+
+void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<39, 5, u64> scale;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+ if (lea.x != 0) {
+ throw NotImplementedException("LEA.LO X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.LO Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.LO CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const s32 scale{static_cast<s32>(lea.scale)};
+ const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
+ const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset)};
+ v.X(lea.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LEA_hi_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 5, u64> scale;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<51, 5, u64> scale;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_lo_reg(u64 insn) {
+ LEA_lo(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
+ LEA_lo(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LEA_lo_imm(u64 insn) {
+ LEA_lo(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+enum class InterpolationMode : u64 {
+ Pass,
+ Multiply,
+ Constant,
+ Sc,
+};
+
+enum class SampleMode : u64 {
+ Default,
+ Centroid,
+ Offset,
+};
+
+u32 NumElements(Size size) {
+ switch (size) {
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B96:
+ return 3;
+ case Size::B128:
+ return 4;
+ }
+ throw InvalidArgument("Invalid size {}", size);
+}
+
+template <typename F>
+void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
+ const IR::U32 index_value{v.X(index_reg)};
+ for (u32 element = 0; element < num_elements; ++element) {
+ const IR::U32 final_offset{
+ element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
+ f(element, final_offset);
+ }
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ALD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<32, 1, u64> o;
+ BitField<31, 1, u64> patch;
+ BitField<47, 2, Size> size;
+ } const ald{insn};
+
+ const u64 offset{ald.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ald.vertex_reg)};
+ const u32 num_elements{NumElements(ald.size)};
+ if (ald.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ald.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
+ }
+ }
+ return;
+ }
+ if (ald.patch != 0) {
+ throw NotImplementedException("Indirect patch read");
+ }
+ HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
+ });
+}
+
+void TranslatorVisitor::AST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<31, 1, u64> patch;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<47, 2, Size> size;
+ } const ast{insn};
+
+ if (ast.index_reg != IR::Reg::RZ) {
+ throw NotImplementedException("Indexed store");
+ }
+ const u64 offset{ast.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ast.vertex_reg)};
+ const u32 num_elements{NumElements(ast.size)};
+ if (ast.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ast.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
+ }
+ }
+ return;
+ }
+ if (ast.patch != 0) {
+ throw NotImplementedException("Indexed tessellation patch store");
+ }
+ HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
+ });
+}
+
+void TranslatorVisitor::IPA(u64 insn) {
+ // IPA is the instruction used to read varyings from a fragment shader.
+ // gl_FragCoord is mapped to the gl_Position attribute.
+ // It yields unknown results when used outside of the fragment shader stage.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 8, IR::Reg> multiplier;
+ BitField<30, 8, IR::Attribute> attribute;
+ BitField<38, 1, u64> idx;
+ BitField<51, 1, u64> sat;
+ BitField<52, 2, SampleMode> sample_mode;
+ BitField<54, 2, InterpolationMode> interpolation_mode;
+ } const ipa{insn};
+
+ // Indexed IPAs are used for indexed varyings.
+ // For example:
+ //
+ // in vec4 colors[4];
+ // uniform int idx;
+ // void main() {
+ // gl_FragColor = colors[idx];
+ // }
+ const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
+ const IR::Attribute attribute{ipa.attribute};
+ IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
+ : ir.GetAttribute(attribute)};
+ if (IR::IsGeneric(attribute)) {
+ const ProgramHeader& sph{env.SPH()};
+ const u32 attr_index{IR::GenericAttributeIndex(attribute)};
+ const u32 element{static_cast<u32>(attribute) % 4};
+ const std::array input_map{sph.ps.GenericInputMap(attr_index)};
+ const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
+ if (is_perspective) {
+ const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
+ value = ir.FPMul(value, position_w);
+ }
+ }
+ if (ipa.interpolation_mode == InterpolationMode::Multiply) {
+ value = ir.FPMul(value, F(ipa.multiplier));
+ }
+
+ // Saturated IPAs are generally generated out of clamped varyings.
+ // For example: clamp(some_varying, 0.0, 1.0)
+ const bool is_saturated{ipa.sat != 0};
+ if (is_saturated) {
+ if (attribute == IR::Attribute::FrontFace) {
+ throw NotImplementedException("IPA.SAT on FrontFace");
+ }
+ value = ir.FPSaturate(value);
+ }
+
+ F(ipa.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<20, 24, u64> absolute_offset;
+ BitField<20, 24, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
+ const IR::U32 offset{Offset(v, insn)};
+ if (offset.IsImmediate()) {
+ return {v.ir.Imm32(offset.U32() / 4), offset};
+ } else {
+ return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
+ }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 3, Size> size;
+ } const encoding{insn};
+
+ switch (encoding.size) {
+ case Size::U8:
+ return {8, false};
+ case Size::S8:
+ return {8, true};
+ case Size::U16:
+ return {16, false};
+ case Size::S16:
+ return {16, true};
+ case Size::B32:
+ return {32, false};
+ case Size::B64:
+ return {64, false};
+ case Size::B128:
+ return {128, false};
+ default:
+ throw NotImplementedException("Invalid size {}", encoding.size.Value());
+ }
+}
+
+IR::Reg Reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> reg;
+ } const encoding{insn};
+
+ return encoding.reg;
+}
+
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
+}
+
+IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
+}
+
+IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
+ const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
+ const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
+ return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ const IR::U32 word{LoadLocal(*this, word_offset, offset)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ X(dest, word);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
+ const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
+ X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::LDS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ X(dest, IR::U32{value});
+ break;
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ for (int element = 0; element < bit_size / 32; ++element) {
+ X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ if (offset.IsImmediate()) {
+ // TODO: Support storing out of bounds at runtime
+ if (offset.U32() >= env.LocalMemorySize()) {
+ LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
+ offset.U32(), env.LocalMemorySize());
+ return;
+ }
+ }
+ const IR::Reg reg{Reg(insn)};
+ const IR::U32 src{X(reg)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned source register");
+ }
+ ir.WriteLocal(word_offset, src);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg reg{Reg(insn)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ ir.WriteShared(bit_size, offset, X(reg));
+ break;
+ case 64:
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
+ break;
+ case 128: {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
+ ir.WriteShared(128, offset, vector);
+ break;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LoadSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+ U128, // ???
+};
+
+enum class StoreSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+};
+
+// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (cache in L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
+};
+
+// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory)
+};
+
+IR::U64 Address(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 24, s64> addr_offset;
+ BitField<20, 24, u64> rz_addr_offset;
+ BitField<45, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ // LDG/STG without .E uses a 32-bit pointer, zero-extend it
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ if (!IR::IsAligned(mem.addr_reg, 2)) {
+ throw NotImplementedException("Unaligned address register");
+ }
+ // Pack two registers to build the 64-bit address
+ return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ // Apply the offset
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDG(u64 insn) {
+ // LDG loads global memory into registers
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<46, 2, LoadCache> cache;
+ BitField<48, 3, LoadSize> size;
+ } const ldg{insn};
+
+ // Pointer to load data from
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg dest_reg{ldg.dest_reg};
+ switch (ldg.size) {
+ case LoadSize::U8:
+ X(dest_reg, ir.LoadGlobalU8(address));
+ break;
+ case LoadSize::S8:
+ X(dest_reg, ir.LoadGlobalS8(address));
+ break;
+ case LoadSize::U16:
+ X(dest_reg, ir.LoadGlobalU16(address));
+ break;
+ case LoadSize::S16:
+ X(dest_reg, ir.LoadGlobalS16(address));
+ break;
+ case LoadSize::B32:
+ X(dest_reg, ir.LoadGlobal32(address));
+ break;
+ case LoadSize::B64: {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal64(address)};
+ for (int i = 0; i < 2; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ case LoadSize::B128:
+ case LoadSize::U128: {
+ if (!IR::IsAligned(dest_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal128(address)};
+ for (int i = 0; i < 4; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
+ }
+}
+
+void TranslatorVisitor::STG(u64 insn) {
+ // STG stores registers into global memory.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<46, 2, StoreCache> cache;
+ BitField<48, 3, StoreSize> size;
+ } const stg{insn};
+
+ // Pointer to store data into
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg data_reg{stg.data_reg};
+ switch (stg.size) {
+ case StoreSize::U8:
+ ir.WriteGlobalU8(address, X(data_reg));
+ break;
+ case StoreSize::S8:
+ ir.WriteGlobalS8(address, X(data_reg));
+ break;
+ case StoreSize::U16:
+ ir.WriteGlobalU16(address, X(data_reg));
+ break;
+ case StoreSize::S16:
+ ir.WriteGlobalS16(address, X(data_reg));
+ break;
+ case StoreSize::B32:
+ ir.WriteGlobal32(address, X(data_reg));
+ break;
+ case StoreSize::B64: {
+ if (!IR::IsAligned(data_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
+ ir.WriteGlobal64(address, vector);
+ break;
+ }
+ case StoreSize::B128:
+ if (!IR::IsAligned(data_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{
+ ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
+ ir.WriteGlobal128(address, vector);
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LogicalOp : u64 {
+ AND,
+ OR,
+ XOR,
+ PASS_B,
+};
+
+[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, LogicalOp op) {
+ switch (op) {
+ case LogicalOp::AND:
+ return ir.BitwiseAnd(operand_1, operand_2);
+ case LogicalOp::OR:
+ return ir.BitwiseOr(operand_1, operand_2);
+ case LogicalOp::XOR:
+ return ir.BitwiseXor(operand_1, operand_2);
+ case LogicalOp::PASS_B:
+ return operand_2;
+ default:
+ throw NotImplementedException("Invalid Logical operation {}", op);
+ }
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
+ LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
+ IR::Pred dest_pred = IR::Pred::PT) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ } const lop{insn};
+
+ if (x) {
+ throw NotImplementedException("X");
+ }
+ IR::U32 op_a{v.X(lop.src_reg)};
+ if (inv_a != 0) {
+ op_a = v.ir.BitwiseNot(op_a);
+ }
+ if (inv_b != 0) {
+ op_b = v.ir.BitwiseNot(op_b);
+ }
+
+ const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
+ if (pred_op) {
+ const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
+ v.ir.SetPred(dest_pred, pred_result);
+ }
+ if (cc) {
+ if (bit_op == LogicalOp::PASS_B) {
+ v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
+ v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
+ } else {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+ v.X(lop.dest_reg, result);
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<39, 1, u64> inv_a;
+ BitField<40, 1, u64> inv_b;
+ BitField<41, 2, LogicalOp> bit_op;
+ BitField<43, 1, u64> x;
+ BitField<44, 2, PredicateOp> pred_op;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> dest_pred;
+ } const lop{insn};
+
+ LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
+ lop.pred_op, lop.dest_pred);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP_reg(u64 insn) {
+ LOP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LOP_cbuf(u64 insn) {
+ LOP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LOP_imm(u64 insn) {
+ LOP(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::LOP32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<53, 2, LogicalOp> bit_op;
+ BitField<57, 1, u64> x;
+ BitField<52, 1, u64> cc;
+ BitField<55, 1, u64> inv_a;
+ BitField<56, 1, u64> inv_b;
+ } const lop32i{insn};
+
+ LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
+ lop32i.inv_b != 0, lop32i.bit_op);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
+// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
+IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
+ u64 ttbl) {
+ IR::U32 r{ir.Imm32(0)};
+ const IR::U32 not_a{ir.BitwiseNot(a)};
+ const IR::U32 not_b{ir.BitwiseNot(b)};
+ const IR::U32 not_c{ir.BitwiseNot(c)};
+ if (ttbl & 0x01) {
+ // r |= ~a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x02) {
+ // r |= ~a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x04) {
+ // r |= ~a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x08) {
+ // r |= ~a & b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x10) {
+ // r |= a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x20) {
+ // r |= a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x40) {
+ // r |= a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x80) {
+ // r |= a & b & c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ return r;
+}
+
+IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> cc;
+ } const lop3{insn};
+
+ if (lop3.cc != 0) {
+ throw NotImplementedException("LOP3 CC");
+ }
+
+ const IR::U32 op_a{v.X(lop3.src_reg)};
+ const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
+ v.X(lop3.dest_reg, result);
+ return result;
+}
+
+u64 GetLut48(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 8, u64> lut;
+ } const lut{insn};
+ return lut.lut;
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 8, u64> lut;
+ BitField<38, 1, u64> x;
+ BitField<36, 2, PredicateOp> pred_op;
+ BitField<48, 3, IR::Pred> pred;
+ } const lop3{insn};
+
+ if (lop3.x != 0) {
+ throw NotImplementedException("LOP3 X");
+ }
+ const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
+ const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
+ ir.SetPred(lop3.pred, pred_result);
+}
+
+void TranslatorVisitor::LOP3_cbuf(u64 insn) {
+ LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
+}
+
+void TranslatorVisitor::LOP3_imm(u64 insn) {
+ LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::P2R_reg(u64) {
+ throw NotImplementedException("P2R (reg)");
+}
+
+void TranslatorVisitor::P2R_cbuf(u64) {
+ throw NotImplementedException("P2R (cbuf)");
+}
+
+void TranslatorVisitor::P2R_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const p2r{insn};
+
+ const u32 mask{GetImm20(insn).U32()};
+ const bool pr_mode{p2r.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
+ IR::U32 insert{ir.Imm32(0)};
+ for (u32 index = 0; index < num_items; ++index) {
+ if (((mask >> index) & 1) == 0) {
+ continue;
+ }
+ const IR::U1 cond{[this, index, pr_mode] {
+ if (pr_mode) {
+ return ir.GetPred(IR::Pred{index});
+ }
+ switch (index) {
+ case 0:
+ return ir.GetZFlag();
+ case 1:
+ return ir.GetSFlag();
+ case 2:
+ return ir.GetCFlag();
+ case 3:
+ return ir.GetOFlag();
+ }
+ throw LogicError("Unreachable P2R index");
+ }()};
+ const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
+ insert = ir.BitwiseOr(insert, bit);
+ }
+ const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
+ X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 4, u64> mask;
+ BitField<12, 4, u64> mov32i_mask;
+ } const mov{insn};
+
+ if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
+ throw NotImplementedException("Non-full move mask");
+ }
+ v.X(mov.dest_reg, src);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::MOV_reg(u64 insn) {
+ MOV(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::MOV_cbuf(u64 insn) {
+ MOV(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::MOV_imm(u64 insn) {
+ MOV(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::MOV32I(u64 insn) {
+ MOV(*this, insn, GetImm32(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+
+void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
+ switch (index) {
+ case 0:
+ return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
+ case 1:
+ return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
+ case 2:
+ return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
+ case 3:
+ return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
+ default:
+ throw LogicError("Unreachable R2P index");
+ }
+}
+
+void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const r2p{insn};
+ const IR::U32 src{v.X(r2p.src_reg)};
+ const IR::U32 count{v.ir.Imm32(1)};
+ const bool pr_mode{r2p.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
+ for (u32 index = 0; index < num_items; ++index) {
+ const IR::U32 offset{v.ir.Imm32(offset_base + index)};
+ const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
+ const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
+ const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
+ const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
+ if (pr_mode) {
+ const IR::Pred pred{index};
+ v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
+ } else {
+ SetFlag(v.ir, inv_mask_bit, src_bit, index);
+ }
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::R2P_reg(u64 insn) {
+ R2P(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::R2P_cbuf(u64 insn) {
+ R2P(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::R2P_imm(u64 insn) {
+ R2P(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SpecialRegister : u64 {
+ SR_LANEID = 0,
+ SR_CLOCK = 1,
+ SR_VIRTCFG = 2,
+ SR_VIRTID = 3,
+ SR_PM0 = 4,
+ SR_PM1 = 5,
+ SR_PM2 = 6,
+ SR_PM3 = 7,
+ SR_PM4 = 8,
+ SR_PM5 = 9,
+ SR_PM6 = 10,
+ SR_PM7 = 11,
+ SR12 = 12,
+ SR13 = 13,
+ SR14 = 14,
+ SR_ORDERING_TICKET = 15,
+ SR_PRIM_TYPE = 16,
+ SR_INVOCATION_ID = 17,
+ SR_Y_DIRECTION = 18,
+ SR_THREAD_KILL = 19,
+ SM_SHADER_TYPE = 20,
+ SR_DIRECTCBEWRITEADDRESSLOW = 21,
+ SR_DIRECTCBEWRITEADDRESSHIGH = 22,
+ SR_DIRECTCBEWRITEENABLE = 23,
+ SR_MACHINE_ID_0 = 24,
+ SR_MACHINE_ID_1 = 25,
+ SR_MACHINE_ID_2 = 26,
+ SR_MACHINE_ID_3 = 27,
+ SR_AFFINITY = 28,
+ SR_INVOCATION_INFO = 29,
+ SR_WSCALEFACTOR_XY = 30,
+ SR_WSCALEFACTOR_Z = 31,
+ SR_TID = 32,
+ SR_TID_X = 33,
+ SR_TID_Y = 34,
+ SR_TID_Z = 35,
+ SR_CTA_PARAM = 36,
+ SR_CTAID_X = 37,
+ SR_CTAID_Y = 38,
+ SR_CTAID_Z = 39,
+ SR_NTID = 40,
+ SR_CirQueueIncrMinusOne = 41,
+ SR_NLATC = 42,
+ SR43 = 43,
+ SR_SM_SPA_VERSION = 44,
+ SR_MULTIPASSSHADERINFO = 45,
+ SR_LWINHI = 46,
+ SR_SWINHI = 47,
+ SR_SWINLO = 48,
+ SR_SWINSZ = 49,
+ SR_SMEMSZ = 50,
+ SR_SMEMBANKS = 51,
+ SR_LWINLO = 52,
+ SR_LWINSZ = 53,
+ SR_LMEMLOSZ = 54,
+ SR_LMEMHIOFF = 55,
+ SR_EQMASK = 56,
+ SR_LTMASK = 57,
+ SR_LEMASK = 58,
+ SR_GTMASK = 59,
+ SR_GEMASK = 60,
+ SR_REGALLOC = 61,
+ SR_BARRIERALLOC = 62,
+ SR63 = 63,
+ SR_GLOBALERRORSTATUS = 64,
+ SR65 = 65,
+ SR_WARPERRORSTATUS = 66,
+ SR_WARPERRORSTATUSCLEAR = 67,
+ SR68 = 68,
+ SR69 = 69,
+ SR70 = 70,
+ SR71 = 71,
+ SR_PM_HI0 = 72,
+ SR_PM_HI1 = 73,
+ SR_PM_HI2 = 74,
+ SR_PM_HI3 = 75,
+ SR_PM_HI4 = 76,
+ SR_PM_HI5 = 77,
+ SR_PM_HI6 = 78,
+ SR_PM_HI7 = 79,
+ SR_CLOCKLO = 80,
+ SR_CLOCKHI = 81,
+ SR_GLOBALTIMERLO = 82,
+ SR_GLOBALTIMERHI = 83,
+ SR84 = 84,
+ SR85 = 85,
+ SR86 = 86,
+ SR87 = 87,
+ SR88 = 88,
+ SR89 = 89,
+ SR90 = 90,
+ SR91 = 91,
+ SR92 = 92,
+ SR93 = 93,
+ SR94 = 94,
+ SR95 = 95,
+ SR_HWTASKID = 96,
+ SR_CIRCULARQUEUEENTRYINDEX = 97,
+ SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
+ SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
+};
+
+[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
+ switch (special_register) {
+ case SpecialRegister::SR_INVOCATION_ID:
+ return ir.InvocationId();
+ case SpecialRegister::SR_THREAD_KILL:
+ return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
+ case SpecialRegister::SR_INVOCATION_INFO:
+ LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
+ return ir.Imm32(0x00ff'0000);
+ case SpecialRegister::SR_TID: {
+ const IR::Value tid{ir.LocalInvocationId()};
+ return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
+ IR::U32{ir.CompositeExtract(tid, 1)},
+ ir.Imm32(16), ir.Imm32(8)),
+ IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
+ }
+ case SpecialRegister::SR_TID_X:
+ return ir.LocalInvocationIdX();
+ case SpecialRegister::SR_TID_Y:
+ return ir.LocalInvocationIdY();
+ case SpecialRegister::SR_TID_Z:
+ return ir.LocalInvocationIdZ();
+ case SpecialRegister::SR_CTAID_X:
+ return ir.WorkgroupIdX();
+ case SpecialRegister::SR_CTAID_Y:
+ return ir.WorkgroupIdY();
+ case SpecialRegister::SR_CTAID_Z:
+ return ir.WorkgroupIdZ();
+ case SpecialRegister::SR_WSCALEFACTOR_XY:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_WSCALEFACTOR_Z:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_LANEID:
+ return ir.LaneId();
+ case SpecialRegister::SR_EQMASK:
+ return ir.SubgroupEqMask();
+ case SpecialRegister::SR_LTMASK:
+ return ir.SubgroupLtMask();
+ case SpecialRegister::SR_LEMASK:
+ return ir.SubgroupLeMask();
+ case SpecialRegister::SR_GTMASK:
+ return ir.SubgroupGtMask();
+ case SpecialRegister::SR_GEMASK:
+ return ir.SubgroupGeMask();
+ case SpecialRegister::SR_Y_DIRECTION:
+ return ir.BitCast<IR::U32>(ir.YDirection());
+ case SpecialRegister::SR_AFFINITY:
+ LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
+ return ir.Imm32(0); // This is the default value hardware returns.
+ default:
+ throw NotImplementedException("S2R special register {}", special_register);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::S2R(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, SpecialRegister> src_reg;
+ } const s2r{insn};
+
+ X(s2r.dest_reg, Read(ir, s2r.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
+ throw NotImplementedException("Instruction {} is not implemented", opcode);
+}
+
+void TranslatorVisitor::ATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOM_cas);
+}
+
+void TranslatorVisitor::ATOMS_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOMS_cas);
+}
+
+void TranslatorVisitor::B2R(u64) {
+ ThrowNotImplemented(Opcode::B2R);
+}
+
+void TranslatorVisitor::BPT(u64) {
+ ThrowNotImplemented(Opcode::BPT);
+}
+
+void TranslatorVisitor::BRA(u64) {
+ ThrowNotImplemented(Opcode::BRA);
+}
+
+void TranslatorVisitor::BRK(u64) {
+ ThrowNotImplemented(Opcode::BRK);
+}
+
+void TranslatorVisitor::CAL() {
+ // CAL is a no-op
+}
+
+void TranslatorVisitor::CCTL(u64) {
+ ThrowNotImplemented(Opcode::CCTL);
+}
+
+void TranslatorVisitor::CCTLL(u64) {
+ ThrowNotImplemented(Opcode::CCTLL);
+}
+
+void TranslatorVisitor::CONT(u64) {
+ ThrowNotImplemented(Opcode::CONT);
+}
+
+void TranslatorVisitor::CS2R(u64) {
+ ThrowNotImplemented(Opcode::CS2R);
+}
+
+void TranslatorVisitor::FCHK_reg(u64) {
+ ThrowNotImplemented(Opcode::FCHK_reg);
+}
+
+void TranslatorVisitor::FCHK_cbuf(u64) {
+ ThrowNotImplemented(Opcode::FCHK_cbuf);
+}
+
+void TranslatorVisitor::FCHK_imm(u64) {
+ ThrowNotImplemented(Opcode::FCHK_imm);
+}
+
+void TranslatorVisitor::GETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::GETCRSPTR);
+}
+
+void TranslatorVisitor::GETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::GETLMEMBASE);
+}
+
+void TranslatorVisitor::IDE(u64) {
+ ThrowNotImplemented(Opcode::IDE);
+}
+
+void TranslatorVisitor::IDP_reg(u64) {
+ ThrowNotImplemented(Opcode::IDP_reg);
+}
+
+void TranslatorVisitor::IDP_imm(u64) {
+ ThrowNotImplemented(Opcode::IDP_imm);
+}
+
+void TranslatorVisitor::IMAD_reg(u64) {
+ ThrowNotImplemented(Opcode::IMAD_reg);
+}
+
+void TranslatorVisitor::IMAD_rc(u64) {
+ ThrowNotImplemented(Opcode::IMAD_rc);
+}
+
+void TranslatorVisitor::IMAD_cr(u64) {
+ ThrowNotImplemented(Opcode::IMAD_cr);
+}
+
+void TranslatorVisitor::IMAD_imm(u64) {
+ ThrowNotImplemented(Opcode::IMAD_imm);
+}
+
+void TranslatorVisitor::IMAD32I(u64) {
+ ThrowNotImplemented(Opcode::IMAD32I);
+}
+
+void TranslatorVisitor::IMADSP_reg(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_reg);
+}
+
+void TranslatorVisitor::IMADSP_rc(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_rc);
+}
+
+void TranslatorVisitor::IMADSP_cr(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_cr);
+}
+
+void TranslatorVisitor::IMADSP_imm(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_imm);
+}
+
+void TranslatorVisitor::IMUL_reg(u64) {
+ ThrowNotImplemented(Opcode::IMUL_reg);
+}
+
+void TranslatorVisitor::IMUL_cbuf(u64) {
+ ThrowNotImplemented(Opcode::IMUL_cbuf);
+}
+
+void TranslatorVisitor::IMUL_imm(u64) {
+ ThrowNotImplemented(Opcode::IMUL_imm);
+}
+
+void TranslatorVisitor::IMUL32I(u64) {
+ ThrowNotImplemented(Opcode::IMUL32I);
+}
+
+void TranslatorVisitor::JCAL(u64) {
+ ThrowNotImplemented(Opcode::JCAL);
+}
+
+void TranslatorVisitor::JMP(u64) {
+ ThrowNotImplemented(Opcode::JMP);
+}
+
+void TranslatorVisitor::KIL() {
+ // KIL is a no-op
+}
+
+void TranslatorVisitor::LD(u64) {
+ ThrowNotImplemented(Opcode::LD);
+}
+
+void TranslatorVisitor::LEPC(u64) {
+ ThrowNotImplemented(Opcode::LEPC);
+}
+
+void TranslatorVisitor::LONGJMP(u64) {
+ ThrowNotImplemented(Opcode::LONGJMP);
+}
+
+void TranslatorVisitor::NOP(u64) {
+ // NOP is No-Op.
+}
+
+void TranslatorVisitor::PBK() {
+ // PBK is a no-op
+}
+
+void TranslatorVisitor::PCNT() {
+ // PCNT is a no-op
+}
+
+void TranslatorVisitor::PEXIT(u64) {
+ ThrowNotImplemented(Opcode::PEXIT);
+}
+
+void TranslatorVisitor::PLONGJMP(u64) {
+ ThrowNotImplemented(Opcode::PLONGJMP);
+}
+
+void TranslatorVisitor::PRET(u64) {
+ ThrowNotImplemented(Opcode::PRET);
+}
+
+void TranslatorVisitor::PRMT_reg(u64) {
+ ThrowNotImplemented(Opcode::PRMT_reg);
+}
+
+void TranslatorVisitor::PRMT_rc(u64) {
+ ThrowNotImplemented(Opcode::PRMT_rc);
+}
+
+void TranslatorVisitor::PRMT_cr(u64) {
+ ThrowNotImplemented(Opcode::PRMT_cr);
+}
+
+void TranslatorVisitor::PRMT_imm(u64) {
+ ThrowNotImplemented(Opcode::PRMT_imm);
+}
+
+void TranslatorVisitor::R2B(u64) {
+ ThrowNotImplemented(Opcode::R2B);
+}
+
+void TranslatorVisitor::RAM(u64) {
+ ThrowNotImplemented(Opcode::RAM);
+}
+
+void TranslatorVisitor::RET(u64) {
+ ThrowNotImplemented(Opcode::RET);
+}
+
+void TranslatorVisitor::RTT(u64) {
+ ThrowNotImplemented(Opcode::RTT);
+}
+
+void TranslatorVisitor::SAM(u64) {
+ ThrowNotImplemented(Opcode::SAM);
+}
+
+void TranslatorVisitor::SETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::SETCRSPTR);
+}
+
+void TranslatorVisitor::SETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::SETLMEMBASE);
+}
+
+void TranslatorVisitor::SSY() {
+ // SSY is a no-op
+}
+
+void TranslatorVisitor::ST(u64) {
+ ThrowNotImplemented(Opcode::ST);
+}
+
+void TranslatorVisitor::STP(u64) {
+ ThrowNotImplemented(Opcode::STP);
+}
+
+void TranslatorVisitor::SUATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::SUATOM_cas);
+}
+
+void TranslatorVisitor::SYNC(u64) {
+ ThrowNotImplemented(Opcode::SYNC);
+}
+
+void TranslatorVisitor::TXA(u64) {
+ ThrowNotImplemented(Opcode::TXA);
+}
+
+void TranslatorVisitor::VABSDIFF(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF);
+}
+
+void TranslatorVisitor::VABSDIFF4(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF4);
+}
+
+void TranslatorVisitor::VADD(u64) {
+ ThrowNotImplemented(Opcode::VADD);
+}
+
+void TranslatorVisitor::VSET(u64) {
+ ThrowNotImplemented(Opcode::VSET);
+}
+void TranslatorVisitor::VSHL(u64) {
+ ThrowNotImplemented(Opcode::VSHL);
+}
+
+void TranslatorVisitor::VSHR(u64) {
+ ThrowNotImplemented(Opcode::VSHR);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> output_reg; // Not needed on host
+ BitField<39, 1, u64> emit;
+ BitField<40, 1, u64> cut;
+ } const out{insn};
+
+ stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
+
+ if (out.emit != 0) {
+ v.ir.EmitVertex(stream_index);
+ }
+ if (out.cut != 0) {
+ v.ir.EndPrimitive(stream_index);
+ }
+ // Host doesn't need the output register, but we can write to it to avoid undefined reads
+ v.X(out.dest_reg, v.ir.Imm32(0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::OUT_reg(u64 insn) {
+ OUT(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::OUT_cbuf(u64 insn) {
+ OUT(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::OUT_imm(u64 insn) {
+ OUT(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ CovMask,
+ Covered,
+ Offset,
+ CentroidOffset,
+ MyIndex,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::PIXLD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<31, 3, Mode> mode;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, s64> addr_offset;
+ BitField<45, 3, IR::Pred> dest_pred;
+ } const pixld{insn};
+
+ if (pixld.dest_pred != IR::Pred::PT) {
+ throw NotImplementedException("Destination predicate");
+ }
+ if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
+ throw NotImplementedException("Non-zero source register");
+ }
+ switch (pixld.mode) {
+ case Mode::MyIndex:
+ X(pixld.dest_reg, ir.SampleId());
+ break;
+ default:
+ throw NotImplementedException("Mode {}", pixld.mode.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<45, 2, BooleanOp> bop_2;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
+ const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
+ const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
+ const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
+
+ ir.SetPred(pset.dest_pred_a, result_a);
+ ir.SetPred(pset.dest_pred_b, result_b);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop_2;
+ BitField<47, 1, u64> cc;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
+ const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
+
+ const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
+ const IR::U32 zero{ir.Imm32(0)};
+
+ const IR::U32 result{ir.Select(res_2, true_result, zero)};
+
+ X(pset.dest_reg, result);
+ if (pset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (pset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ } const sel{insn};
+
+ const IR::U1 pred = v.ir.GetPred(sel.pred);
+ IR::U32 op_a{v.X(sel.src_reg)};
+ IR::U32 op_b{src};
+ if (sel.neg_pred != 0) {
+ std::swap(op_a, op_b);
+ }
+ const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
+
+ v.X(sel.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SEL_reg(u64 insn) {
+ SEL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SEL_cbuf(u64 insn) {
+ SEL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SEL_imm(u64 insn) {
+ SEL(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+enum class Size : u64 {
+ U32,
+ S32,
+ U64,
+ S64,
+ F32FTZRN,
+ F16x2FTZRN,
+ SD32,
+ SD64,
+};
+
+enum class AtomicOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
+ const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomicOp::ADD:
+ return ir.ImageAtomicIAdd(handle, coords, op_b, info);
+ case AtomicOp::MIN:
+ return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
+ case AtomicOp::MAX:
+ return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
+ case AtomicOp::INC:
+ return ir.ImageAtomicInc(handle, coords, op_b, info);
+ case AtomicOp::DEC:
+ return ir.ImageAtomicDec(handle, coords, op_b, info);
+ case AtomicOp::AND:
+ return ir.ImageAtomicAnd(handle, coords, op_b, info);
+ case AtomicOp::OR:
+ return ir.ImageAtomicOr(handle, coords, op_b, info);
+ case AtomicOp::XOR:
+ return ir.ImageAtomicXor(handle, coords, op_b, info);
+ case AtomicOp::EXCH:
+ return ir.ImageAtomicExchange(handle, coords, op_b, info);
+ default:
+ throw NotImplementedException("Atomic Operation {}", op);
+ }
+}
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return ImageFormat::R32_UINT;
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+bool IsSizeInt32(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
+ IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
+ u64 bound_offset, bool is_bindless, bool write_result) {
+ if (clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", clamp);
+ }
+ if (!IsSizeInt32(size)) {
+ throw NotImplementedException("Size {}", size);
+ }
+ const bool is_signed{size == Size::S32};
+ const ImageFormat format{Format(size)};
+ const TextureType tex_type{GetType(type)};
+ const IR::Value coords{MakeCoords(v, coord_reg, type)};
+
+ const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg)
+ : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
+ IR::TextureInstInfo info{};
+ info.type.Assign(tex_type);
+ info.image_format.Assign(format);
+
+ // TODO: float/64-bit operand
+ const IR::Value op_b{v.X(operand_reg)};
+ const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
+
+ if (write_result) {
+ v.X(dest_reg, IR::U32{color});
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SUATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> is_bindless;
+ BitField<29, 4, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<51, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<36, 13, u64> bound_offset; // !is_bindless
+ BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
+ } const suatom{insn};
+
+ ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
+ suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
+ suatom.is_bindless != 0, true);
+}
+
+void TranslatorVisitor::SURED(u64 insn) {
+ // TODO: confirm offsets
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<21, 3, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<20, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sured{insn};
+ ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
+ sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
+ sured.is_bound == 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+constexpr unsigned R = 1 << 0;
+constexpr unsigned G = 1 << 1;
+constexpr unsigned B = 1 << 2;
+constexpr unsigned A = 1 << 3;
+
+constexpr std::array MASK{
+ 0U, //
+ R, //
+ G, //
+ R | G, //
+ B, //
+ R | B, //
+ G | B, //
+ R | G | B, //
+ A, //
+ R | A, //
+ G | A, //
+ R | G | A, //
+ B | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (volatile)
+};
+
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level (L2 and below, not L1)
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory, volatile?)
+};
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U8:
+ return ImageFormat::R8_UINT;
+ case Size::S8:
+ return ImageFormat::R8_SINT;
+ case Size::U16:
+ return ImageFormat::R16_UINT;
+ case Size::S16:
+ return ImageFormat::R16_SINT;
+ case Size::B32:
+ return ImageFormat::R32_UINT;
+ case Size::B64:
+ return ImageFormat::R32G32_UINT;
+ case Size::B128:
+ return ImageFormat::R32G32B32A32_UINT;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+int SizeInRegs(Size size) {
+ switch (size) {
+ case Size::U8:
+ case Size::S8:
+ case Size::U16:
+ case Size::S16:
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B128:
+ return 4;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ const auto array{[&](int index) {
+ return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
+ }};
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg), array(1));
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+unsigned SwizzleMask(u64 swizzle) {
+ if (swizzle == 0 || swizzle >= MASK.size()) {
+ throw NotImplementedException("Invalid swizzle {}", swizzle);
+ }
+ return MASK[swizzle];
+}
+
+IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
+ std::array<IR::U32, 4> colors;
+ for (int i = 0; i < num_regs; ++i) {
+ colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
+ }
+ for (int i = num_regs; i < 4; ++i) {
+ colors[static_cast<size_t>(i)] = ir.Imm32(0);
+ }
+ return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SULD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, LoadCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const suld{insn};
+
+ if (suld.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", suld.clamp.Value());
+ }
+ if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
+ throw NotImplementedException("Cache {}", suld.cache.Value());
+ }
+ const bool is_typed{suld.d != 0};
+ if (is_typed && suld.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+
+ const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(suld.type)};
+ const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
+ const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
+ : X(suld.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ const IR::Value result{ir.ImageRead(handle, coords, info)};
+ IR::Reg dest_reg{suld.dest_reg};
+ if (is_typed) {
+ const int num_regs{SizeInRegs(suld.size)};
+ for (int i = 0; i < num_regs; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+ } else {
+ const unsigned mask{SwizzleMask(suld.swizzle)};
+ const int bits{std::popcount(mask)};
+ if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((mask >> component) & 1) == 0) {
+ continue;
+ }
+ X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
+ ++dest_reg;
+ }
+ }
+}
+
+void TranslatorVisitor::SUST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, StoreCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sust{insn};
+
+ if (sust.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", sust.clamp.Value());
+ }
+ if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
+ throw NotImplementedException("Cache {}", sust.cache.Value());
+ }
+ const bool is_typed{sust.d != 0};
+ if (is_typed && sust.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+ const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(sust.type)};
+ const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
+ const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
+ : X(sust.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ IR::Value color;
+ if (is_typed) {
+ color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
+ } else {
+ const unsigned mask{SwizzleMask(sust.swizzle)};
+ if (mask != 0xf) {
+ throw NotImplementedException("Non-full mask");
+ }
+ color = MakeColor(ir, sust.data_reg, 4);
+ }
+ ir.ImageWrite(handle, coords, color, info);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Blod : u64 {
+ None,
+ LZ,
+ LB,
+ LL,
+ INVALIDBLOD4,
+ INVALIDBLOD5,
+ LBA,
+ LLA,
+};
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
+ switch (blod) {
+ case Blod::None:
+ return v.ir.Imm32(0.0f);
+ case Blod::LZ:
+ return v.ir.Imm32(0.0f);
+ case Blod::LB:
+ case Blod::LL:
+ case Blod::LBA:
+ case Blod::LLA:
+ return v.F(reg++);
+ case Blod::INVALIDBLOD4:
+ case Blod::INVALIDBLOD5:
+ break;
+ }
+ throw NotImplementedException("Invalid blod {}", blod);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+bool HasExplicitLod(Blod blod) {
+ switch (blod) {
+ case Blod::LL:
+ case Blod::LLA:
+ case Blod::LZ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
+ std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ } const tex{insn};
+
+ if (lc) {
+ throw NotImplementedException("LC");
+ }
+ const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
+
+ IR::Reg meta_reg{tex.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::F32 dref;
+ IR::F32 lod_clamp;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(meta_reg++);
+ }
+ const IR::F32 lod{MakeLod(v, meta_reg, blod)};
+ if (aoffi) {
+ offset = MakeOffset(v, meta_reg, tex.type);
+ }
+ if (tex.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tex.type));
+ info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
+ info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
+ info.has_lod_clamp.Assign(lc ? 1 : 0);
+
+ const IR::Value sample{[&]() -> IR::Value {
+ if (tex.dc == 0) {
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
+ }
+ }
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+ info);
+ }
+ }()};
+
+ IR::Reg dest_reg{tex.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((tex.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value;
+ if (tex.dc != 0) {
+ value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
+ } else {
+ value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
+ }
+ v.F(dest_reg, value);
+ ++dest_reg;
+ }
+ if (tex.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> aoffi;
+ BitField<55, 3, Blod> blod;
+ BitField<58, 1, u64> lc;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TEX_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 1, u64> aoffi;
+ BitField<37, 3, Blod> blod;
+ BitField<40, 1, u64> lc;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<53, 4, u64> encoding;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+template <typename... Args>
+IR::Value Composite(TranslatorVisitor& v, Args... regs) {
+ return v.ir.CompositeConstruct(v.F(regs)...);
+}
+
+IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
+ return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding texs{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::Reg reg_a{texs.src_reg_a};
+ const IR::Reg reg_b{texs.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (texs.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ switch (texs.encoding) {
+ case 0: // 1D.LZ
+ info.type.Assign(TextureType::Color1D);
+ return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
+ case 1: // 2D
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
+ case 2: // 2D.LZ
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
+ case 3: // 2D.LL
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
+ info);
+ case 4: // 2D.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ {}, {}, {}, info);
+ case 5: // 2D.LL.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
+ v.F(reg_b + 1), v.F(reg_b), {}, info);
+ case 6: // 2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ zero, {}, info);
+ case 7: // ARRAY_2D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleImplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ {}, {}, {}, info);
+ case 8: // ARRAY_2D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ zero, {}, info);
+ case 9: // ARRAY_2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ v.F(reg_b + 1), zero, {}, info);
+ case 10: // 3D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 11: // 3D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
+ info);
+ case 12: // CUBE
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 13: // CUBE.LL
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
+ v.F(reg_b + 1), {}, info);
+ default:
+ throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
+ }
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding texs{insn};
+ const size_t encoding{texs.swizzle};
+ if (texs.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ const bool is_shadow{sample.Type() == IR::Type::F32};
+ if (is_shadow) {
+ const bool is_alpha{component == 3};
+ return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
+ } else {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding texs{insn};
+ switch (index) {
+ case 0:
+ return texs.dest_reg_a;
+ case 1:
+ CheckAlignment(texs.dest_reg_a, 2);
+ return texs.dest_reg_a + 1;
+ case 2:
+ return texs.dest_reg_b;
+ case 3:
+ CheckAlignment(texs.dest_reg_b, 2);
+ return texs.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding texs{insn};
+ switch (store_index) {
+ case 1:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEXS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+enum class OffsetType : u64 {
+ None = 0,
+ AOFFI,
+ PTP,
+ Invalid,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
+ const IR::U32 value1{v.X(reg++)};
+ const IR::U32 value2{v.X(reg++)};
+ const IR::U32 bitsize{v.ir.Imm32(6)};
+ const auto make_vector{[&v, &bitsize](const IR::U32& value) {
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
+ }};
+ return {make_vector(value1), make_vector(value2)};
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
+ bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld4{insn};
+
+ const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
+
+ IR::Reg meta_reg{tld4.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::Value offset2;
+ IR::F32 dref;
+ if (!is_bindless) {
+ handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
+ } else {
+ handle = v.X(meta_reg++);
+ }
+ switch (offset_type) {
+ case OffsetType::None:
+ break;
+ case OffsetType::AOFFI:
+ offset = MakeOffset(v, meta_reg, tld4.type);
+ break;
+ case OffsetType::PTP:
+ std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
+ break;
+ default:
+ throw NotImplementedException("Invalid offset type {}", offset_type);
+ }
+ if (tld4.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld4.type));
+ info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
+ info.gather_component.Assign(static_cast<u32>(component_type));
+ const IR::Value sample{[&] {
+ if (tld4.dc == 0) {
+ return v.ir.ImageGather(handle, coords, offset, offset2, info);
+ }
+ return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
+ }()};
+
+ IR::Reg dest_reg{tld4.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld4.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld4.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4(u64 insn) {
+ union {
+ u64 raw;
+ BitField<56, 2, ComponentType> component;
+ BitField<54, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, false);
+}
+
+void TranslatorVisitor::TLD4_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<38, 2, ComponentType> component;
+ BitField<36, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F32,
+ F16,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<55, 1, Precision> precision;
+ BitField<52, 2, ComponentType> component_type;
+ BitField<51, 1, u64> aoffi;
+ BitField<50, 1, u64> dc;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tld4s{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
+ const IR::Reg reg_a{tld4s.src_reg_a};
+ const IR::Reg reg_b{tld4s.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (tld4s.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
+ info.type.Assign(Shader::TextureType::Color2D);
+ info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
+ IR::Value coords;
+ if (tld4s.aoffi != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::Value offset = MakeOffset(v, reg_b);
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_b, 2);
+ IR::F32 dref = v.F(reg_b + 1);
+ return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
+ }
+ return v.ir.ImageGather(handle, coords, offset, {}, info);
+ }
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::F32 dref = v.F(reg_b);
+ return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
+ }
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
+ return v.ir.ImageGather(handle, coords, {}, {}, info);
+}
+
+IR::Reg RegStoreComponent32(u64 insn, size_t index) {
+ const Encoding tlds4{insn};
+ switch (index) {
+ case 0:
+ return tlds4.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds4.dest_reg_a, 2);
+ return tlds4.dest_reg_a + 1;
+ case 2:
+ return tlds4.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds4.dest_reg_b, 2);
+ return tlds4.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ for (size_t component = 0; component < 4; ++component) {
+ const IR::Reg dest{RegStoreComponent32(insn, component)};
+ v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ std::array<IR::F32, 4> swizzled;
+ for (size_t component = 0; component < 4; ++component) {
+ swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+ const Encoding tld4s{insn};
+ v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4S(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
+ const IR::U32 value{v.X(reg)};
+ const u32 base{has_lod_clamp ? 12U : 16U};
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> aoffi;
+ BitField<50, 1, u64> lc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> derivate_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txd{insn};
+
+ const bool has_lod_clamp = txd.lc != 0;
+ if (has_lod_clamp) {
+ throw NotImplementedException("TXD.LC - CLAMP is not implemented");
+ }
+
+ IR::Value coords;
+ u32 num_derivates{};
+ IR::Reg base_reg{txd.coord_reg};
+ IR::Reg last_reg;
+ IR::Value handle;
+ if (is_bindless) {
+ handle = v.X(base_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
+ }
+
+ const auto read_array{[&]() -> IR::F32 {
+ const IR::U32 base{v.ir.Imm32(0)};
+ const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
+ const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
+ return v.ir.ConvertUToF(32, 16, array_index);
+ }};
+ switch (txd.type) {
+ case TextureType::_1D: {
+ coords = v.F(base_reg);
+ num_derivates = 1;
+ last_reg = base_reg + 1;
+ break;
+ }
+ case TextureType::ARRAY_1D: {
+ last_reg = base_reg + 1;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
+ num_derivates = 1;
+ break;
+ }
+ case TextureType::_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
+ num_derivates = 2;
+ break;
+ }
+ case TextureType::ARRAY_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
+ num_derivates = 2;
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid texture type");
+ }
+
+ const IR::Reg derivate_reg{txd.derivate_reg};
+ IR::Value derivates;
+ switch (num_derivates) {
+ case 1: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+ break;
+ }
+ case 2: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
+ v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid texture type");
+ }
+
+ IR::Value offset;
+ if (txd.aoffi != 0) {
+ offset = MakeOffset(v, last_reg, has_lod_clamp);
+ }
+
+ IR::F32 lod_clamp;
+ if (has_lod_clamp) {
+ // Lod Clamp is a Fixed Point 4.8, we need to transform it to float.
+ // to convert a fixed point, float(value) / float(1 << fixed_point)
+ // in this case the fixed_point is 8.
+ const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
+ const IR::F32 fixp_lc{v.ir.ConvertUToF(
+ 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
+ lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
+ }
+
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(txd.type));
+ info.num_derivates.Assign(num_derivates);
+ info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
+ const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+
+ IR::Reg dest_reg{txd.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((txd.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (txd.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TXD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{
+ [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.X(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<55, 1, u64> lod;
+ BitField<50, 1, u64> multisample;
+ BitField<35, 1, u64> aoffi;
+ BitField<54, 1, u64> clamp;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld{insn};
+
+ const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
+
+ IR::Reg meta_reg{tld.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::U32 lod;
+ IR::U32 multisample;
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
+ }
+ if (tld.lod != 0) {
+ lod = v.X(meta_reg++);
+ } else {
+ lod = v.ir.Imm32(0U);
+ }
+ if (tld.aoffi != 0) {
+ offset = MakeOffset(v, meta_reg, tld.type);
+ }
+ if (tld.multisample != 0) {
+ multisample = v.X(meta_reg++);
+ }
+ if (tld.clamp != 0) {
+ throw NotImplementedException("TLD.CL - CLAMP is not implmented");
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld.type));
+ const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
+
+ IR::Reg dest_reg{tld.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TLD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<54, 1, u64> aoffi;
+ BitField<53, 1, u64> lod;
+ BitField<55, 1, u64> ms;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+ BitField<53, 4, u64> encoding;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tlds{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
+ const IR::Reg reg_a{tlds.src_reg_a};
+ const IR::Reg reg_b{tlds.src_reg_b};
+ IR::Value coords;
+ IR::U32 lod{v.ir.Imm32(0U)};
+ IR::Value offsets;
+ IR::U32 multisample;
+ Shader::TextureType texture_type{};
+ switch (tlds.encoding) {
+ case 0:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ break;
+ case 1:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ lod = v.X(reg_b);
+ break;
+ case 2:
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
+ break;
+ case 4:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ offsets = MakeOffset(v, reg_b);
+ break;
+ case 5:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ break;
+ case 6:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ multisample = v.X(reg_b);
+ break;
+ case 7:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color3D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
+ break;
+ case 8: {
+ CheckAlignment(reg_b, 2);
+ const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
+ texture_type = Shader::TextureType::ColorArray2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
+ break;
+ }
+ case 12:
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ offsets = MakeOffset(v, reg_b + 1);
+ break;
+ default:
+ throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
+ }
+ IR::TextureInstInfo info{};
+ if (tlds.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.type.Assign(texture_type);
+ return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding tlds{insn};
+ const size_t encoding{tlds.swizzle};
+ if (tlds.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding tlds{insn};
+ switch (index) {
+ case 0:
+ return tlds.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds.dest_reg_a, 2);
+ return tlds.dest_reg_a + 1;
+ case 2:
+ return tlds.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds.dest_reg_b, 2);
+ return tlds.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding tlds{insn};
+ switch (store_index) {
+ case 1:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLDS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ // The ISA reads an array component here, but this is not needed on high level shading languages
+ // We are dropping this information.
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.F(reg + 1);
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> ndv;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tmml{insn};
+
+ if ((tmml.mask & 0b1100) != 0) {
+ throw NotImplementedException("TMML BA results are not implmented");
+ }
+ const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
+
+ IR::U32 handle;
+ IR::Reg meta_reg{tmml.meta_reg};
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tmml.type));
+ const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
+
+ IR::Reg dest_reg{tmml.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tmml.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value{v.ir.CompositeExtract(sample, element)};
+ if (element < 2) {
+ IR::U32 casted_value;
+ if (element == 0) {
+ casted_value = v.ir.ConvertFToU(32, value);
+ } else {
+ casted_value = v.ir.ConvertFToS(16, value);
+ }
+ v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
+ } else {
+ v.F(dest_reg, value);
+ }
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TMML(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TMML_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Dimension = 1,
+ TextureType = 2,
+ SamplePos = 5,
+};
+
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+ switch (mode) {
+ case Mode::Dimension: {
+ const IR::U32 lod{v.X(src_reg)};
+ return v.ir.ImageQueryDimension(handle, lod);
+ }
+ case Mode::TextureType:
+ case Mode::SamplePos:
+ default:
+ throw NotImplementedException("Mode {}", mode);
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<22, 3, Mode> mode;
+ BitField<31, 4, u64> mask;
+ } const txq{insn};
+
+ IR::Reg src_reg{txq.src_reg};
+ IR::U32 handle;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(src_reg);
+ ++src_reg;
+ }
+ const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+ IR::Reg dest_reg{txq.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((txq.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXQ(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txq{insn};
+
+ Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TXQ_b(u64 insn) {
+ Impl(*this, insn, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
+ u32 selector, bool is_signed) {
+ switch (width) {
+ case VideoWidth::Byte:
+ case VideoWidth::Unknown:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
+ case VideoWidth::Short:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
+ case VideoWidth::Word:
+ return value;
+ default:
+ throw NotImplementedException("Unknown VideoWidth {}", width);
+ }
+}
+
+VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
+ // immediates must be 16-bit format.
+ return is_immediate ? VideoWidth::Short : width;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+enum class VideoWidth : u64 {
+ Byte,
+ Unknown,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
+ VideoWidth width, u32 selector, bool is_signed);
+
+[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VideoMinMaxOps : u64 {
+ MRG_16H,
+ MRG_16L,
+ MRG_8B0,
+ MRG_8B2,
+ ACC,
+ MIN,
+ MAX,
+};
+
+[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
+ VideoMinMaxOps op, bool is_signed) {
+ switch (op) {
+ case VideoMinMaxOps::MIN:
+ return ir.IMin(lhs, rhs, is_signed);
+ case VideoMinMaxOps::MAX:
+ return ir.IMax(lhs, rhs, is_signed);
+ default:
+ throw NotImplementedException("VMNMX op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VMNMX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 3, VideoMinMaxOps> op;
+ BitField<54, 1, u64> dest_sign;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> mx;
+ } const vmnmx{insn};
+
+ if (vmnmx.cc != 0) {
+ throw NotImplementedException("VMNMX CC");
+ }
+ if (vmnmx.sat != 0) {
+ throw NotImplementedException("VMNMX SAT");
+ }
+ // Selectors were shown to default to 2 in unit tests
+ if (vmnmx.src_a_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
+ }
+ if (vmnmx.src_b_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
+ }
+ if (vmnmx.src_a_width != VideoWidth::Word) {
+ throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
+ }
+
+ const bool is_b_imm{vmnmx.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const VideoWidth a_width{vmnmx.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmnmx.src_a_sign != 0};
+ const bool src_b_signed{vmnmx.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+ // First operation's sign is only dependent on operand b's sign
+ const bool op_1_signed{src_b_signed};
+
+ const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+ : ir.IMin(op_a, op_b, op_1_signed)};
+ X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::VMAD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 2, u64> scale;
+ BitField<53, 1, u64> src_c_neg;
+ BitField<54, 1, u64> src_a_neg;
+ BitField<55, 1, u64> sat;
+ } const vmad{insn};
+
+ if (vmad.cc != 0) {
+ throw NotImplementedException("VMAD CC");
+ }
+ if (vmad.sat != 0) {
+ throw NotImplementedException("VMAD SAT");
+ }
+ if (vmad.scale != 0) {
+ throw NotImplementedException("VMAD SCALE");
+ }
+ if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD PO");
+ }
+ if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD NEG");
+ }
+ const bool is_b_imm{vmad.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+ // Immediate values can't have a selector
+ const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+ const VideoWidth a_width{vmad.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmad.src_a_sign != 0};
+ const bool src_b_signed{vmad.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VsetpCompareOp : u64 {
+ False = 0,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan = 16,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+ switch (op) {
+ case VsetpCompareOp::False:
+ return CompareOp::False;
+ case VsetpCompareOp::LessThan:
+ return CompareOp::LessThan;
+ case VsetpCompareOp::Equal:
+ return CompareOp::Equal;
+ case VsetpCompareOp::LessThanEqual:
+ return CompareOp::LessThanEqual;
+ case VsetpCompareOp::GreaterThan:
+ return CompareOp::GreaterThan;
+ case VsetpCompareOp::NotEqual:
+ return CompareOp::NotEqual;
+ case VsetpCompareOp::GreaterThanEqual:
+ return CompareOp::GreaterThanEqual;
+ case VsetpCompareOp::True:
+ return CompareOp::True;
+ default:
+ throw NotImplementedException("Invalid compare op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 5, VsetpCompareOp> compare_op;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ } const vsetp{insn};
+
+ const bool is_b_imm{vsetp.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+ const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+ const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
+ const VideoWidth a_width{vsetp.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vsetp.src_a_sign != 0};
+ const bool src_b_signed{vsetp.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ // Compare operation's sign is only dependent on operand b's sign
+ const bool compare_signed{src_b_signed};
+ const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+ const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+ const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+ ir.SetPred(vsetp.dest_pred_a, result_a);
+ ir.SetPred(vsetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VoteOp : u64 {
+ ALL,
+ ANY,
+ EQ,
+};
+
+[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
+ switch (vote_op) {
+ case VoteOp::ALL:
+ return ir.VoteAll(pred);
+ case VoteOp::ANY:
+ return ir.VoteAny(pred);
+ case VoteOp::EQ:
+ return ir.VoteEqual(pred);
+ default:
+ throw NotImplementedException("Invalid VOTE op {}", vote_op);
+ }
+}
+
+void Vote(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 3, IR::Pred> pred_a;
+ BitField<42, 1, u64> neg_pred_a;
+ BitField<45, 3, IR::Pred> pred_b;
+ BitField<48, 2, VoteOp> vote_op;
+ } const vote{insn};
+
+ const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
+ v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
+ v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VOTE(u64 insn) {
+ Vote(*this, insn);
+}
+
+void TranslatorVisitor::VOTE_vtg(u64) {
+ LOG_WARNING(Shader, "(STUBBED) called");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class ShuffleMode : u64 {
+ IDX,
+ UP,
+ DOWN,
+ BFLY,
+};
+
+[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
+ const IR::U32& index, const IR::U32& mask,
+ ShuffleMode shfl_op) {
+ const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
+ const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
+ switch (shfl_op) {
+ case ShuffleMode::IDX:
+ return ir.ShuffleIndex(value, index, clamp, seg_mask);
+ case ShuffleMode::UP:
+ return ir.ShuffleUp(value, index, clamp, seg_mask);
+ case ShuffleMode::DOWN:
+ return ir.ShuffleDown(value, index, clamp, seg_mask);
+ case ShuffleMode::BFLY:
+ return ir.ShuffleButterfly(value, index, clamp, seg_mask);
+ default:
+ throw NotImplementedException("Invalid SHFL op {}", shfl_op);
+ }
+}
+
+void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<30, 2, ShuffleMode> mode;
+ BitField<48, 3, IR::Pred> pred;
+ } const shfl{insn};
+
+ const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
+ v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
+ v.X(shfl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHFL(u64 insn) {
+ union {
+ u64 insn;
+ BitField<20, 5, u64> src_a_imm;
+ BitField<28, 1, u64> src_a_flag;
+ BitField<29, 1, u64> src_b_flag;
+ BitField<34, 13, u64> src_b_imm;
+ } const flags{insn};
+ const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
+ : GetReg20(insn)};
+ const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
+ : GetReg39(insn)};
+ Shuffle(*this, insn, src_a, src_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
+template <auto method>
+static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
+ using MethodType = decltype(method);
+ if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
+ (visitor.*method)(pc, insn);
+ } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
+ (visitor.*method)(insn);
+ } else {
+ (visitor.*method)();
+ }
+}
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
+ if (location_begin == location_end) {
+ return;
+ }
+ TranslatorVisitor visitor{env, *block};
+ for (Location pc = location_begin; pc != location_end; ++pc) {
+ const u64 insn{env.ReadInstruction(pc.Offset())};
+ try {
+ const Opcode opcode{Decode(insn)};
+ switch (opcode) {
+#define INST(name, cute, mask) \
+ case Opcode::name: \
+ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
+ break;
+#include "shader_recompiler/frontend/maxwell/maxwell.inc"
+#undef OPCODE
+ default:
+ throw LogicError("Invalid opcode {}", opcode);
+ }
+ } catch (Exception& exception) {
+ exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
+ throw;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::Maxwell {
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "common/settings.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/post_order.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
+ size_t num_syntax_blocks{};
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ ++num_syntax_blocks;
+ }
+ }
+ IR::BlockList blocks;
+ blocks.reserve(num_syntax_blocks);
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ blocks.push_back(node.data.block);
+ }
+ }
+ return blocks;
+}
+
+void RemoveUnreachableBlocks(IR::Program& program) {
+ // Some blocks might be unreachable if a function call exists unconditionally
+ // If this happens the number of blocks and post order blocks will mismatch
+ if (program.blocks.size() == program.post_order_blocks.size()) {
+ return;
+ }
+ const auto begin{program.blocks.begin() + 1};
+ const auto end{program.blocks.end()};
+ const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
+ program.blocks.erase(std::remove_if(begin, end, pred), end);
+}
+
+void CollectInterpolationInfo(Environment& env, IR::Program& program) {
+ if (program.stage != Stage::Fragment) {
+ return;
+ }
+ const ProgramHeader& sph{env.SPH()};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ std::optional<PixelImap> imap;
+ for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
+ if (value == PixelImap::Unused) {
+ continue;
+ }
+ if (imap && imap != value) {
+ throw NotImplementedException("Per component interpolation");
+ }
+ imap = value;
+ }
+ if (!imap) {
+ continue;
+ }
+ program.info.interpolation[index] = [&] {
+ switch (*imap) {
+ case PixelImap::Unused:
+ case PixelImap::Perspective:
+ return Interpolation::Smooth;
+ case PixelImap::Constant:
+ return Interpolation::Flat;
+ case PixelImap::ScreenLinear:
+ return Interpolation::NoPerspective;
+ }
+ throw NotImplementedException("Unknown interpolation {}", *imap);
+ }();
+ }
+}
+
+void AddNVNStorageBuffers(IR::Program& program) {
+ if (!program.info.uses_global_memory) {
+ return;
+ }
+ const u32 driver_cbuf{0};
+ const u32 descriptor_size{0x10};
+ const u32 num_buffers{16};
+ const u32 base{[&] {
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return 0x110u;
+ case Stage::TessellationControl:
+ return 0x210u;
+ case Stage::TessellationEval:
+ return 0x310u;
+ case Stage::Geometry:
+ return 0x410u;
+ case Stage::Fragment:
+ return 0x510u;
+ case Stage::Compute:
+ return 0x310u;
+ }
+ throw InvalidArgument("Invalid stage {}", program.stage);
+ }()};
+ auto& descs{program.info.storage_buffers_descriptors};
+ for (u32 index = 0; index < num_buffers; ++index) {
+ if (!program.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const u32 offset{base + index * descriptor_size};
+ const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
+ if (it != descs.end()) {
+ it->is_written |= program.info.stores_global_memory;
+ continue;
+ }
+ descs.push_back({
+ .cbuf_index = driver_cbuf,
+ .cbuf_offset = offset,
+ .count = 1,
+ .is_written = program.info.stores_global_memory,
+ });
+ }
+}
+} // Anonymous namespace
+
+IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
+ IR::Program program;
+ program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+ program.blocks = GenerateBlocks(program.syntax_list);
+ program.post_order_blocks = PostOrder(program.syntax_list.front());
+ program.stage = env.ShaderStage();
+ program.local_memory_size = env.LocalMemorySize();
+ switch (program.stage) {
+ case Stage::TessellationControl: {
+ const ProgramHeader& sph{env.SPH()};
+ program.invocations = sph.common2.threads_per_input_primitive;
+ break;
+ }
+ case Stage::Geometry: {
+ const ProgramHeader& sph{env.SPH()};
+ program.output_topology = sph.common3.output_topology;
+ program.output_vertices = sph.common4.max_output_vertices;
+ program.invocations = sph.common2.threads_per_input_primitive;
+ program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
+ if (program.is_geometry_passthrough) {
+ const auto& mask{env.GpPassthroughMask()};
+ for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
+ program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
+ }
+ }
+ break;
+ }
+ case Stage::Compute:
+ program.workgroup_size = env.WorkgroupSize();
+ program.shared_memory_size = env.SharedMemorySize();
+ break;
+ default:
+ break;
+ }
+ RemoveUnreachableBlocks(program);
+
+ // Replace instructions before the SSA rewrite
+ if (!host_info.support_float16) {
+ Optimization::LowerFp16ToFp32(program);
+ }
+ if (!host_info.support_int64) {
+ Optimization::LowerInt64ToInt32(program);
+ }
+ Optimization::SsaRewritePass(program);
+
+ Optimization::GlobalMemoryToStorageBufferPass(program);
+ Optimization::TexturePass(env, program);
+
+ Optimization::ConstantPropagationPass(program);
+ Optimization::DeadCodeEliminationPass(program);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(program);
+ }
+ Optimization::CollectShaderInfoPass(env, program);
+ CollectInterpolationInfo(env, program);
+ AddNVNStorageBuffers(program);
+ return program;
+}
+
+IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b) {
+ IR::Program result{};
+ Optimization::VertexATransformPass(vertex_a);
+ Optimization::VertexBTransformPass(vertex_b);
+ for (const auto& term : vertex_a.syntax_list) {
+ if (term.type != IR::AbstractSyntaxNode::Type::Return) {
+ result.syntax_list.push_back(term);
+ }
+ }
+ result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
+ vertex_b.syntax_list.end());
+ result.blocks = GenerateBlocks(result.syntax_list);
+ result.post_order_blocks = vertex_b.post_order_blocks;
+ for (const auto& block : vertex_a.post_order_blocks) {
+ result.post_order_blocks.push_back(block);
+ }
+ result.stage = Stage::VertexB;
+ result.info = vertex_a.info;
+ result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
+ result.info.loads.mask |= vertex_b.info.loads.mask;
+ result.info.stores.mask |= vertex_b.info.stores.mask;
+
+ Optimization::JoinTextureInfo(result.info, vertex_b.info);
+ Optimization::JoinStorageInfo(result.info, vertex_b.info);
+ Optimization::DeadCodeEliminationPass(result);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(result);
+ }
+ Optimization::CollectShaderInfoPass(env_vertex_b, result);
+ return result;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg, const HostTranslateInfo& host_info);
+
+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b);
+
+} // namespace Shader::Maxwell