summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl/gl_shader_decompiler.cpp')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp837
1 files changed, 653 insertions, 184 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 391c92d47..b3e95187e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
using Tegra::Shader::SubOp;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
+constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
class DecompileFail : public std::runtime_error {
public:
@@ -113,7 +114,7 @@ private:
/// Scans a range of code for labels and determines the exit method.
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
- auto [iter, inserted] =
+ const auto [iter, inserted] =
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
ExitMethod& exit_method = iter->second;
if (!inserted)
@@ -131,22 +132,22 @@ private:
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
return exit_method = ExitMethod::AlwaysEnd;
} else {
- ExitMethod not_met = Scan(offset + 1, end, labels);
+ const ExitMethod not_met = Scan(offset + 1, end, labels);
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
}
}
case OpCode::Id::BRA: {
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
- ExitMethod no_jmp = Scan(offset + 1, end, labels);
- ExitMethod jmp = Scan(target, end, labels);
+ const ExitMethod no_jmp = Scan(offset + 1, end, labels);
+ const ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
case OpCode::Id::SSY: {
// The SSY instruction uses a similar encoding as the BRA instruction.
ASSERT_MSG(instr.bra.constant_buffer == 0,
"Constant buffer SSY is not supported");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
// Continue scanning for an exit method.
break;
@@ -189,7 +190,7 @@ public:
private:
void AppendIndentation() {
- shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+ shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
}
std::string shader_source;
@@ -208,7 +209,7 @@ public:
UnsignedInteger,
};
- GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
+ GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
/// Gets the GLSL type string for a register
static std::string GetTypeString() {
@@ -226,15 +227,23 @@ public:
}
/// Returns the index of the register
- size_t GetIndex() const {
+ std::size_t GetIndex() const {
return index;
}
private:
- const size_t index;
+ const std::size_t index;
const std::string& suffix;
};
+enum class InternalFlag : u64 {
+ ZeroFlag = 0,
+ CarryFlag = 1,
+ OverflowFlag = 2,
+ NaNFlag = 3,
+ Amount
+};
+
/**
* Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
* of all registers (e.g. whether they are currently being used as Floats or Integers), and
@@ -247,6 +256,7 @@ public:
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
BuildRegisterList();
+ BuildInputList();
}
/**
@@ -327,13 +337,19 @@ public:
void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
const std::string& value, u64 dest_num_components,
u64 value_num_components, bool is_saturated = false,
- u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
+ u64 dest_elem = 0, Register::Size size = Register::Size::Word,
+ bool sets_cc = false) {
ASSERT_MSG(!is_saturated, "Unimplemented");
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
dest_num_components, value_num_components, dest_elem);
+
+ if (sets_cc) {
+ const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
+ SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
+ }
}
/**
@@ -343,12 +359,33 @@ public:
* @param elem The element to use for the operation.
* @param attribute The input attribute to use as the source value.
*/
- void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
- std::string dest = GetRegisterAsFloat(reg);
- std::string src = GetInputAttribute(attribute) + GetSwizzle(elem);
+ void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
+ const Tegra::Shader::IpaMode& input_mode) {
+ const std::string dest = GetRegisterAsFloat(reg);
+ const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
shader.AddLine(dest + " = " + src + ';');
}
+ std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
+ switch (cc) {
+ case Tegra::Shader::ControlCode::NEU:
+ return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ return "false";
+ }
+ }
+
+ std::string GetInternalFlag(const InternalFlag ii) const {
+ const u32 code = static_cast<u32>(ii);
+ return "internalFlag_" + std::to_string(code) + suffix;
+ }
+
+ void SetInternalFlag(const InternalFlag ii, const std::string& value) const {
+ shader.AddLine(GetInternalFlag(ii) + " = " + value + ';');
+ }
+
/**
* Writes code that does a output attribute assignment to register operation. Output attributes
* are stored as floats, so this may require conversion.
@@ -357,8 +394,8 @@ public:
* @param reg The register to use as the source value.
*/
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
- std::string dest = GetOutputAttribute(attribute);
- std::string src = GetRegisterAsFloat(reg);
+ const std::string dest = GetOutputAttribute(attribute);
+ const std::string src = GetRegisterAsFloat(reg);
if (!dest.empty()) {
// Can happen with unknown/unimplemented output attributes, in which case we ignore the
@@ -391,9 +428,9 @@ public:
GLSLRegister::Type type) {
declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
- std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
- std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
- final_offset + " % 4]";
+ const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
+ const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
+ final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) {
return value;
@@ -412,12 +449,19 @@ public:
}
declarations.AddNewLine();
- for (const auto& index : declr_input_attribute) {
+ for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
+ const InternalFlag code = static_cast<InternalFlag>(ii);
+ declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
+ }
+ declarations.AddNewLine();
+
+ for (const auto element : declr_input_attribute) {
// TODO(bunnei): Use proper number of elements for these
- declarations.AddLine("layout(location = " +
- std::to_string(static_cast<u32>(index) -
- static_cast<u32>(Attribute::Index::Attribute_0)) +
- ") in vec4 " + GetInputAttribute(index) + ';');
+ u32 idx =
+ static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
+ declarations.AddLine("layout(location = " + std::to_string(idx) + ")" +
+ GetInputFlags(element.first) + "in vec4 " +
+ GetInputAttribute(element.first, element.second) + ';');
}
declarations.AddNewLine();
@@ -440,13 +484,12 @@ public:
}
declarations.AddNewLine();
- // Append the sampler2D array for the used textures.
- size_t num_samplers = GetSamplers().size();
- if (num_samplers > 0) {
- declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
- std::to_string(num_samplers) + "];");
- declarations.AddNewLine();
+ const auto& samplers = GetSamplers();
+ for (const auto& sampler : samplers) {
+ declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
+ ';');
}
+ declarations.AddNewLine();
}
/// Returns a list of constant buffer declarations
@@ -458,27 +501,29 @@ public:
}
/// Returns a list of samplers used in the shader
- std::vector<SamplerEntry> GetSamplers() const {
+ const std::vector<SamplerEntry>& GetSamplers() const {
return used_samplers;
}
/// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
/// necessary.
- std::string AccessSampler(const Sampler& sampler) {
- size_t offset = static_cast<size_t>(sampler.index.Value());
+ std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
+ bool is_array) {
+ const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
- auto itr =
+ const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
+ ASSERT(itr->GetType() == type && itr->IsArray() == is_array);
return itr->GetName();
}
// Otherwise create a new mapping for this sampler
- size_t next_index = used_samplers.size();
- SamplerEntry entry{stage, offset, next_index};
+ const std::size_t next_index = used_samplers.size();
+ const SamplerEntry entry{stage, offset, next_index, type, is_array};
used_samplers.emplace_back(entry);
return entry.GetName();
}
@@ -527,16 +572,29 @@ private:
void BuildRegisterList() {
regs.reserve(Register::NumRegisters);
- for (size_t index = 0; index < Register::NumRegisters; ++index) {
+ for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, suffix);
}
}
+ void BuildInputList() {
+ const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) -
+ static_cast<u32>(Attribute::Index::Attribute_0) + 1;
+ declr_input_attribute.reserve(size);
+ }
+
/// Generates code representing an input attribute register.
- std::string GetInputAttribute(Attribute::Index attribute) {
+ std::string GetInputAttribute(Attribute::Index attribute,
+ const Tegra::Shader::IpaMode& input_mode) {
switch (attribute) {
case Attribute::Index::Position:
- return "position";
+ if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
+ return "position";
+ } else {
+ return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)";
+ }
+ case Attribute::Index::PointCoord:
+ return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)";
case Attribute::Index::TessCoordInstanceIDVertexID:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
@@ -552,7 +610,14 @@ private:
static_cast<u32>(Attribute::Index::Attribute_0)};
if (attribute >= Attribute::Index::Attribute_0 &&
attribute <= Attribute::Index::Attribute_31) {
- declr_input_attribute.insert(attribute);
+ if (declr_input_attribute.count(attribute) == 0) {
+ declr_input_attribute[attribute] = input_mode;
+ } else {
+ if (declr_input_attribute[attribute] != input_mode) {
+ LOG_CRITICAL(HW_GPU, "Same Input multiple input modes");
+ UNREACHABLE();
+ }
+ }
return "input_attribute_" + std::to_string(index);
}
@@ -563,6 +628,49 @@ private:
return "vec4(0, 0, 0, 0)";
}
+ std::string GetInputFlags(const Attribute::Index attribute) {
+ const Tegra::Shader::IpaSampleMode sample_mode =
+ declr_input_attribute[attribute].sampling_mode;
+ const Tegra::Shader::IpaInterpMode interp_mode =
+ declr_input_attribute[attribute].interpolation_mode;
+ std::string out;
+ switch (interp_mode) {
+ case Tegra::Shader::IpaInterpMode::Flat: {
+ out += "flat ";
+ break;
+ }
+ case Tegra::Shader::IpaInterpMode::Linear: {
+ out += "noperspective ";
+ break;
+ }
+ case Tegra::Shader::IpaInterpMode::Perspective: {
+ // Default, Smooth
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled Ipa InterpMode: {}", static_cast<u32>(interp_mode));
+ UNREACHABLE();
+ }
+ }
+ switch (sample_mode) {
+ case Tegra::Shader::IpaSampleMode::Centroid: {
+ // Note not implemented, it can be implemented with the "centroid " keyword in glsl;
+ LOG_CRITICAL(HW_GPU, "Ipa Sampler Mode: centroid, not implemented");
+ UNREACHABLE();
+ break;
+ }
+ case Tegra::Shader::IpaSampleMode::Default: {
+ // Default, n/a
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled Ipa SampleMode: {}", static_cast<u32>(sample_mode));
+ UNREACHABLE();
+ }
+ }
+ return out;
+ }
+
/// Generates code representing an output attribute register.
std::string GetOutputAttribute(Attribute::Index attribute) {
switch (attribute) {
@@ -593,7 +701,7 @@ private:
ShaderWriter& shader;
ShaderWriter& declarations;
std::vector<GLSLRegister> regs;
- std::set<Attribute::Index> declr_input_attribute;
+ std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute;
std::set<Attribute::Index> declr_output_attribute;
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
std::vector<SamplerEntry> used_samplers;
@@ -607,7 +715,7 @@ public:
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage), suffix(suffix) {
-
+ std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
Generate(suffix);
}
@@ -621,26 +729,9 @@ public:
}
private:
- // Shader program header for a Fragment Shader.
- struct FragmentHeader {
- INSERT_PADDING_WORDS(5);
- INSERT_PADDING_WORDS(13);
- u32 enabled_color_outputs;
- union {
- BitField<0, 1, u32> writes_samplemask;
- BitField<1, 1, u32> writes_depth;
- };
-
- bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
- u32 bit = render_target * 4 + component;
- return enabled_color_outputs & (1 << bit);
- }
- };
- static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
-
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
- auto iter = subroutines.find(Subroutine{begin, end, suffix});
+ const auto iter = subroutines.find(Subroutine{begin, end, suffix});
ASSERT(iter != subroutines.end());
return *iter;
}
@@ -656,8 +747,8 @@ private:
}
/// Generates code representing a texture sampler.
- std::string GetSampler(const Sampler& sampler) {
- return regs.AccessSampler(sampler);
+ std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) {
+ return regs.AccessSampler(sampler, type, is_array);
}
/**
@@ -685,7 +776,7 @@ private:
// Can't assign to the constant predicate.
ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
- std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
+ const std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
shader.AddLine(variable + " = " + value + ';');
declr_predicates.insert(std::move(variable));
}
@@ -795,7 +886,7 @@ private:
*/
bool IsSchedInstruction(u32 offset) const {
// sched instructions appear once every 4 instructions.
- static constexpr size_t SchedPeriod = 4;
+ static constexpr std::size_t SchedPeriod = 4;
u32 absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
@@ -863,7 +954,7 @@ private:
std::string result;
result += '(';
- for (size_t i = 0; i < shift_amounts.size(); ++i) {
+ for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
if (i)
result += '|';
result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
@@ -887,7 +978,7 @@ private:
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
- size_t written_components = 0;
+ std::size_t written_components = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component)) {
continue;
@@ -941,10 +1032,8 @@ private:
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
void EmitFragmentOutputsWrite() {
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
- FragmentHeader header;
- std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
- ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
+ ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
@@ -953,18 +1042,22 @@ private:
++render_target) {
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
- if (header.IsColorComponentOutputEnabled(render_target, component)) {
- shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component,
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
+ shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
regs.GetRegisterAsFloat(current_reg)));
++current_reg;
}
}
}
- if (header.writes_depth) {
+ if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';');
+
+ shader.AddLine(
+ "gl_FragDepth = " +
+ regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
+ ';');
}
}
@@ -1038,6 +1131,15 @@ private:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+ ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
+ ASSERT_MSG(instr.fmul.tab5c68_1 == 0, "FMUL tab5cb8_1({}) is not implemented",
+ instr.fmul.tab5c68_1.Value());
+ ASSERT_MSG(instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0
+ .Value()); // SMO typical sends 1 here which seems to be the default
+ ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
+
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
instr.alu.saturate_d);
@@ -1357,7 +1459,7 @@ private:
if (instr.alu_integer.negate_b)
op_b = "-(" + op_b + ')';
- std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+ const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
regs.SetRegisterToInteger(instr.gpr0, true, 0,
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
@@ -1375,7 +1477,7 @@ private:
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
- std::string condition =
+ const std::string condition =
GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
regs.SetRegisterToInteger(instr.gpr0, true, 0,
'(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
@@ -1397,8 +1499,9 @@ private:
case OpCode::Id::LOP3_C:
case OpCode::Id::LOP3_R:
case OpCode::Id::LOP3_IMM: {
- std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
+ const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
std::string lut;
+
if (opcode->GetId() == OpCode::Id::LOP3_R) {
lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
} else {
@@ -1413,15 +1516,80 @@ private:
case OpCode::Id::IMNMX_IMM: {
ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None,
"Unimplemented");
- std::string condition =
+ const std::string condition =
GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
- std::string parameters = op_a + ',' + op_b;
+ const std::string parameters = op_a + ',' + op_b;
regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
1, 1);
break;
}
+ case OpCode::Id::LEA_R2:
+ case OpCode::Id::LEA_R1:
+ case OpCode::Id::LEA_IMM:
+ case OpCode::Id::LEA_RZ:
+ case OpCode::Id::LEA_HI: {
+ std::string op_c;
+
+ switch (opcode->GetId()) {
+ case OpCode::Id::LEA_R2: {
+ op_a = regs.GetRegisterAsInteger(instr.gpr20);
+ op_b = regs.GetRegisterAsInteger(instr.gpr39);
+ op_c = std::to_string(instr.lea.r2.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_R1: {
+ const bool neg = instr.lea.r1.neg != 0;
+ op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_a = "-(" + op_a + ')';
+ op_b = regs.GetRegisterAsInteger(instr.gpr20);
+ op_c = std::to_string(instr.lea.r1.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_IMM: {
+ const bool neg = instr.lea.imm.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ break;
+ }
+
+ case OpCode::Id::LEA_RZ: {
+ const bool neg = instr.lea.rz.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+ GLSLRegister::Type::Integer);
+ op_c = std::to_string(instr.lea.rz.entry_a);
+
+ break;
+ }
+
+ case OpCode::Id::LEA_HI:
+ default: {
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+ LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+ UNREACHABLE();
+ }
+ const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
opcode->GetName());
@@ -1432,10 +1600,16 @@ private:
break;
}
case OpCode::Type::Ffma: {
- std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
std::string op_b = instr.ffma.negate_b ? "-" : "";
std::string op_c = instr.ffma.negate_c ? "-" : "";
+ ASSERT_MSG(instr.ffma.cc == 0, "FFMA cc not implemented");
+ ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented",
+ instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
+ ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
+ instr.ffma.tab5980_1.Value());
+
switch (opcode->GetId()) {
case OpCode::Id::FFMA_CR: {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -1486,7 +1660,8 @@ private:
}
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
- 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
+ 1, instr.alu.saturate_d, 0, instr.conversion.dest_size,
+ instr.generates_cc.Value() != 0);
break;
}
case OpCode::Id::I2F_R:
@@ -1616,9 +1791,34 @@ private:
case OpCode::Type::Memory: {
switch (opcode->GetId()) {
case OpCode::Id::LD_A: {
- ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
- regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
- instr.attribute.fmt20.index);
+ // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+ ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+ "Indirect attribute loads are not supported");
+ ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+ "Unaligned attribute loads are not supported");
+
+ Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
+ Tegra::Shader::IpaSampleMode::Default};
+
+ u64 next_element = instr.attribute.fmt20.element;
+ u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+ const auto LoadNextElement = [&](u32 reg_offset) {
+ regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
+ static_cast<Attribute::Index>(next_index),
+ input_mode);
+
+ // Load the next attribute element into the following register. If the element
+ // to load goes beyond the vec4 size, load the first element of the next
+ // attribute.
+ next_element = (next_element + 1) % 4;
+ next_index = next_index + (next_element == 0 ? 1 : 0);
+ };
+
+ const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+ for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+ LoadNextElement(reg_offset);
+ }
break;
}
case OpCode::Id::LD_C: {
@@ -1632,7 +1832,7 @@ private:
shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
" / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
- std::string op_a =
+ const std::string op_a =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
GLSLRegister::Type::Float);
@@ -1642,7 +1842,7 @@ private:
break;
case Tegra::Shader::UniformType::Double: {
- std::string op_b =
+ const std::string op_b =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
"index", GLSLRegister::Type::Float);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
@@ -1660,25 +1860,111 @@ private:
break;
}
case OpCode::Id::ST_A: {
- ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
- regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
- instr.attribute.fmt20.element, instr.gpr0);
+ ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+ "Indirect attribute loads are not supported");
+ ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+ "Unaligned attribute loads are not supported");
+
+ u64 next_element = instr.attribute.fmt20.element;
+ u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+ const auto StoreNextElement = [&](u32 reg_offset) {
+ regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
+ next_element,
+ instr.gpr0.Value() + reg_offset);
+
+ // Load the next attribute element into the following register. If the element
+ // to load goes beyond the vec4 size, load the first element of the next
+ // attribute.
+ next_element = (next_element + 1) % 4;
+ next_index = next_index + (next_element == 0 ? 1 : 0);
+ };
+
+ const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+ for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+ StoreNextElement(reg_offset);
+ }
+
break;
}
case OpCode::Id::TEX: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
+ Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
+ std::string coord;
+
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ coord = "float coords = " + x + ';';
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture2D: {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+
+ // Fallback to interpreting as a 2D texture for now
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ }
+ // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
+ // or lod.
+ const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
+
+ const std::string sampler = GetSampler(instr.sampler, texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
+
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
- const std::string texture = "texture(" + sampler + ", coords)";
+ std::string texture;
- size_t dest_elem{};
- for (size_t elem = 0; elem < 4; ++elem) {
+ switch (instr.tex.process_mode) {
+ case Tegra::Shader::TextureProcessMode::None: {
+ texture = "texture(" + sampler + ", coords)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LZ: {
+ texture = "textureLod(" + sampler + ", coords, 0.0)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LB:
+ case Tegra::Shader::TextureProcessMode::LBA: {
+ // TODO: Figure if A suffix changes the equation at all.
+ texture = "texture(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LL:
+ case Tegra::Shader::TextureProcessMode::LLA: {
+ // TODO: Figure if A suffix changes the equation at all.
+ texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ default: {
+ texture = "texture(" + sampler + ", coords)";
+ LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}",
+ static_cast<u32>(instr.tex.process_mode.Value()));
+ UNREACHABLE();
+ }
+ }
+ std::size_t dest_elem{};
+ for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -1691,20 +1977,77 @@ private:
break;
}
case OpCode::Id::TEXS: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ std::string coord;
+ Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
+ bool is_array{instr.texs.IsArrayTexture()};
+
+ ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (is_array) {
+ const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+ } else {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+ // Fallback to interpreting as a 2D texture for now
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ is_array = false;
+ }
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
const std::string texture = "texture(" + sampler + ", coords)";
WriteTexsInstruction(instr, coord, texture);
break;
}
case OpCode::Id::TLDS: {
- const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");";
+ ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D);
+ ASSERT(instr.tlds.IsArrayTexture() == false);
+ std::string coord;
+
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
+ "MZ is not implemented");
+
+ switch (instr.tlds.GetTextureType()) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (instr.tlds.IsArrayTexture()) {
+ LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture");
+ UNREACHABLE();
+ } else {
+ const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
+ coord = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(instr.tlds.GetTextureType()));
+ UNREACHABLE();
+ }
+ const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(),
+ instr.tlds.IsArrayTexture());
const std::string texture = "texelFetch(" + sampler + ", coords, 0)";
WriteTexsInstruction(instr, coord, texture);
break;
@@ -1712,12 +2055,23 @@ private:
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
ASSERT(instr.tld4.array == 0);
- std::string coord{};
+ std::string coord;
+
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+ "NDV is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
+ "PTP is not implemented");
switch (instr.tld4.texture_type) {
case Tegra::Shader::TextureType::Texture2D: {
- std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
coord = "vec2 coords = vec2(" + x + ", " + y + ");";
break;
}
@@ -1727,7 +2081,8 @@ private:
UNREACHABLE();
}
- const std::string sampler = GetSampler(instr.sampler);
+ const std::string sampler =
+ GetSampler(instr.sampler, instr.tld4.texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
@@ -1736,8 +2091,8 @@ private:
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4.component) + ')';
- size_t dest_elem{};
- for (size_t elem = 0; elem < 4; ++elem) {
+ std::size_t dest_elem{};
+ for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -1750,16 +2105,100 @@ private:
break;
}
case OpCode::Id::TLD4S: {
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
- const std::string sampler = GetSampler(instr.sampler);
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4s.component) + ')';
WriteTexsInstruction(instr, coord, texture);
break;
}
+ case OpCode::Id::TXQ: {
+ ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+
+ // TODO: the new commits on the texture refactor, change the way samplers work.
+ // Sadly, not all texture instructions specify the type of texture their sampler
+ // uses. This must be fixed at a later instance.
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
+ switch (instr.txq.query_type) {
+ case Tegra::Shader::TextureQueryType::Dimension: {
+ const std::string texture = "textureQueryLevels(" + sampler + ')';
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1);
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}",
+ static_cast<u32>(instr.txq.query_type.Value()));
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Id::TMML: {
+ ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+ "NDV is not implemented");
+
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const bool is_array = instr.tmml.array != 0;
+ auto texture_type = instr.tmml.texture_type.Value();
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
+
+ // TODO: add coordinates for different samplers once other texture types are
+ // implemented.
+ std::string coord;
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ coord = "float coords = " + x + ';';
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture2D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+
+ // Fallback to interpreting as a 2D texture for now
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ }
+ // Add an extra scope and declare the texture coords inside to prevent
+ // overwriting them in case they are used as outputs of the texs instruction.
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine(coord);
+ const std::string texture = "textureQueryLod(" + sampler + ", coords)";
+ const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
+ shader.AddLine(tmp);
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
+ regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
+ --shader.scope;
+ shader.AddLine('}');
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -1799,12 +2238,12 @@ private:
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.fsetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
- std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
+ const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1818,7 +2257,8 @@ private:
break;
}
case OpCode::Type::IntegerSetPredicate: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+ const std::string op_a =
+ regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
std::string op_b;
if (instr.is_b_imm) {
@@ -1835,12 +2275,12 @@ private:
// We can't use the constant predicate as destination.
ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.isetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.isetp.op);
- std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
+ const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.isetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1853,32 +2293,80 @@ private:
}
break;
}
+ case OpCode::Type::PredicateSetRegister: {
+ const std::string op_a =
+ GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+ const std::string second_pred =
+ GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+ const std::string combiner = GetPredicateCombiner(instr.pset.op);
+
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+ const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+ if (instr.pset.bf == 0) {
+ const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+ regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+ } else {
+ const std::string value = '(' + result + ") ? 1.0 : 0.0";
+ regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+ }
+
+ break;
+ }
case OpCode::Type::PredicateSetPredicate: {
- std::string op_a =
- GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
- std::string op_b =
- GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+ switch (opcode->GetId()) {
+ case OpCode::Id::PSETP: {
+ const std::string op_a =
+ GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
- // We can't use the constant predicate as destination.
- ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
- GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+ const std::string second_pred =
+ GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
- std::string combiner = GetPredicateCombiner(instr.psetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.psetp.op);
- std::string predicate =
- '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(instr.psetp.pred3,
- '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+ // Set the primary predicate to the result of Predicate OP SecondPredicate
+ SetPredicate(instr.psetp.pred3,
+ '(' + predicate + ") " + combiner + " (" + second_pred + ')');
- if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- SetPredicate(instr.psetp.pred0,
- "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+ // if enabled
+ SetPredicate(instr.psetp.pred0,
+ "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ }
+ break;
+ }
+ case OpCode::Id::CSETP: {
+ const std::string pred =
+ GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+ const std::string combiner = GetPredicateCombiner(instr.csetp.op);
+ const std::string controlCode = regs.GetControlCode(instr.csetp.cc);
+ if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
+ SetPredicate(instr.csetp.pred3,
+ '(' + controlCode + ") " + combiner + " (" + pred + ')');
+ }
+ if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ SetPredicate(instr.csetp.pred0,
+ "!(" + controlCode + ") " + combiner + " (" + pred + ')');
+ }
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
}
break;
}
@@ -1893,7 +2381,7 @@ private:
std::string op_b = instr.fset.neg_b ? "-" : "";
if (instr.is_b_imm) {
- std::string imm = GetImmediate19(instr);
+ const std::string imm = GetImmediate19(instr);
if (instr.fset.neg_imm)
op_b += "(-" + imm + ')';
else
@@ -1913,13 +2401,14 @@ private:
// The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.fset.op);
+ const std::string combiner = GetPredicateCombiner(instr.fset.op);
- std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
- ") " + combiner + " (" + second_pred + "))";
+ const std::string predicate = "((" +
+ GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1930,7 +2419,7 @@ private:
break;
}
case OpCode::Type::IntegerSet: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+ const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
std::string op_b;
@@ -1947,13 +2436,14 @@ private:
// The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.iset.op);
+ const std::string combiner = GetPredicateCombiner(instr.iset.op);
- std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
- ") " + combiner + " (" + second_pred + "))";
+ const std::string predicate = "((" +
+ GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.iset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2103,45 +2593,22 @@ private:
case OpCode::Id::BRA: {
ASSERT_MSG(instr.bra.constant_buffer == 0,
"BRA with constant buffers are not implemented");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
break;
}
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
const auto& reg = instr.gpr0;
- switch (instr.ipa.mode) {
- case Tegra::Shader::IpaMode::Pass:
- if (stage == Maxwell3D::Regs::ShaderStage::Fragment &&
- attribute.index == Attribute::Index::Position) {
- switch (attribute.element) {
- case 0:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.x;");
- break;
- case 1:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.y;");
- break;
- case 2:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.z;");
- break;
- case 3:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = 1.0;");
- break;
- }
- } else {
- regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
- }
- break;
- case Tegra::Shader::IpaMode::None:
- regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
- break;
- default:
- LOG_CRITICAL(HW_GPU, "Unhandled IPA mode: {}",
- static_cast<u32>(instr.ipa.mode.Value()));
- UNREACHABLE();
- regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
- }
+ Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
+ instr.ipa.sample_mode.Value()};
+ regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index,
+ input_mode);
+
+ if (instr.ipa.saturate) {
+ regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true);
+ }
break;
}
case OpCode::Id::SSY: {
@@ -2150,7 +2617,7 @@ private:
// has a similar structure to the BRA opcode.
ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
EmitPushToSSYStack(target);
break;
}
@@ -2244,10 +2711,10 @@ private:
shader.AddLine("case " + std::to_string(label) + "u: {");
++shader.scope;
- auto next_it = labels.lower_bound(label + 1);
- u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+ const auto next_it = labels.lower_bound(label + 1);
+ const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
- u32 compile_end = CompileRange(label, next_label);
+ const u32 compile_end = CompileRange(label, next_label);
if (compile_end > next_label && compile_end != PROGRAM_END) {
// This happens only when there is a label inside a IF/LOOP block
shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
@@ -2289,6 +2756,7 @@ private:
private:
const std::set<Subroutine>& subroutines;
const ProgramCode& program_code;
+ Tegra::Shader::Header header;
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;
@@ -2310,7 +2778,8 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix) {
try {
- auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+ const auto subroutines =
+ ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {