summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader/decode
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader/decode')
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp8
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp19
-rw-r--r--src/video_core/shader/decode/conversion.cpp15
-rw-r--r--src/video_core/shader/decode/ffma.cpp4
-rw-r--r--src/video_core/shader/decode/half_set.cpp4
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp4
-rw-r--r--src/video_core/shader/decode/memory.cpp72
-rw-r--r--src/video_core/shader/decode/other.cpp10
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp60
-rw-r--r--src/video_core/shader/decode/texture.cpp365
-rw-r--r--src/video_core/shader/decode/warp.cpp82
13 files changed, 436 insertions, 222 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
if (instr.fmul.tab5cb8_2 != 0) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
}
if (instr.fmul.tab5c68_0 != 1) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0.Value());
}
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
+ LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
- LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
- if (instr.alu_half.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
} else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index a33d242e9..371fae127 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
+ case OpCode::Id::FLO_R:
+ case OpCode::Id::FLO_C:
+ case OpCode::Id::FLO_IMM: {
+ Node value;
+ if (instr.flo.invert) {
+ op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+ }
+ if (instr.flo.is_signed) {
+ value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
+ } else {
+ value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
+ }
+ if (instr.flo.sh) {
+ value =
+ Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
+ }
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 32facd6ba..0eeb75559 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
- Node value = [&]() {
+ Node value = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::I2F_R:
return GetRegister(instr.gpr20);
@@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
+
const bool input_signed = instr.conversion.is_input_signed;
+
+ if (instr.conversion.src_size == Register::Size::Byte) {
+ const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
+ if (offset > 0) {
+ value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+ std::move(value), Immediate(offset));
+ }
+ } else {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+ }
+
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
if (instr.ffma.tab5980_0 != 1) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
}
if (instr.ffma.tab5980_1 != 0) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
}
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.hset2.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index fec8f2dbe..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- LOG_DEBUG(HW_GPU, "ftz={}", static_cast<u32>(instr.hsetp2.ftz));
+ if (instr.hsetp2.ftz != 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 335d78146..c934d0719 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -21,8 +21,10 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
namespace {
-u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+
+u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
+ case Tegra::Shader::UniformType::UnsignedByte:
case Tegra::Shader::UniformType::Single:
return 1;
case Tegra::Shader::UniformType::Double:
@@ -35,6 +37,22 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
return 1;
}
}
+
+u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
+ switch (uniform_type) {
+ case Tegra::Shader::UniformType::Single:
+ return 1;
+ case Tegra::Shader::UniformType::Double:
+ return 2;
+ case Tegra::Shader::UniformType::Quad:
+ case Tegra::Shader::UniformType::UnsignedQuad:
+ return 4;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+ return 1;
+ }
+}
+
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -168,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, false);
- const u32 count = GetUniformTypeElementsCount(type);
+ const u32 count = GetLdgMemorySize(type);
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -179,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
- const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
- const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+ const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+ Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+
+ if (type == Tegra::Shader::UniformType::UnsignedByte) {
+ // To handle unaligned loads get the byte used to dereferenced global memory
+ // and extract that byte from the loaded uint32.
+ Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
+ byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+
+ gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
+ Immediate(8));
+ }
SetTemporary(bb, i, gmem);
}
+
for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
@@ -196,28 +224,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
- u64 next_element = instr.attribute.fmt20.element;
- auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+ u64 element = instr.attribute.fmt20.element;
+ auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
- const auto StoreNextElement = [&](u32 reg_offset) {
- const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
- next_element, GetRegister(instr.gpr39));
+ const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+ for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+ Node dest;
+ if (instr.attribute.fmt20.patch) {
+ const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
+ dest = MakeNode<PatchNode>(offset);
+ } else {
+ dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
+ GetRegister(instr.gpr39));
+ }
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
bb.push_back(Operation(OperationCode::Assign, dest, src));
- // Load the next attribute element into the following register. If the element
- // to load goes beyond the vec4 size, load the first element of the next
- // attribute.
- next_element = (next_element + 1) % 4;
- next_index = next_index + (next_element == 0 ? 1 : 0);
- };
-
- const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
- for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
- StoreNextElement(reg_offset);
+ // Load the next attribute element into the following register. If the element to load
+ // goes beyond the vec4 size, load the first element of the next attribute.
+ element = (element + 1) % 4;
+ index = index + (element == 0 ? 1 : 0);
}
-
break;
}
case OpCode::Id::ST_L:
@@ -274,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
- const u32 count = GetUniformTypeElementsCount(type);
+ const u32 count = GetStgMemorySize(type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 116b95f76..7321698b2 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case OpCode::Id::MOV_SYS: {
const Node value = [this, instr] {
switch (instr.sys20) {
+ case SystemVariable::InvocationId:
+ return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
@@ -255,8 +257,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
+ case OpCode::Id::MEMBAR: {
+ UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
+ UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
+ bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+ break;
+ }
case OpCode::Id::DEPBAR: {
- LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
+ LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default:
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index e6c9d287e..8d54cce34 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -13,37 +13,65 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+namespace {
+constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7;
+}
+
u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
+ UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr);
- const Node apply_mask = [&]() {
+ const Node apply_mask = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::R2P_IMM:
- return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+ case OpCode::Id::P2R_IMM:
+ return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
default:
UNREACHABLE();
- return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+ return Immediate(0);
}
}();
- const Node mask = GetRegister(instr.gpr8);
- const auto offset = static_cast<u32>(instr.r2p.byte) * 8;
- constexpr u32 programmable_preds = 7;
- for (u64 pred = 0; pred < programmable_preds; ++pred) {
- const auto shift = static_cast<u32>(pred);
+ const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::R2P_IMM: {
+ const Node mask = GetRegister(instr.gpr8);
- const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
- const Node condition =
- Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
+ for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
+ const auto shift = static_cast<u32>(pred);
- const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
- const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
+ const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
+ const Node condition =
+ Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
- const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
- bb.push_back(Conditional(condition, {code}));
+ const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
+ const Node value =
+ Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
+
+ const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
+ bb.push_back(Conditional(condition, {code}));
+ }
+ break;
+ }
+ case OpCode::Id::P2R_IMM: {
+ Node value = Immediate(0);
+ for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
+ Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred),
+ Immediate(0));
+ value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit));
+ }
+ value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask);
+ value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8);
+
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
+ default:
+ UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName());
+ break;
}
return pc;
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index ca690b58b..4b14cdf58 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex_b.texture_type};
const bool is_array = instr.tex_b.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
- if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
- }
-
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
@@ -101,78 +89,142 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
[[fallthrough]];
}
case OpCode::Id::TLD4: {
- ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
- "PTP is not implemented");
-
- if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
- }
-
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
: instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
: instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
- WriteTexInstructionFloat(
- bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
+ const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
+ : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
+ WriteTexInstructionFloat(bb, instr,
+ GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
+ is_ptp, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
- UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
- if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
- }
-
- const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+ constexpr std::size_t num_coords = 2;
+ const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
+ const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
- if (depth_compare) {
+ std::vector<Node> aoffi;
+ Node depth_compare;
+ if (is_depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
- coords.push_back(op_b);
+ if (is_aoffi) {
+ aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+ depth_compare = GetRegister(instr.gpr20.Value() + 1);
+ } else {
+ depth_compare = op_b;
+ }
} else {
+ // There's no depth compare
coords.push_back(op_a);
- coords.push_back(op_b);
+ if (is_aoffi) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+ aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+ } else {
+ coords.push_back(op_b);
+ }
}
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
- const auto& sampler =
- GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
+ const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
+ const Sampler& sampler = *GetSampler(instr.sampler, info);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
- WriteTexsInstructionFloat(bb, instr, values, true);
+ if (instr.tld4s.fp16_flag) {
+ WriteTexsInstructionHalfFloat(bb, instr, values, true);
+ } else {
+ WriteTexsInstructionFloat(bb, instr, values, true);
+ }
break;
}
- case OpCode::Id::TXQ_B:
+ case OpCode::Id::TXD_B:
is_bindless = true;
[[fallthrough]];
- case OpCode::Id::TXQ: {
- if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
+ case OpCode::Id::TXD: {
+ UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
+
+ u64 base_reg = instr.gpr8.Value();
+ const auto derivate_reg = instr.gpr20.Value();
+ const auto texture_type = instr.txd.texture_type.Value();
+ const auto coord_count = GetCoordCount(texture_type);
+
+ const Sampler* sampler = is_bindless
+ ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
+ : GetSampler(instr.sampler, {{texture_type, false, false}});
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ WriteTexInstructionFloat(bb, instr, values);
+ break;
+ }
+ if (is_bindless) {
+ base_reg++;
}
+ std::vector<Node> coords;
+ std::vector<Node> derivates;
+ for (std::size_t i = 0; i < coord_count; ++i) {
+ coords.push_back(GetRegister(base_reg + i));
+ const std::size_t derivate = i * 2;
+ derivates.push_back(GetRegister(derivate_reg + derivate));
+ derivates.push_back(GetRegister(derivate_reg + derivate + 1));
+ }
+
+ for (u32 element = 0; element < values.size(); ++element) {
+ MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
+ values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
+ }
+
+ WriteTexInstructionFloat(bb, instr, values);
+
+ break;
+ }
+ case OpCode::Id::TXQ_B:
+ is_bindless = true;
+ [[fallthrough]];
+ case OpCode::Id::TXQ: {
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
- const auto& sampler =
- is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
+ const Sampler* sampler =
+ is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
+
+ if (sampler == nullptr) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 4; ++element) {
+ if (!instr.txq.IsComponentEnabled(element)) {
+ continue;
+ }
+ const Node value = Immediate(0);
+ SetTemporary(bb, indexer++, value);
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
u32 indexer = 0;
switch (instr.txq.query_type) {
@@ -181,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -205,15 +257,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
- if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
- }
-
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler =
- is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
- : GetSampler(instr.sampler, {{texture_type, is_array, false}});
+ const Sampler* sampler =
+ is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
+
+ if (sampler == nullptr) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 2; ++element) {
+ if (!instr.tmml.IsComponentEnabled(element)) {
+ continue;
+ }
+ const Node value = Immediate(0);
+ SetTemporary(bb, indexer++, value);
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
std::vector<Node> coords;
@@ -240,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -254,10 +316,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
- if (instr.tld.nodep_flag) {
- LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
- }
-
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
@@ -269,10 +327,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
- if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
- }
-
const Node4 components = GetTldsCode(instr, texture_type, is_array);
if (instr.tlds.fp32_flag) {
@@ -289,68 +343,54 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
- std::optional<SamplerInfo> sampler_info) {
- const auto offset = static_cast<u32>(sampler.index.Value());
-
- TextureType type;
- bool is_array;
- bool is_shadow;
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset,
+ std::optional<u32> buffer) {
if (sampler_info) {
- type = sampler_info->type;
- is_array = sampler_info->is_array;
- is_shadow = sampler_info->is_shadow;
- } else if (const auto sampler = locker.ObtainBoundSampler(offset)) {
- type = sampler->texture_type.Value();
- is_array = sampler->is_array.Value() != 0;
- is_shadow = sampler->is_shadow.Value() != 0;
- } else {
+ return *sampler_info;
+ }
+ const auto sampler =
+ buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
+ if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
- type = TextureType::Texture2D;
- is_array = false;
- is_shadow = false;
+ return SamplerInfo{TextureType::Texture2D, false, false, false};
}
+ return SamplerInfo{sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0,
+ sampler->is_buffer != 0};
+}
+
+const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+ std::optional<SamplerInfo> sampler_info) {
+ const auto offset = static_cast<u32>(sampler.index.Value());
+ const auto info = GetSamplerInfo(sampler_info, offset);
// If this sampler has already been used, return the existing mapping.
const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
[offset](const Sampler& entry) { return entry.GetOffset() == offset; });
if (it != used_samplers.end()) {
- ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
- it->IsShadow() == is_shadow);
- return *it;
+ ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
+ it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
+ return &*it;
}
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
+ return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
+ info.is_buffer);
}
-const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
-
- TextureType type;
- bool is_array;
- bool is_shadow;
- if (sampler_info) {
- type = sampler_info->type;
- is_array = sampler_info->is_array;
- is_shadow = sampler_info->is_shadow;
- } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
- type = sampler->texture_type.Value();
- is_array = sampler->is_array.Value() != 0;
- is_shadow = sampler->is_shadow.Value() != 0;
- } else {
- LOG_WARNING(HW_GPU, "Unknown sampler info");
- type = TextureType::Texture2D;
- is_array = false;
- is_shadow = false;
+ if (base_sampler == nullptr) {
+ return nullptr;
}
+ const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+
// If this sampler has already been used, return the existing mapping.
const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -358,15 +398,15 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
});
if (it != used_samplers.end()) {
- ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
- it->IsShadow() == is_shadow);
- return *it;
+ ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
+ it->IsShadow() == info.is_shadow);
+ return &*it;
}
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(
- Sampler(next_index, offset, buffer, type, is_array, is_shadow));
+ return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+ info.is_shadow, info.is_buffer);
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -409,14 +449,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const
}
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
- const Node4& components) {
+ const Node4& components, bool ignore_mask) {
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
// float instruction).
Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component))
+ if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
continue;
values[dest_elem++] = components[component];
}
@@ -451,17 +491,23 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
- const auto& sampler =
- is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
- : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
+ const SamplerInfo info{texture_type, is_array, is_shadow, false};
+ const Sampler* sampler =
+ is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ return values;
+ }
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
- // LOD selection (either via bias or explicit textureLod) not
- // supported in GL for sampler2DArrayShadow and
- // samplerCubeArrayShadow.
+ // LOD selection (either via bias or explicit textureLod) not supported in GL for
+ // sampler2DArrayShadow and samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
@@ -471,8 +517,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
- Node bias = {};
- Node lod = {};
+ Node bias;
+ Node lod;
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
switch (process_mode) {
case TextureProcessMode::LZ:
@@ -493,10 +539,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
}
}
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
+ MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -593,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array, bool is_aoffi, bool is_bindless) {
+ bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
+ ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
+
const std::size_t coord_count = GetCoordCount(texture_type);
// If enabled arrays index is always stored in the gpr8 field
@@ -608,17 +655,26 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
u64 parameter_register = instr.gpr20.Value();
- const auto& sampler =
- is_bindless
- ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}})
- : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+ const SamplerInfo info{texture_type, is_array, depth_compare, false};
+ const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+ : GetSampler(instr.sampler, info);
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ return values;
+ }
- std::vector<Node> aoffi;
+ std::vector<Node> aoffi, ptp;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+ } else if (is_ptp) {
+ ptp = GetPtpCoordinates(
+ {GetRegister(parameter_register++), GetRegister(parameter_register++)});
}
- Node dc{};
+ Node dc;
if (depth_compare) {
dc = GetRegister(parameter_register++);
}
@@ -626,11 +682,10 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
: Immediate(static_cast<u32>(instr.tld4.component));
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
- element};
+ MetaTexture meta{
+ *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -658,12 +713,12 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
+ const auto& sampler = *GetSampler(instr.sampler);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
@@ -671,6 +726,8 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
+ const Sampler& sampler = *GetSampler(instr.sampler);
+
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
@@ -694,12 +751,24 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
+ // Fill empty entries from the guest sampler
+ const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
+ if (type_coord_count != entry_coord_count) {
+ LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
+
+ // When the size is higher we insert zeroes
+ for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
+ coords.push_back(GetRegister(Register::ZeroIndex));
+ }
+
+ // Then we ensure the size matches the number of entries (dropping unused values)
+ coords.resize(entry_coord_count);
+ }
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
@@ -764,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
return aoffi;
}
+std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
+ static constexpr u32 num_entries = 8;
+
+ std::vector<Node> ptp;
+ ptp.reserve(num_entries);
+
+ const auto global_size = static_cast<s64>(global_code.size());
+ const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
+ const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
+ if (!low || !high) {
+ for (u32 entry = 0; entry < num_entries; ++entry) {
+ const u32 reg = entry / 4;
+ const u32 offset = entry % 4;
+ const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
+ const Node condition =
+ Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
+ const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
+ ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
+ }
+ return ptp;
+ }
+
+ const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
+ for (u32 entry = 0; entry < num_entries; ++entry) {
+ s32 value = (immediate >> (entry * 8)) & 0b111111;
+ if (value >= 32) {
+ value -= 64;
+ }
+ ptp.push_back(Immediate(value));
+ }
+
+ return ptp;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index fa8a250cc..11b77f795 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
+
OperationCode GetOperationCode(VoteOperation vote_op) {
switch (vote_op) {
case VoteOperation::All:
@@ -30,12 +31,16 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
return OperationCode::VoteAll;
}
}
+
} // Anonymous namespace
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
+ // Signal the backend that this shader uses warp instructions.
+ uses_warps = true;
+
switch (opcode->get().GetId()) {
case OpCode::Id::VOTE: {
const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
@@ -46,50 +51,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::SHFL: {
- Node width = [this, instr] {
- Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
- : GetRegister(instr.gpr39);
-
- // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
- // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
- // different parameters that don't properly map to GLSL's interface, but it should work
- // for cases emitted by Nvidia's compiler.
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(
- OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
- Immediate(8));
- } else {
- return Operation(OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, Immediate(0x201F),
- Operation(OperationCode::INegate, std::move(mask))),
- Immediate(8));
- }
- }();
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
- const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ Node thread_id = Operation(OperationCode::ThreadId);
+ Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+ Node seg_mask = BitfieldExtract(mask, 8, 16);
+
+ Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+ Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+ Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+ Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
+
+ Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
switch (instr.shfl.operation) {
case ShuffleOperation::Idx:
- return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
- case ShuffleOperation::Up:
- return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ return Operation(OperationCode::IBitwiseOr,
+ Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+ min_thread_id);
case ShuffleOperation::Down:
- return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ return Operation(OperationCode::IAdd, thread_id, index);
+ case ShuffleOperation::Up:
+ return Operation(OperationCode::IAdd, thread_id,
+ Operation(OperationCode::INegate, index));
case ShuffleOperation::Bfly:
- return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ return Operation(OperationCode::IBitwiseXor, thread_id, index);
}
- UNREACHABLE_MSG("Invalid SHFL operation: {}",
- static_cast<u64>(instr.shfl.operation.Value()));
- return {};
+ UNREACHABLE();
+ return Immediate(0U);
}();
- // Setting the predicate before the register is intentional to avoid overwriting.
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
- SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+ } else {
+ return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+ }
+ }();
+
+ SetPredicate(bb, instr.shfl.pred48, in_bounds);
SetRegister(
bb, instr.gpr0,
- Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
+ break;
+ }
+ case OpCode::Id::FSWZADD: {
+ UNIMPLEMENTED_IF(instr.fswzadd.ndv);
+
+ Node op_a = GetRegister(instr.gpr8);
+ Node op_b = GetRegister(instr.gpr20);
+ Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
break;
}
default: