author    Fernando Sahmkow <fsahmkow27@gmail.com> 2023-08-18 18:30:31 +0200
committer Liam <byteslice@airmail.cc>             2023-08-19 04:17:02 +0200
commit    c03f0b3c893f2bc2ae4f1e1825c5ac1453c36710 (patch)
tree      3f7f14fad6574878a7c0f94fbe16d574b61cb95a /src/shader_recompiler/ir_opt
parent    Merge pull request #11278 from Kelebek1/dma_sync (diff)
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp  227
1 file changed, 226 insertions, 1 deletion
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d81e9336..f46e55122 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -10,6 +10,7 @@
#include "shader_recompiler/environment.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
@@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
}
}
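+// Constant-folds FPAdd32 when both operands are immediates, and drops the
+// addition entirely when one operand is an immediate +/-0.0f.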
+void FoldFPAdd32(IR::Inst& inst) {
+ if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
+ return;
+ }
+ const IR::Value lhs_value{inst.Arg(0)};
+ const IR::Value rhs_value{inst.Arg(1)};
+ const auto check_neutral = [](const IR::Value& one_operand) {
+ return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
+ };
+ if (check_neutral(lhs_value)) {
+ inst.ReplaceUsesWith(rhs_value);
+ }
+ if (check_neutral(rhs_value)) {
+ inst.ReplaceUsesWith(lhs_value);
+ }
+}
+
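+// Detects a multiplication of DPdyFine by YDirection and replaces it with the
+// raw derivative, dropping the Y-direction correction factor.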
+bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+ const IR::Value lhs_value{inst.Arg(0)};
+ const IR::Value rhs_value{inst.Arg(1)};
+ IR::Inst* const lhs_op{lhs_value.InstRecursive()};
+ IR::Inst* const rhs_op{rhs_value.InstRecursive()};
+ if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
+ if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
+ return false;
+ }
+ inst.ReplaceUsesWith(rhs_value);
+ return true;
+ }
+ if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
+ return false;
+ }
+ if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
+ return false;
+ }
+ inst.ReplaceUsesWith(lhs_value);
+ return true;
+}
+
void FoldFPMul32(IR::Inst& inst) {
+ if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
+ return;
+ }
const auto control{inst.Flags<IR::FpControl>()};
if (control.no_contraction) {
return;
@@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
return;
}
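+ // Collapse YDirection * DPdyFine into the raw derivative (see FoldDerivateYFromCorrection).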
+ if (FoldDerivateYFromCorrection(inst)) {
+ return;
+ }
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
@@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
if (value_2 != value_3) {
- return;
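+ // Immediates of different types may still hold the same value; compare bit patterns.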
+ if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
+ return;
+ }
+ if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
+ return;
+ }
}
const IR::Value index{inst2->Arg(1)};
const IR::Value clamp{inst2->Arg(2)};
@@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
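+// Pattern-matches a coordinate built from FSwizzleAdd/ShuffleIndex quad exchanges
+// and, on success, recovers the original coordinate together with its two
+// per-quad derivatives into `results` as {coordinate, ddx, ddy}.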
+bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+ if (coord.IsImmediate()) {
+ return false;
+ }
+ const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
+ const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
+ IR::Inst* const inst2{value_1.InstRecursive()};
+ if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
+ return false;
+ }
+ const IR::Value index{inst2->Arg(1).Resolve()};
+ const IR::Value clamp{inst2->Arg(2).Resolve()};
+ const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
+ if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
+ return false;
+ }
+ if (index.U32() != 3 && clamp.U32() != 3) {
+ return false;
+ }
+ result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
+ return true;
+ };
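+ // The coordinate itself must be the outer FSwizzleAdd of the exchange network.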
+ IR::Inst* const inst = coord.InstRecursive();
+ if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
+ return false;
+ }
+ std::array<IR::Value, 3> temporary_values;
+ IR::Value value_1 = inst->Arg(0).Resolve();
+ IR::Value value_2 = inst->Arg(1).Resolve();
+ IR::Value value_3 = inst->Arg(2).Resolve();
+ std::array<u32, 4> swizzles_mask_a{};
+ std::array<u32, 4> swizzles_mask_b{};
+ const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
+ u32 value = mask.U32();
+ for (size_t i = 0; i < 4; i++) {
+ mask_results[i] = (value >> (i * 2)) & 0x3;
+ }
+ };
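+ // Unpack the 2-bit per-lane swizzle modes of the outer FSwizzleAdd.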
+ resolve_mask(swizzles_mask_a, value_3);
+ size_t coordinate_index = 0;
+ const auto resolve_pending = [&](IR::Value resolve_v) {
+ IR::Inst* const inst_r = resolve_v.InstRecursive();
+ if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
+ return false;
+ }
+ if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
+ return false;
+ }
+ if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
+ return false;
+ }
+ resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
+ return true;
+ };
+ if (value_1.IsImmediate() || value_2.IsImmediate()) {
+ return false;
+ }
+ bool should_continue = false;
+ if (resolve_pending(value_1)) {
+ should_continue = check_through_shuffle(value_2, temporary_values[0]);
+ coordinate_index = 0;
+ }
+ if (resolve_pending(value_2)) {
+ should_continue = check_through_shuffle(value_1, temporary_values[0]);
+ coordinate_index = 2;
+ }
+ if (!should_continue) {
+ return false;
+ }
+ // Determine which operand carries the X derivative and which carries the Y derivative.
+ size_t zero_mask_a = 0;
+ size_t zero_mask_b = 0;
+ for (size_t i = 0; i < 4; i++) {
+ if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
+ // The last operand may be inverted, so we cannot determine a result.
+ return false;
+ }
+ zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
+ zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
+ }
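+ // A zero mask of 0b1010 (or its complement) identifies the X-derivative
+ // exchange; any other two-lane mask is treated as the Y-derivative exchange.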
+ static constexpr size_t ddx_pattern = 0b1010;
+ static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
+ if (std::popcount(zero_mask_a) != 2) {
+ return false;
+ }
+ if (std::popcount(zero_mask_b) != 2) {
+ return false;
+ }
+ if (zero_mask_a == zero_mask_b) {
+ return false;
+ }
+ results[0] = temporary_values[coordinate_index];
+
+ if (coordinate_index == 0) {
+ if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
+ results[1] = temporary_values[1];
+ results[2] = temporary_values[2];
+ return true;
+ }
+ results[2] = temporary_values[1];
+ results[1] = temporary_values[2];
+ } else {
+ const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
+ if (mask == ddx_pattern || mask == ddx_pattern_inv) {
+ results[1] = temporary_value;
+ return;
+ }
+ results[2] = temporary_value;
+ };
+ assign_result(temporary_values[1], zero_mask_b);
+ assign_result(temporary_values[0], zero_mask_a);
+ }
+
+ return true;
+}
+
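+// Rewrites a 3D ImageSampleImplicitLod whose coordinates carry per-quad
+// derivative information (NDV) into an explicit ImageGradient instruction.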
+void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
+ IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
+ auto orig_opcode = inst.GetOpcode();
+ if (info.ndv_is_active == 0) {
+ return;
+ }
+ if (info.type != TextureType::Color3D) {
+ return;
+ }
+ const IR::Value handle{inst.Arg(0)};
+ const IR::Value coords{inst.Arg(1)};
+ const IR::Value bias_lc{inst.Arg(2)};
+ const IR::Value offset{inst.Arg(3)};
+ if (!offset.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const inst2 = coords.InstRecursive();
+ std::array<std::array<IR::Value, 3>, 3> results_matrix;
+ for (size_t i = 0; i < 3; i++) {
+ if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+ return;
+ }
+ }
+ IR::F32 lod_clamp{};
+ if (info.has_lod_clamp != 0) {
+ if (!bias_lc.IsImmediate()) {
+ lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
+ } else {
+ lod_clamp = IR::F32{bias_lc};
+ }
+ }
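+ // Re-emit the texture access as an explicit gradient sample built from the
+ // recovered coordinates and derivatives.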
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ IR::Value new_coords =
+ ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
+ IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
+ results_matrix[1][1], results_matrix[1][2]);
+ IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
+ info.num_derivates.Assign(3);
+ IR::Value new_gradient_instruction =
+ ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
+ IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
+ if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
+ new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
+ }
+ inst.ReplaceUsesWith(new_gradient_instruction);
+}
+
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
const IR::Value bank{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
@@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
case IR::Opcode::SelectF32:
case IR::Opcode::SelectF64:
return FoldSelect(inst);
+ case IR::Opcode::FPNeg32:
+ FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
+ return;
+ case IR::Opcode::FPAdd32:
+ FoldFPAdd32(inst);
+ return;
case IR::Opcode::FPMul32:
return FoldFPMul32(inst);
case IR::Opcode::LogicalAnd:
@@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
FoldDriverConstBuffer(env, block, inst, 1);
}
break;
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ case IR::Opcode::ImageSampleImplicitLod:
+ FoldImageSampleImplicitLod(block, inst);
+ break;
default:
break;
}