From 3a63fa0477ea8297c80133d35494e1dfdc012f95 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Tue, 9 Mar 2021 17:14:57 -0300
Subject: shader: Partial implementation of LDC

---
 .../ir_opt/collect_shader_info_pass.cpp       | 135 +++++++++++++++++++--
 .../ir_opt/constant_propagation_pass.cpp      |  22 +++-
 .../global_memory_to_storage_buffer_pass.cpp  |   2 +-
 src/shader_recompiler/ir_opt/texture_pass.cpp |   2 +-
 4 files changed, 146 insertions(+), 15 deletions(-)

(limited to 'src/shader_recompiler/ir_opt')

diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 960beadd4..cdbe85221 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
 
 void VisitUsages(Info& info, IR::Inst& inst) {
     switch (inst.Opcode()) {
-    case IR::Opcode::WorkgroupId:
-        info.uses_workgroup_id = true;
-        break;
-    case IR::Opcode::LocalInvocationId:
-        info.uses_local_invocation_id = true;
-        break;
     case IR::Opcode::CompositeConstructF16x2:
     case IR::Opcode::CompositeConstructF16x3:
     case IR::Opcode::CompositeConstructF16x4:
     case IR::Opcode::CompositeExtractF16x2:
     case IR::Opcode::CompositeExtractF16x3:
     case IR::Opcode::CompositeExtractF16x4:
+    case IR::Opcode::SelectF16:
     case IR::Opcode::BitCastU16F16:
     case IR::Opcode::BitCastF16U16:
     case IR::Opcode::PackFloat2x16:
@@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPTrunc64:
         info.uses_fp64 = true;
         break;
-    case IR::Opcode::GetCbuf:
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetCbufU8:
+    case IR::Opcode::GetCbufS8:
+    case IR::Opcode::UndefU8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::LoadStorageU8:
+    case IR::Opcode::LoadStorageS8:
+    case IR::Opcode::WriteStorageU8:
+    case IR::Opcode::WriteStorageS8:
+    case IR::Opcode::SelectU8:
+        info.uses_int8 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetCbufU16:
+    case IR::Opcode::GetCbufS16:
+    case IR::Opcode::UndefU16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::LoadStorageU16:
+    case IR::Opcode::LoadStorageS16:
+    case IR::Opcode::WriteStorageU16:
+    case IR::Opcode::WriteStorageS16:
+    case IR::Opcode::SelectU16:
+    case IR::Opcode::BitCastU16F16:
+    case IR::Opcode::BitCastF16U16:
+    case IR::Opcode::ConvertS16F16:
+    case IR::Opcode::ConvertS16F32:
+    case IR::Opcode::ConvertS16F64:
+    case IR::Opcode::ConvertU16F16:
+    case IR::Opcode::ConvertU16F32:
+    case IR::Opcode::ConvertU16F64:
+        info.uses_int16 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetCbufU64:
+    case IR::Opcode::UndefU64:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+    case IR::Opcode::SelectU64:
+    case IR::Opcode::BitCastU64F64:
+    case IR::Opcode::BitCastF64U64:
+    case IR::Opcode::PackUint2x32:
+    case IR::Opcode::UnpackUint2x32:
+    case IR::Opcode::IAdd64:
+    case IR::Opcode::ISub64:
+    case IR::Opcode::INeg64:
+    case IR::Opcode::ShiftLeftLogical64:
+    case IR::Opcode::ShiftRightLogical64:
+    case IR::Opcode::ShiftRightArithmetic64:
+    case IR::Opcode::ConvertS64F16:
+    case IR::Opcode::ConvertS64F32:
+    case IR::Opcode::ConvertS64F64:
+    case IR::Opcode::ConvertU64F16:
+    case IR::Opcode::ConvertU64F32:
+    case IR::Opcode::ConvertU64F64:
+    case IR::Opcode::ConvertU64U32:
+    case IR::Opcode::ConvertU32U64:
+    case IR::Opcode::ConvertF16U64:
+    case IR::Opcode::ConvertF32U64:
+    case IR::Opcode::ConvertF64U64:
+        info.uses_int64 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::WorkgroupId:
+        info.uses_workgroup_id = true;
+        break;
+    case IR::Opcode::LocalInvocationId:
+        info.uses_local_invocation_id = true;
+        break;
+    case IR::Opcode::GetCbufU8:
+    case IR::Opcode::GetCbufS8:
+    case IR::Opcode::GetCbufU16:
+    case IR::Opcode::GetCbufS16:
+    case IR::Opcode::GetCbufU32:
+    case IR::Opcode::GetCbufF32:
+    case IR::Opcode::GetCbufU64: {
         if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
             AddConstantBufferDescriptor(info, index.U32(), 1);
         } else {
             throw NotImplementedException("Constant buffer with non-immediate index");
         }
+        switch (inst.Opcode()) {
+        case IR::Opcode::GetCbufU8:
+        case IR::Opcode::GetCbufS8:
+            info.used_constant_buffer_types |= IR::Type::U8;
+            break;
+        case IR::Opcode::GetCbufU16:
+        case IR::Opcode::GetCbufS16:
+            info.used_constant_buffer_types |= IR::Type::U16;
+            break;
+        case IR::Opcode::GetCbufU32:
+            info.used_constant_buffer_types |= IR::Type::U32;
+            break;
+        case IR::Opcode::GetCbufF32:
+            info.used_constant_buffer_types |= IR::Type::F32;
+            break;
+        case IR::Opcode::GetCbufU64:
+            info.used_constant_buffer_types |= IR::Type::U64;
+            break;
+        default:
+            break;
+        }
         break;
+    }
     case IR::Opcode::BindlessImageSampleImplicitLod:
     case IR::Opcode::BindlessImageSampleExplicitLod:
     case IR::Opcode::BindlessImageSampleDrefImplicitLod:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index ae3d5a7d6..7ba9ebe9b 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) {
     // ISub32 is generally used to subtract two constant buffers, compare and replace this with
     // zero if they equal.
     const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
-        return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf &&
+        return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
                a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
     }};
     IR::Inst* op_a{inst.Arg(0).InstRecursive()};
@@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) {
         // Canonicalize local variables to simplify the following logic
         std::swap(op_a, op_b);
     }
-    if (op_b->Opcode() != IR::Opcode::GetCbuf) {
+    if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
         return;
     }
     IR::Inst* const inst_cbuf{op_b};
@@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) {
     }
 }
 
-template <typename T>
+template <IR::Opcode op, typename T>
 void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
     const IR::Value value{inst.Arg(0)};
     if (value.IsImmediate()) {
@@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
         return;
     }
     IR::Inst* const arg_inst{value.InstRecursive()};
-    if (value.InstRecursive()->Opcode() == reverse) {
+    if (arg_inst->Opcode() == reverse) {
         inst.ReplaceUsesWith(arg_inst->Arg(0));
+        return;
+    }
+    if constexpr (op == IR::Opcode::BitCastF32U32) {
+        if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
+            // Replace the bitcast with a typed constant buffer read
+            inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
+            inst.SetArg(0, arg_inst->Arg(0));
+            inst.SetArg(1, arg_inst->Arg(1));
+            return;
+        }
     }
 }
 
@@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::ISub32:
         return FoldISub32(inst);
     case IR::Opcode::BitCastF32U32:
-        return FoldBitCast<f32>(inst, IR::Opcode::BitCastU32F32);
+        return FoldBitCast<IR::Opcode::BitCastF32U32, f32>(inst, IR::Opcode::BitCastU32F32);
     case IR::Opcode::BitCastU32F32:
-        return FoldBitCast<u32>(inst, IR::Opcode::BitCastF32U32);
+        return FoldBitCast<IR::Opcode::BitCastU32F32, u32>(inst, IR::Opcode::BitCastF32U32);
     case IR::Opcode::IAdd64:
         return FoldAdd<u64>(block, inst);
     case IR::Opcode::SelectU32:
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 2625c0bb2..5d98d278e 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -203,7 +203,7 @@ std::optional Track(IR::Block* block, const IR::Value& value,
         return std::nullopt;
     }
     const IR::Inst* const inst{value.InstRecursive()};
-    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+    if (inst->Opcode() == IR::Opcode::GetCbufU32) {
         const IR::Value index{inst->Arg(0)};
         const IR::Value offset{inst->Arg(1)};
         if (!index.IsImmediate()) {
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 80e4ad6a9..ec802e02c 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -78,7 +78,7 @@ std::optional Track(IR::Block* block, const IR::Value& value,
         return std::nullopt;
     }
     const IR::Inst* const inst{value.InstRecursive()};
-    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+    if (inst->Opcode() == IR::Opcode::GetCbufU32) {
         const IR::Value index{inst->Arg(0)};
         const IR::Value offset{inst->Arg(1)};
         if (!index.IsImmediate()) {
-- 
cgit v1.2.3