diff options
Diffstat (limited to 'src/shader_recompiler')
9 files changed, 214 insertions, 62 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2ffa8c453..7f16cb0dc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -411,6 +411,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineTextures(program.info, binding); DefineImages(program.info, binding); DefineAttributeMemAccess(program.info); + DefineGlobalMemoryFunctions(program.info); DefineLabels(program); } @@ -762,6 +763,82 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { } } +void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { + if (!info.uses_global_memory) { + return; + } + using DefPtr = Id StorageDefinitions::*; + const Id zero{u32_zero_value}; + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, + auto&& callback) { + AddLabel(); + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; + const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; + const Id ssbo_addr_pointer{OpAccessChain( + uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; + const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, + zero, ssbo_size_cbuf_offset)}; + + const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; + const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; + const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; + const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), + OpULessThan(U1, addr, ssbo_end))}; + const Id then_label{OpLabel()}; + const Id else_label{OpLabel()}; + OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, then_label, else_label); + AddLabel(then_label); + const Id ssbo_id{ssbos[index].*ssbo_member}; + const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; + const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; + callback(ssbo_pointer); + AddLabel(else_label); + } + }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(type, U64)}; + const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + define_body(ssbo_member, addr, element_pointer, shift, + [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); + OpReturnValue(ConstantNull(type)); + OpFunctionEnd(); + return func_id; + }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(void_id, U64, type)}; + const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + const Id data{OpFunctionParameter(type)}; + define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { + OpStore(ssbo_pointer, data); + OpReturn(); + }); + OpReturn(); + OpFunctionEnd(); + return func_id; + }}; + const auto define{ + [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { + const Id element_type{type_def.element}; + const u32 shift{static_cast<u32>(std::countr_zero(size))}; + const Id load_func{define_load(ssbo_member, element_type, type, shift)}; + const Id write_func{define_write(ssbo_member, element_type, type, shift)}; + return std::make_pair(load_func, write_func); + }}; + std::tie(load_global_func_u32, write_global_func_u32) = + define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); + std::tie(load_global_func_u32x2, write_global_func_u32x2) = + define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); + std::tie(load_global_func_u32x4, write_global_func_u32x4) = + define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index ef8507367..a4503c7ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -224,6 +224,13 @@ public: Id f32x2_min_cas{}; Id f32x2_max_cas{}; + Id load_global_func_u32{}; + Id load_global_func_u32x2{}; + Id load_global_func_u32x4{}; + Id write_global_func_u32{}; + Id write_global_func_u32x2{}; + Id write_global_func_u32x4{}; + Id input_position{}; std::array<Id, 32> input_generics{}; @@ -255,6 +262,7 @@ private: void DefineTextures(const Info& info, u32& binding); void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); + void DefineGlobalMemoryFunctions(const Info& info); void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 67d06faa0..89a82e858 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -84,16 +84,16 @@ void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx); -void EmitLoadGlobal64(EmitContext& ctx); -void EmitLoadGlobal128(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); void EmitWriteGlobalU8(EmitContext& ctx); void EmitWriteGlobalS8(EmitContext& ctx); void EmitWriteGlobalU16(EmitContext& ctx); void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx); -void EmitWriteGlobal64(EmitContext& ctx); -void EmitWriteGlobal128(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); @@ -277,9 +277,9 @@ Id EmitFPIsNan16(EmitContext& ctx, Id value); Id EmitFPIsNan32(EmitContext& ctx, Id value); Id EmitFPIsNan64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -void EmitIAdd64(EmitContext& ctx); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); -void EmitISub64(EmitContext& ctx); +Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index c12d0a513..cd5b1f42c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -55,16 +55,16 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return result; } -void EmitIAdd64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIAdd(ctx.U64, a, b); } Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitISub64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitISub64(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U64, a, b); } Id EmitIMul32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 7bf828995..8849258e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -64,16 +64,16 @@ void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitLoadGlobal32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadGlobal32(EmitContext& ctx, Id address) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); } -void EmitLoadGlobal64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadGlobal64(EmitContext& ctx, Id address) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); } -void EmitLoadGlobal128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadGlobal128(EmitContext& ctx, Id address) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); } void EmitWriteGlobalU8(EmitContext&) { @@ -92,16 +92,16 @@ void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitWriteGlobal32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); } -void EmitWriteGlobal64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); } -void EmitWriteGlobal128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 20a1d61cc..14180dcd9 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { }(); } } + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + continue; + } + // Assume these are written for now + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = true, + }); + } +} } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, @@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo Optimization::VerificationPass(program); Optimization::CollectShaderInfoPass(env, program); CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); return program; } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 0500a5141..cccf0909d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -187,6 +187,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPUnordGreaterThanEqual16: case IR::Opcode::FPIsNan16: case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::StorageAtomicAddF16x2: case IR::Opcode::StorageAtomicMinF16x2: case IR::Opcode::StorageAtomicMaxF16x2: @@ -373,7 +375,58 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::StorageAtomicAnd64: case IR::Opcode::StorageAtomicOr64: case IR::Opcode::StorageAtomicXor64: + case IR::Opcode::StorageAtomicExchange64: + info.uses_int64 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: info.uses_int64 = true; + info.uses_global_memory = true; + info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; + info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; break; default: break; diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 378a3a915..f294d297f 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -11,6 +11,7 @@ #include <boost/container/flat_set.hpp> #include <boost/container/small_vector.hpp> +#include "common/alignment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce storage_buffer.offset < bias.offset_end; } -/// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - const IR::Value zero{u32{0}}; - switch (inst.GetOpcode()) { - case IR::Opcode::LoadGlobalS8: - case IR::Opcode::LoadGlobalU8: - case IR::Opcode::LoadGlobalS16: - case IR::Opcode::LoadGlobalU16: - case IR::Opcode::LoadGlobal32: - inst.ReplaceUsesWith(zero); - break; - case IR::Opcode::LoadGlobal64: - inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); - break; - case IR::Opcode::LoadGlobal128: - inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); - break; - case IR::Opcode::WriteGlobalS8: - case IR::Opcode::WriteGlobalU8: - case IR::Opcode::WriteGlobalS16: - case IR::Opcode::WriteGlobalU16: - case IR::Opcode::WriteGlobal32: - case IR::Opcode::WriteGlobal64: - case IR::Opcode::WriteGlobal128: - inst.Invalidate(); - break; - default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst.GetOpcode()); - } -} - struct LowAddrInfo { IR::U32 value; s32 imm_offset; @@ -350,6 +318,10 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) .index{index.U32()}, .offset{offset.U32()}, }; + if (!Common::IsAligned(storage_buffer.offset, 16)) { + // The SSBO pointer has to be aligned + return std::nullopt; + } if (bias && !MeetsBias(storage_buffer, *bias)) { // We have to blacklist some addresses in case we wrongly // point to them @@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // Track the low address of the instruction const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { - DiscardGlobalMemory(block, inst); + // Failed to track the low address, use NVN fallbacks return; } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - std::optional storage_buffer{Track(low_addr, &nvn_bias)}; + std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { - // If that also failed, drop the global memory usage - // LOG_ERROR - DiscardGlobalMemory(block, inst); + // If that also fails, use NVN fallbacks return; } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f808adeba..50b4d1c05 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -162,6 +162,7 @@ struct Info { bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; bool uses_int64_bit_atomics{}; + bool uses_global_memory{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; |