Diffstat (limited to 'src/shader_recompiler')
21 files changed, 1397 insertions, 126 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index b48007856..5efbe4e6f 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -372,6 +372,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
                                 ScalarU32 value);
 void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
                                 Register value);
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                  Register value);
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarU32 value);
 void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -412,6 +414,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
                             ScalarU32 offset, Register value);
 void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  ScalarU32 offset, Register value);
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              ScalarU32 offset, Register value);
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value);
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              ScalarU32 offset, Register value);
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   ScalarU32 offset, Register value);
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarF32 value);
 void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -448,6 +468,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
 void EmitGlobalAtomicOr64(EmitContext& ctx);
 void EmitGlobalAtomicXor64(EmitContext& ctx);
 void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicInc32x2(EmitContext& ctx);
+void EmitGlobalAtomicDec32x2(EmitContext& ctx);
+void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+void EmitGlobalAtomicOr32x2(EmitContext& ctx);
+void EmitGlobalAtomicXor32x2(EmitContext& ctx);
+void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32(EmitContext& ctx);
 void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index f135b67f5..f0fd94a28 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -311,6 +311,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
     ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
 }
 
+void EmitSharedAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
+                                  [[maybe_unused]] IR::Inst& inst,
+                                  [[maybe_unused]] ScalarU32 pointer_offset,
+                                  [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarU32 value) {
     Atom(ctx, inst, binding, offset, value, "ADD", "U32");
@@ -411,6 +418,62 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
     Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
 }
 
+void EmitStorageAtomicIAdd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicSMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicUMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicSMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicUMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicAnd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                              [[maybe_unused]] const IR::Value& binding,
+                              [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicOr32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                             [[maybe_unused]] const IR::Value& binding,
+                             [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicXor32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                              [[maybe_unused]] const IR::Value& binding,
+                              [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
+                                   [[maybe_unused]] IR::Inst& inst,
+                                   [[maybe_unused]] const IR::Value& binding,
+                                   [[maybe_unused]] ScalarU32 offset,
+                                   [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarF32 value) {
     Atom(ctx, inst, binding, offset, value, "ADD", "F32");
@@ -537,6 +600,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
     throw NotImplementedException("GLASM instruction");
 }
 
+void EmitGlobalAtomicIAdd32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 void EmitGlobalAtomicAddF32(EmitContext&) {
     throw NotImplementedException("GLASM instruction");
 }
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],{}[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { 
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+" + "1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); @@ -388,6 +486,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) { throw NotImplementedException("GLSL Instrucion"); } +void EmitGlobalAtomicIAdd32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMin32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMin32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMax32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMax32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicInc32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicDec32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAnd32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicOr32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicXor32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicExchange32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + void EmitGlobalAtomicAddF32(EmitContext&) { throw NotImplementedException("GLSL Instrucion"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 6cabbc717..704baddc9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -442,6 +442,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi std::string_view value); void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); +void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -482,6 +484,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b const IR::Value& offset, std::string_view value); void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const 
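The GLSL storage fallbacks above emulate 64-bit atomics that were lowered to 32x2 operations: the old value is read back as a uvec2 first, then each 32-bit half is updated separately and non-atomically. A minimal host-side sketch of the IAdd case on plain integers (names are illustrative, not part of the patch):

    #include <array>
    #include <cstdint>
    #include <cstddef>

    // Sketch of what the emitted GLSL does for StorageAtomicIAdd32x2. Each
    // component is updated independently; a carry out of the low word is not
    // propagated, so this only approximates a true 64-bit add.
    std::array<uint32_t, 2> NonAtomicIAdd32x2(uint32_t* ssbo, std::size_t word_index,
                                              std::array<uint32_t, 2> value) {
        const std::array<uint32_t, 2> old{ssbo[word_index], ssbo[word_index + 1]};
        ssbo[word_index] += value[0];     // .x, low word
        ssbo[word_index + 1] += value[1]; // .y, high word
        return old; // atomics return the pre-update value
    }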
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 6cabbc717..704baddc9 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -442,6 +442,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi
                                 std::string_view value);
 void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                                 std::string_view value);
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+                                  std::string_view value);
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -482,6 +484,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
                             const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -518,6 +538,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
 void EmitGlobalAtomicOr64(EmitContext& ctx);
 void EmitGlobalAtomicXor64(EmitContext& ctx);
 void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicInc32x2(EmitContext& ctx);
+void EmitGlobalAtomicDec32x2(EmitContext& ctx);
+void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+void EmitGlobalAtomicOr32x2(EmitContext& ctx);
+void EmitGlobalAtomicXor32x2(EmitContext& ctx);
+void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32(EmitContext& ctx);
 void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index b412957c7..2b360e073 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
 struct RescalingLayout {
     alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
     alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
-    alignas(16) u32 down_factor;
+    u32 down_factor;
 };
 constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
 constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
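Dropping alignas(16) from down_factor changes what offsetof reports: the member now packs directly after the scaling-word arrays instead of being rounded up to the next 16-byte boundary. A reduced illustration of that padding effect, with stand-in sizes rather than the real NUM_*_SCALING_WORDS values:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    struct WithAlign {
        alignas(16) std::array<uint32_t, 3> words; // 12 bytes of payload
        alignas(16) uint32_t down_factor;          // pushed out to offset 16
    };
    struct WithoutAlign {
        alignas(16) std::array<uint32_t, 3> words; // 12 bytes of payload
        uint32_t down_factor;                      // packs at offset 12
    };
    static_assert(offsetof(WithAlign, down_factor) == 16);
    static_assert(offsetof(WithoutAlign, down_factor) == 12);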
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 46ba52a25..d3cbb14a9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -82,6 +82,17 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
     ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
     return original_value;
 }
+
+Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+                      Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+                                    binding, offset, sizeof(u32[2]))};
+    const Id original_value{ctx.OpLoad(ctx.U32[2], pointer)};
+    const Id result{(ctx.*non_atomic_func)(ctx.U32[2], value, original_value)};
+    ctx.OpStore(pointer, result);
+    return original_value;
+}
 } // Anonymous namespace
 
 Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@@ -141,7 +152,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
         const auto [scope, semantics]{AtomicArgs(ctx)};
         return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
     }
-    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
     const Id pointer_1{SharedPointer(ctx, offset, 0)};
     const Id pointer_2{SharedPointer(ctx, offset, 1)};
     const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
@@ -152,6 +163,18 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
     return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
 }
 
+Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) {
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    const Id pointer_1{SharedPointer(ctx, offset, 0)};
+    const Id pointer_2{SharedPointer(ctx, offset, 1)};
+    const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
+    const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
+    const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
+    ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
+    ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
+    return ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2);
+}
+
 Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value) {
     return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
@@ -275,6 +298,56 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
     return original;
 }
 
+Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpIAdd);
+}
+
+Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMin);
+}
+
+Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMin);
+}
+
+Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMax);
+}
+
+Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMax);
+}
+
+Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseAnd);
+}
+
+Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseOr);
+}
+
+Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseXor);
+}
+
+Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
+                                 const IR::Value& offset, Id value) {
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+                                    binding, offset, sizeof(u32[2]))};
+    const Id original{ctx.OpLoad(ctx.U32[2], pointer)};
+    ctx.OpStore(pointer, value);
+    return original;
+}
+
 Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value) {
     const Id ssbo{ctx.ssbos[binding.U32()].U32};
@@ -418,6 +491,50 @@ Id EmitGlobalAtomicExchange64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+Id EmitGlobalAtomicIAdd32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
 Id EmitGlobalAtomicAddF32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
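StorageAtomicU32x2 funnels all eight non-atomic 32x2 fallbacks through one helper by taking a pointer to the Sirit::Module member function that builds the replacement instruction. A self-contained sketch of that dispatch mechanism, where Module is a stand-in and not the real Sirit API:

    #include <algorithm>
    #include <cstdint>

    struct Module {
        uint32_t OpIAdd(uint32_t /*type*/, uint32_t lhs, uint32_t rhs) { return lhs + rhs; }
        uint32_t OpUMin(uint32_t /*type*/, uint32_t lhs, uint32_t rhs) { return std::min(lhs, rhs); }
    };

    // Same shape as the non_atomic_func parameter above: one wrapper, many ops.
    using BinaryOp = uint32_t (Module::*)(uint32_t, uint32_t, uint32_t);

    uint32_t ApplyOp(Module& module, BinaryOp op, uint32_t type, uint32_t lhs, uint32_t rhs) {
        return (module.*op)(type, lhs, rhs); // e.g. ApplyOp(m, &Module::OpIAdd, ...)
    }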
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 8ea730c80..80b4bbd27 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -123,34 +123,36 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
 }
 
 Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
-           const IR::Value& binding, const IR::Value& offset) {
+           const IR::Value& binding, const IR::Value& offset, const Id indirect_func) {
+    Id buffer_offset;
+    const Id uniform_type{ctx.uniform_types.*member_ptr};
+    if (offset.IsImmediate()) {
+        // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
+        const Id imm_offset{ctx.Const(offset.U32() / element_size)};
+        buffer_offset = imm_offset;
+    } else if (element_size > 1) {
+        const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
+        const Id shift{ctx.Const(log2_element_size)};
+        buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
+    } else {
+        buffer_offset = ctx.Def(offset);
+    }
     if (!binding.IsImmediate()) {
-        throw NotImplementedException("Constant buffer indexing");
+        return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
     }
     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
-    const Id uniform_type{ctx.uniform_types.*member_ptr};
-    if (!offset.IsImmediate()) {
-        Id index{ctx.Def(offset)};
-        if (element_size > 1) {
-            const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
-            const Id shift{ctx.Const(log2_element_size)};
-            index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
-        }
-        const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(result_type, access_chain);
-    }
-    // Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
-    const Id imm_offset{ctx.Const(offset.U32() / element_size)};
-    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
+    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
     return ctx.OpLoad(result_type, access_chain);
 }
 
 Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
-    return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
+    return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset,
+                   ctx.load_const_func_u32);
 }
 
 Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
-    return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
+    return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset,
+                   ctx.load_const_func_u32x4);
 }
 
 Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
@@ -201,7 +203,8 @@ void EmitGetIndirectBranchVariable(EmitContext&) {
 
 Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
-        const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset,
+                              ctx.load_const_func_u8)};
         return ctx.OpUConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -217,7 +220,8 @@ Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& of
 
 Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
-        const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset,
+                              ctx.load_const_func_u8)};
        return ctx.OpSConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -233,8 +237,8 @@ Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& of
 
 Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
-        const Id load{
-            GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset,
+                              ctx.load_const_func_u16)};
         return ctx.OpUConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -250,8 +254,8 @@ Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 
 Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
-        const Id load{
-            GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset,
+                              ctx.load_const_func_u16)};
         return ctx.OpSConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -276,7 +280,8 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing) {
-        return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
+        return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset,
+                       ctx.load_const_func_f32);
     } else {
         const Id vector{GetCbufU32x4(ctx, binding, offset)};
         return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
@@ -285,8 +290,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 
 Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing) {
-        return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
-                       offset);
+        return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset,
+                       ctx.load_const_func_u32x2);
     } else {
         const Id vector{GetCbufU32x4(ctx, binding, offset)};
         return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),
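GetCbuf now computes buffer_offset up front for both the bound and the indirect path: an immediate byte offset is divided by the element size (reading the aligned element, as the comment notes), while a dynamic offset is shifted right by log2(element_size). The same arithmetic on plain integers, as an illustration only:

    #include <bit>
    #include <cstdint>

    // element_size is a power of two (1 to 16 bytes for the cbuf types here).
    uint32_t BufferIndex(uint32_t byte_offset, uint32_t element_size) {
        const uint32_t log2_element_size = static_cast<uint32_t>(std::countr_zero(element_size));
        return byte_offset >> log2_element_size; // e.g. LDC.U32 at byte 6 -> element 1 (byte 4)
    }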
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 887112deb..f263b41b0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -335,6 +335,7 @@ Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value);
 Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -375,6 +376,24 @@ Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::
                           Id value);
 Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                                Id value);
+Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value);
+Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value);
+Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
+                                 const IR::Value& offset, Id value);
 Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value);
 Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -411,6 +430,17 @@ Id EmitGlobalAtomicAnd64(EmitContext& ctx);
 Id EmitGlobalAtomicOr64(EmitContext& ctx);
 Id EmitGlobalAtomicXor64(EmitContext& ctx);
 Id EmitGlobalAtomicExchange64(EmitContext& ctx);
+Id EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+Id EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+Id EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+Id EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+Id EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+Id EmitGlobalAtomicInc32x2(EmitContext& ctx);
+Id EmitGlobalAtomicDec32x2(EmitContext& ctx);
+Id EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+Id EmitGlobalAtomicOr32x2(EmitContext& ctx);
+Id EmitGlobalAtomicXor32x2(EmitContext& ctx);
+Id EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 Id EmitGlobalAtomicAddF32(EmitContext& ctx);
 Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index cd90c084a..aa5b6c9b7 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -464,6 +464,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     DefineSharedMemory(program);
     DefineSharedMemoryFunctions(program);
     DefineConstantBuffers(program.info, uniform_binding);
+    DefineConstantBufferIndirectFunctions(program.info);
     DefineStorageBuffers(program.info, storage_binding);
     DefineTextureBuffers(program.info, texture_binding);
     DefineImageBuffers(program.info, image_binding);
@@ -993,7 +994,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
         }
         return;
     }
-    IR::Type types{info.used_constant_buffer_types};
+    IR::Type types{info.used_constant_buffer_types | info.used_indirect_cbuf_types};
     if (True(types & IR::Type::U8)) {
         if (profile.support_int8) {
             DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
@@ -1027,6 +1028,62 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     binding += static_cast<u32>(info.constant_buffer_descriptors.size());
 }
 
+void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
+    if (!info.uses_cbuf_indirect) {
+        return;
+    }
+    const auto make_accessor{[&](Id buffer_type, Id UniformDefinitions::*member_ptr) {
+        const Id func_type{TypeFunction(buffer_type, U32[1], U32[1])};
+        const Id func{OpFunction(buffer_type, spv::FunctionControlMask::MaskNone, func_type)};
+        const Id binding{OpFunctionParameter(U32[1])};
+        const Id offset{OpFunctionParameter(U32[1])};
+
+        AddLabel();
+
+        const Id merge_label{OpLabel()};
+        const Id uniform_type{uniform_types.*member_ptr};
+
+        std::array<Id, Info::MAX_CBUFS> buf_labels;
+        std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
+        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+            buf_labels[i] = OpLabel();
+            buf_literals[i] = Sirit::Literal{i};
+        }
+        OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+        OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
+        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+            AddLabel(buf_labels[i]);
+            const Id cbuf{cbufs[i].*member_ptr};
+            const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
+            const Id result{OpLoad(buffer_type, access_chain)};
+            OpReturnValue(result);
+        }
+        AddLabel(merge_label);
+        OpUnreachable();
+        OpFunctionEnd();
+        return func;
+    }};
+    IR::Type types{info.used_indirect_cbuf_types};
+    if (True(types & IR::Type::U8)) {
+        load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
+    }
+    if (True(types & IR::Type::U16)) {
+        load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
+    }
+    if (True(types & IR::Type::F32)) {
+        load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
+    }
+    if (True(types & IR::Type::U32)) {
+        load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
+    }
+    if (True(types & IR::Type::U32x2)) {
+        load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
+    }
+    if (True(types & IR::Type::U32x4)) {
+        load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
+    }
+}
+
 void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
     if (info.storage_buffers_descriptors.empty()) {
         return;
     }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index f87138f7e..906a1dc2c 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -294,6 +294,13 @@ public:
 
     std::vector<Id> interfaces;
 
+    Id load_const_func_u8{};
+    Id load_const_func_u16{};
+    Id load_const_func_u32{};
+    Id load_const_func_f32{};
+    Id load_const_func_u32x2{};
+    Id load_const_func_u32x4{};
+
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
@@ -302,6 +309,7 @@ private:
     void DefineSharedMemory(const IR::Program& program);
     void DefineSharedMemoryFunctions(const IR::Program& program);
     void DefineConstantBuffers(const Info& info, u32& binding);
+    void DefineConstantBufferIndirectFunctions(const Info& info);
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextureBuffers(const Info& info, u32& binding);
     void DefineImageBuffers(const Info& info, u32& binding);
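DefineConstantBufferIndirectFunctions builds, for each used element type, a SPIR-V function whose body is an OpSwitch over the constant-buffer index, with one case per binding; GetCbuf then services non-immediate bindings through OpFunctionCall. A rough C++ analogue of one generated accessor, with MAX_CBUFS and the container types standing in for the real definitions:

    #include <array>
    #include <cstdint>
    #include <vector>

    constexpr std::size_t MAX_CBUFS = 18; // assumed stand-in for Info::MAX_CBUFS

    uint32_t LoadConstU32(const std::array<std::vector<uint32_t>, MAX_CBUFS>& cbufs,
                          uint32_t binding, uint32_t offset) {
        switch (binding) {
        case 0:
            return cbufs[0][offset];
        case 1:
            return cbufs[1][offset];
        // ... one case per constant buffer, up to MAX_CBUFS - 1 ...
        default:
            return 0; // the merge block in the generated SPIR-V is OpUnreachable
        }
    }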
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 97e2bf6af..631446cf7 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -118,6 +118,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::SharedAtomicXor32:
     case Opcode::SharedAtomicExchange32:
     case Opcode::SharedAtomicExchange64:
+    case Opcode::SharedAtomicExchange32x2:
     case Opcode::GlobalAtomicIAdd32:
     case Opcode::GlobalAtomicSMin32:
     case Opcode::GlobalAtomicUMin32:
@@ -138,6 +139,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::GlobalAtomicOr64:
     case Opcode::GlobalAtomicXor64:
     case Opcode::GlobalAtomicExchange64:
+    case Opcode::GlobalAtomicIAdd32x2:
+    case Opcode::GlobalAtomicSMin32x2:
+    case Opcode::GlobalAtomicUMin32x2:
+    case Opcode::GlobalAtomicSMax32x2:
+    case Opcode::GlobalAtomicUMax32x2:
+    case Opcode::GlobalAtomicAnd32x2:
+    case Opcode::GlobalAtomicOr32x2:
+    case Opcode::GlobalAtomicXor32x2:
+    case Opcode::GlobalAtomicExchange32x2:
     case Opcode::GlobalAtomicAddF32:
     case Opcode::GlobalAtomicAddF16x2:
     case Opcode::GlobalAtomicAddF32x2:
@@ -165,6 +175,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::StorageAtomicOr64:
     case Opcode::StorageAtomicXor64:
    case Opcode::StorageAtomicExchange64:
+    case Opcode::StorageAtomicIAdd32x2:
+    case Opcode::StorageAtomicSMin32x2:
+    case Opcode::StorageAtomicUMin32x2:
+    case Opcode::StorageAtomicSMax32x2:
+    case Opcode::StorageAtomicUMax32x2:
+    case Opcode::StorageAtomicAnd32x2:
+    case Opcode::StorageAtomicOr32x2:
+    case Opcode::StorageAtomicXor32x2:
+    case Opcode::StorageAtomicExchange32x2:
     case Opcode::StorageAtomicAddF32:
     case Opcode::StorageAtomicAddF16x2:
     case Opcode::StorageAtomicAddF32x2:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index b94ce7406..efb6bfac3 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -341,6 +341,7 @@ OPCODE(SharedAtomicOr32, U32, U32,
 OPCODE(SharedAtomicXor32, U32, U32, U32, )
 OPCODE(SharedAtomicExchange32, U32, U32, U32, )
 OPCODE(SharedAtomicExchange64, U64, U32, U64, )
+OPCODE(SharedAtomicExchange32x2, U32x2, U32, U32x2, )
 
 OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
 OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
@@ -362,6 +363,15 @@ OPCODE(GlobalAtomicAnd64, U64, U64,
 OPCODE(GlobalAtomicOr64, U64, U64, U64, )
 OPCODE(GlobalAtomicXor64, U64, U64, U64, )
 OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
+OPCODE(GlobalAtomicIAdd32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicSMin32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicUMin32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicSMax32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicUMax32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicAnd32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicOr32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicXor32x2, U32x2, U32x2, U32x2, )
+OPCODE(GlobalAtomicExchange32x2, U32x2, U32x2, U32x2, )
 OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
 OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
 OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
@@ -390,6 +400,15 @@ OPCODE(StorageAtomicAnd64, U64, U32,
 OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
 OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
 OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicIAdd32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicSMin32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicUMin32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicSMax32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicUMax32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicAnd32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicOr32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicXor32x2, U32x2, U32, U32, U32x2, )
+OPCODE(StorageAtomicExchange32x2, U32x2, U32, U32, U32x2, )
 OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
 OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
 OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
index 2300088e3..8007a4d46 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -11,10 +11,20 @@ namespace Shader::Maxwell {
 using namespace LDC;
 namespace {
 std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
-                                 const IR::U32& reg, const IR::U32& imm) {
+                                 const IR::U32& reg, const IR::U32& imm_offset) {
     switch (mode) {
     case Mode::Default:
-        return {imm_index, ir.IAdd(reg, imm)};
+        return {imm_index, ir.IAdd(reg, imm_offset)};
+    case Mode::IS: {
+        // Segmented addressing mode
+        // Ra+imm_offset points into a flat mapping of const buffer
+        // address space
+        const IR::U32 address{ir.IAdd(reg, imm_offset)};
+        const IR::U32 index{ir.BitFieldExtract(address, ir.Imm32(16), ir.Imm32(16))};
+        const IR::U32 offset{ir.BitFieldExtract(address, ir.Imm32(0), ir.Imm32(16))};
+
+        return {ir.IAdd(index, imm_index), offset};
+    }
     default:
         break;
     }
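Mode::IS derives both the constant buffer slot and the byte offset from one computed address: the upper 16 bits of Ra + imm_offset select the buffer (added to the immediate index), the lower 16 bits index into it. The same split on plain integers, mirroring the BitFieldExtract calls above (hypothetical helper for illustration):

    #include <cstdint>
    #include <utility>

    // Returns {constant buffer index, byte offset within that buffer}.
    std::pair<uint32_t, uint32_t> SplitSegmentedAddress(uint32_t ra, uint32_t imm_offset,
                                                        uint32_t imm_index) {
        const uint32_t address = ra + imm_offset;
        const uint32_t index = (address >> 16) & 0xffff; // cbuf slot
        const uint32_t offset = address & 0xffff;        // byte offset into the cbuf
        return {imm_index + index, offset};
    }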
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
index e0fe47912..f3c7ceb57 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -13,59 +13,535 @@ namespace {
 // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
 IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
                  u64 ttbl) {
-    IR::U32 r{ir.Imm32(0)};
-    const IR::U32 not_a{ir.BitwiseNot(a)};
-    const IR::U32 not_b{ir.BitwiseNot(b)};
-    const IR::U32 not_c{ir.BitwiseNot(c)};
-    if (ttbl & 0x01) {
-        // r |= ~a & ~b & ~c;
-        const auto lhs{ir.BitwiseAnd(not_a, not_b)};
-        const auto rhs{ir.BitwiseAnd(lhs, not_c)};
-        r = ir.BitwiseOr(r, rhs);
+    switch (ttbl) {
+    // generated code, do not edit manually
+    case 0:
+        return ir.Imm32(0);
+    case 1:
+        return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
+    case 2:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 3:
+        return ir.BitwiseNot(ir.BitwiseOr(a, b));
+    case 4:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 5:
+        return ir.BitwiseNot(ir.BitwiseOr(a, c));
+    case 6:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+    case 7:
+        return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
+    case 8:
+        return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+    case 9:
+        return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
+    case 10:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(a));
+    case 11:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 12:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(a));
+    case 13:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 14:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+    case 15:
+        return ir.BitwiseNot(a);
+    case 16:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+    case 17:
+        return ir.BitwiseNot(ir.BitwiseOr(b, c));
+    case 18:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+    case 19:
+        return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
+    case 20:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+    case 21:
+        return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+    case 22:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+    case 23:
+        return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
+                             ir.BitwiseNot(a));
+    case 24:
+        return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+    case 25:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
+    case 26:
+        return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+    case 27:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+    case 28:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+    case 29:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+    case 30:
+        return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
+    case 31:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
+    case 32:
+        return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+    case 33:
+        return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+    case 34:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(b));
+    case 35:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 36:
+        return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
+    case 37:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
+    case 38:
+        return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+    case 39:
+        return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 40:
+        return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
+    case 41:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b),
+                             ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
+    case 42:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+    case 43:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
+                             ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+    case 44:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
+    case 45:
+        return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 46:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
+    case 47:
+        return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
+    case 48:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(b));
+    case 49:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
+    case 50:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+    case 51:
+        return ir.BitwiseNot(b);
+    case 52:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+    case 53:
+        return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 54:
+        return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
+    case 55:
+        return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
+    case 56:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
+    case 57:
+        return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+    case 58:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
+    case 59:
+        return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
+    case 60:
+        return ir.BitwiseXor(a, b);
+    case 61:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
+    case 62:
+        return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
+    case 63:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, b));
+    case 64:
+        return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+    case 65:
+        return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+    case 66:
+        return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
+    case 67:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
+    case 68:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(c));
+    case 69:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 70:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+    case 71:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 72:
+        return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
+    case 73:
+        return ir.BitwiseXor(ir.BitwiseOr(a, c),
+                             ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
+    case 74:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
+    case 75:
+        return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 76:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+    case 77:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
+                             ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+    case 78:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
+    case 79:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
+    case 80:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(c));
+    case 81:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
+    case 82:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+    case 83:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 84:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+    case 85:
+        return ir.BitwiseNot(c);
+    case 86:
+        return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
+    case 87:
+        return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
+    case 88:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
+    case 89:
+        return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+    case 90:
+        return ir.BitwiseXor(a, c);
+    case 91:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
+    case 92:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
+    case 93:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
+    case 94:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
+    case 95:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, c));
+    case 96:
+        return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
+    case 97:
+        return ir.BitwiseXor(ir.BitwiseOr(b, c),
+                             ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
+    case 98:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
+    case 99:
+        return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 100:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
+    case 101:
+        return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 102:
+        return ir.BitwiseXor(b, c);
+    case 103:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
+    case 104:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
+    case 105:
+        return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+    case 106:
+        return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
+    case 107:
+        return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
+                             ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 108:
+        return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
+    case 109:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
+                             ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 110:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+    case 111:
+        return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+    case 112:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+    case 113:
+        return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
+                             ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+    case 114:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
+    case 115:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
+    case 116:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
+    case 117:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
+    case 118:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+    case 119:
+        return ir.BitwiseNot(ir.BitwiseAnd(b, c));
+    case 120:
+        return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
+    case 121:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
+                             ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 122:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+    case 123:
+        return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+    case 124:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+    case 125:
+        return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+    case 126:
+        return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+    case 127:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
+    case 128:
+        return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
+    case 129:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 130:
+        return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 131:
+        return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 132:
+        return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 133:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 134:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 135:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+    case 136:
+        return ir.BitwiseAnd(b, c);
+    case 137:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 138:
+        return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 139:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 140:
+        return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 141:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 142:
+        return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 143:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+    case 144:
+        return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 145:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 146:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 147:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+    case 148:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 149:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+    case 150:
+        return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
+    case 151:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
+                            ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 152:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 153:
+        return ir.BitwiseXor(b, ir.BitwiseNot(c));
+    case 154:
+        return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+    case 155:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
+    case 156:
+        return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+    case 157:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
+    case 158:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
+    case 159:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
+    case 160:
+        return ir.BitwiseAnd(a, c);
+    case 161:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 162:
+        return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+    case 163:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 164:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 165:
+        return ir.BitwiseXor(a, ir.BitwiseNot(c));
+    case 166:
+        return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+    case 167:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
+    case 168:
+        return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
+    case 169:
+        return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+    case 170:
+        return c;
+    case 171:
+        return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 172:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 173:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 174:
+        return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+    case 175:
+        return ir.BitwiseOr(c, ir.BitwiseNot(a));
+    case 176:
+        return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 177:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+    case 178:
+        return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 179:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+    case 180:
+        return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+    case 181:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
+    case 182:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
+    case 183:
+        return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
+    case 184:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 185:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 186:
+        return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+    case 187:
+        return ir.BitwiseOr(c, ir.BitwiseNot(b));
+    case 188:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
+    case 189:
+        return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 190:
+        return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
+    case 191:
+        return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+    case 192:
+        return ir.BitwiseAnd(a, b);
+    case 193:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 194:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 195:
+        return ir.BitwiseXor(a, ir.BitwiseNot(b));
+    case 196:
+        return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+    case 197:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 198:
+        return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+    case 199:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
+    case 200:
+        return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
+    case 201:
+        return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+    case 202:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 203:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 204: + return b; + case 205: + return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c))); + case 206: + return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a))); + case 207: + return ir.BitwiseOr(b, ir.BitwiseNot(a)); + case 208: + return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 209: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c))); + case 210: + return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b))); + case 211: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b))); + case 212: + return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); + case 213: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); + case 214: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b))); + case 215: + return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b))); + case 216: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 217: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 218: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)); + case 219: + return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 220: + return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c))); + case 221: + return ir.BitwiseOr(b, ir.BitwiseNot(c)); + case 222: + return ir.BitwiseOr(b, ir.BitwiseXor(a, c)); + case 223: + return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c))); + case 224: + return ir.BitwiseAnd(a, ir.BitwiseOr(b, c)); + case 225: + return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); + case 226: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c)); + case 227: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 228: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c)); + case 229: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 230: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)); + case 231: + return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c)); + case 232: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); + case 233: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), + ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b))); + case 234: + return ir.BitwiseOr(c, ir.BitwiseAnd(a, b)); + case 235: + return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 236: + return ir.BitwiseOr(b, ir.BitwiseAnd(a, c)); + case 237: + return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 238: + return ir.BitwiseOr(b, c); + case 239: + return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); + case 240: + return a; + case 241: + return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c))); + case 242: + return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b))); + case 243: + return ir.BitwiseOr(a, ir.BitwiseNot(b)); + case 244: + return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c))); + case 245: + return ir.BitwiseOr(a, ir.BitwiseNot(c)); + case 246: + return ir.BitwiseOr(a, ir.BitwiseXor(b, c)); + case 247: + return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c))); + case 248: + return ir.BitwiseOr(a, ir.BitwiseAnd(b, c)); + case 249: + return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 250: + return ir.BitwiseOr(a, c); + case 251: + return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); + case 252: + return ir.BitwiseOr(a, b); + case 253: + 
return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); + case 254: + return ir.BitwiseOr(a, ir.BitwiseOr(b, c)); + case 255: + return ir.Imm32(0xFFFFFFFF); + // end of generated code } - if (ttbl & 0x02) { - // r |= ~a & ~b & c; - const auto lhs{ir.BitwiseAnd(not_a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x04) { - // r |= ~a & b & ~c; - const auto lhs{ir.BitwiseAnd(not_a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x08) { - // r |= ~a & b & c; - const auto lhs{ir.BitwiseAnd(not_a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x10) { - // r |= a & ~b & ~c; - const auto lhs{ir.BitwiseAnd(a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x20) { - // r |= a & ~b & c; - const auto lhs{ir.BitwiseAnd(a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x40) { - // r |= a & b & ~c; - const auto lhs{ir.BitwiseAnd(a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x80) { - // r |= a & b & c; - const auto lhs{ir.BitwiseAnd(a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - return r; + throw NotImplementedException("LOP3 with out of range ttbl"); } IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py new file mode 100644 index 000000000..8f547c266 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py @@ -0,0 +1,92 @@ +# Copyright © 2022 degasus <markus@selfnet.de> +# This work is free. You can redistribute it and/or modify it under the +# terms of the Do What The Fuck You Want To Public License, Version 2, +# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details. 
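+# Truth-table encoding: an input's truth table is the byte of outputs over the +# eight (a, b, c) combinations, so a = 0xF0, b = 0xCC, c = 0xAA, and bit +# (4*a + 2*b + c) of a LOP3 immediate is the result for that combination. +# The search below finds, for each immediate 0..255, the cheapest ir.Bitwise* +# expression whose truth table equals the immediate.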
+ +from itertools import product + +# The primitive instructions +OPS = { + 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b), + 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b), + 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b), + 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only a tiny cost, as this can often be inlined into other instructions +} + +# Our database of combinations of instructions +optimized_calls = {} +def cmp(lhs, rhs): + if lhs is None: # new entry + return True + if lhs[3] > rhs[3]: # costs + return True + if lhs[3] < rhs[3]: # costs + return False + if len(lhs[0]) > len(rhs[0]): # string len + return True + if len(lhs[0]) < len(rhs[0]): # string len + return False + if lhs[0] > rhs[0]: # string sorting + return True + if lhs[0] < rhs[0]: # string sorting + return False + assert lhs == rhs, "redundant instruction, bug in brute force" + return False +def register(imm, instruction, count, latency): + # Use the sum of instruction count and latency as costs to evaluate which combination is best + costs = count + latency + + old = optimized_calls.get(imm, None) + new = (instruction, count, latency, costs) + + # Update if new or better + if cmp(old, new): + optimized_calls[imm] = new + return True + + return False + +# Constants: 0 and ~0 (for free) +register(0, 'ir.Imm32(0)', 0, 0) +register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0) + +# Inputs: a, b, c (for free) +ta = 0xF0 +tb = 0xCC +tc = 0xAA +inputs = { + ta : 'a', + tb : 'b', + tc : 'c', +} +for imm, instruction in inputs.items(): + register(imm, instruction, 0, 0) + register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NOT on inputs + +# Try to combine values from the db with each instruction. +# If the result is better than the old method, update it. +while True: + registered = 0 + calls_copy = optimized_calls.copy() + for OP, (argc, cost, f) in OPS.items(): + for args in product(calls_copy.items(), repeat=argc): + # unpack (transpose) the arrays + imm = [arg[0] for arg in args] + value = [arg[1][0] for arg in args] + count = [arg[1][1] for arg in args] + latency = [arg[1][2] for arg in args] + + registered += register( + f(*imm), + OP.format(*value), + sum(count) + cost, + max(latency) + cost) + if registered == 0: + # No update at all, so terminate + break + +# Hacky output. Please improve me to output valid C++ instead.
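+# Each table entry prints as one `case N: return <expr>;` label for the LOP3 +# switch in the C++ translator (the generated cases shown above).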
+s = """ case {imm}: + return {op};""" +for imm in range(256): + print(s.format(imm=imm, op=optimized_calls[imm][0])) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 248ad3ced..b22725584 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } Optimization::SsaRewritePass(program); + Optimization::ConstantPropagationPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - Optimization::ConstantPropagationPass(program); - if (Settings::values.resolution_info.active) { Optimization::RescalingPass(program); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index b6a20f904..0b2c60842 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,6 +29,46 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } +void AddRegisterIndexedLdc(Info& info) { + info.uses_cbuf_indirect = true; + + // The shader can use any possible constant buffer + info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1; + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.clear(); + for (u32 i = 0; i < Info::MAX_CBUFS; i++) { + cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1}); + + // The shader can use any possible access size + info.constant_buffer_used_sizes[i] = 0x10'000; + } +} + +u32 GetElementSize(IR::Type& used_type, Shader::IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + used_type |= IR::Type::U8; + return 1; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + used_type |= IR::Type::U16; + return 2; + case IR::Opcode::GetCbufU32: + used_type |= IR::Type::U32; + return 4; + case IR::Opcode::GetCbufF32: + used_type |= IR::Type::F32; + return 4; + case IR::Opcode::GetCbufU32x2: + used_type |= IR::Type::U32x2; + return 8; + default: + throw InvalidArgument("Invalid opcode {}", opcode); + } +} + void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -360,6 +400,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -454,42 +503,18 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufU32x2: { const IR::Value index{inst.Arg(0)}; const IR::Value offset{inst.Arg(1)}; - if (!index.IsImmediate()) { - throw NotImplementedException("Constant buffer with non-immediate index"); - } - AddConstantBufferDescriptor(info, index.U32(), 1); - u32 element_size{}; - switch (inst.GetOpcode()) { - case IR::Opcode::GetCbufU8: - case 
IR::Opcode::GetCbufS8: - info.used_constant_buffer_types |= IR::Type::U8; - element_size = 1; - break; - case IR::Opcode::GetCbufU16: - case IR::Opcode::GetCbufS16: - info.used_constant_buffer_types |= IR::Type::U16; - element_size = 2; - break; - case IR::Opcode::GetCbufU32: - info.used_constant_buffer_types |= IR::Type::U32; - element_size = 4; - break; - case IR::Opcode::GetCbufF32: - info.used_constant_buffer_types |= IR::Type::F32; - element_size = 4; - break; - case IR::Opcode::GetCbufU32x2: - info.used_constant_buffer_types |= IR::Type::U32x2; - element_size = 8; - break; - default: - break; - } - u32& size{info.constant_buffer_used_sizes[index.U32()]}; - if (offset.IsImmediate()) { - size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + if (index.IsImmediate()) { + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size = GetElementSize(info.used_constant_buffer_types, inst.GetOpcode()); + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + } else { + size = 0x10'000; + } } else { - size = 0x10'000; + AddRegisterIndexedLdc(info); + GetElementSize(info.used_indirect_cbuf_types, inst.GetOpcode()); } break; } @@ -597,6 +622,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; case IR::Opcode::LoadStorage64: case IR::Opcode::WriteStorage64: + case IR::Opcode::StorageAtomicIAdd32x2: + case IR::Opcode::StorageAtomicSMin32x2: + case IR::Opcode::StorageAtomicUMin32x2: + case IR::Opcode::StorageAtomicSMax32x2: + case IR::Opcode::StorageAtomicUMax32x2: + case IR::Opcode::StorageAtomicAnd32x2: + case IR::Opcode::StorageAtomicOr32x2: + case IR::Opcode::StorageAtomicXor32x2: + case IR::Opcode::StorageAtomicExchange32x2: info.used_storage_buffer_types |= IR::Type::U32x2; break; case IR::Opcode::LoadStorage128: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 4197b0095..ddf497e32 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case 
IR::Opcode::GlobalAtomicAddF32x2: @@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::StorageAtomicOr32; case IR::Opcode::GlobalAtomicXor32: return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; case IR::Opcode::GlobalAtomicIAdd64: return IR::Opcode::StorageAtomicIAdd64; case IR::Opcode::GlobalAtomicSMin64: @@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::StorageAtomicOr64; case IR::Opcode::GlobalAtomicXor64: return IR::Opcode::StorageAtomicXor64; - case IR::Opcode::GlobalAtomicExchange32: - return IR::Opcode::StorageAtomicExchange32; case IR::Opcode::GlobalAtomicExchange64: return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicIAdd32x2: + return IR::Opcode::StorageAtomicIAdd32x2; + case IR::Opcode::GlobalAtomicSMin32x2: + return IR::Opcode::StorageAtomicSMin32x2; + case IR::Opcode::GlobalAtomicUMin32x2: + return IR::Opcode::StorageAtomicUMin32x2; + case IR::Opcode::GlobalAtomicSMax32x2: + return IR::Opcode::StorageAtomicSMax32x2; + case IR::Opcode::GlobalAtomicUMax32x2: + return IR::Opcode::StorageAtomicUMax32x2; + case IR::Opcode::GlobalAtomicAnd32x2: + return IR::Opcode::StorageAtomicAnd32x2; + case IR::Opcode::GlobalAtomicOr32x2: + return IR::Opcode::StorageAtomicOr32x2; + case IR::Opcode::GlobalAtomicXor32x2: + return IR::Opcode::StorageAtomicXor32x2; + case IR::Opcode::GlobalAtomicExchange32x2: + return IR::Opcode::StorageAtomicExchange32x2; case IR::Opcode::GlobalAtomicAddF32: return IR::Opcode::StorageAtomicAddF32; case IR::Opcode::GlobalAtomicAddF16x2: @@ -298,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32 && + inst->GetOpcode() != IR::Opcode::GetCbufU32x2) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -454,6 +491,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index e80d3d1d9..c2654cd9b 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) { return ShiftRightLogical64To32(block, inst); case IR::Opcode::ShiftRightArithmetic64: return ShiftRightArithmetic64To32(block, inst); + case IR::Opcode::SharedAtomicExchange64: + return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2); + case IR::Opcode::GlobalAtomicIAdd64: + return 
inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2); + case IR::Opcode::GlobalAtomicSMin64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2); + case IR::Opcode::GlobalAtomicUMin64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2); + case IR::Opcode::GlobalAtomicSMax64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2); + case IR::Opcode::GlobalAtomicUMax64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2); + case IR::Opcode::GlobalAtomicAnd64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2); + case IR::Opcode::GlobalAtomicOr64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2); + case IR::Opcode::GlobalAtomicXor64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2); + case IR::Opcode::GlobalAtomicExchange64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2); default: break; } diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index c28500dd1..496d4667e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s } } +void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; + switch (info.type) { + case TextureType::ColorArray2D: + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); + break; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const IR::Value coord{inst.Arg(1)}; @@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { @@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void PatchImageRead(IR::Block& block, IR::Inst& inst) { diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 9f375c30e..9d36bd9eb 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -173,9 +173,11 @@ struct Info { bool uses_atomic_image_u32{}; bool uses_shadow_lod{}; bool uses_rescaling_uniform{}; + bool uses_cbuf_indirect{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; + IR::Type used_indirect_cbuf_types{}; u32 constant_buffer_mask{}; std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
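A quick way to sanity-check the generated table is the invariant the generator relies on: with input truth tables a = 0xF0, b = 0xCC and c = 0xAA, every LOP3 immediate is exactly the truth table of the expression emitted for it, so evaluating the immediate bitwise over those three constants must reproduce the immediate itself. A minimal standalone sketch of that check (Lop3Ref8 and the harness are illustrative, not part of this change):

#include <cassert>
#include <cstdint>

// Reference LOP3: bit idx of `imm` is the output for the input combination
// idx = (a_bit << 2) | (b_bit << 1) | c_bit, applied independently per bit.
static std::uint8_t Lop3Ref8(std::uint8_t a, std::uint8_t b, std::uint8_t c, std::uint8_t imm) {
    std::uint8_t r = 0;
    for (int i = 0; i < 8; ++i) {
        const int idx = (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1) | ((c >> i) & 1);
        r = static_cast<std::uint8_t>(r | (((imm >> idx) & 1) << i));
    }
    return r;
}

int main() {
    // With a = 0xF0, b = 0xCC, c = 0xAA, input combination i lives in bit i,
    // so the reference must map every immediate to itself.
    for (unsigned imm = 0; imm < 256; ++imm) {
        assert(Lop3Ref8(0xF0, 0xCC, 0xAA, static_cast<std::uint8_t>(imm)) == imm);
    }
    return 0;
}

The same reference, widened to 32 bits and compared per bit, is what each emitted ir.Bitwise* expression has to match for its case label.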