From 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Mar 2021 11:31:37 -0400 Subject: shader: Implement SHFL --- .../backend/spirv/emit_context.cpp | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 9 ++ .../backend/spirv/emit_spirv_vote.cpp | 58 --------- .../backend/spirv/emit_spirv_warp.cpp | 135 +++++++++++++++++++++ 5 files changed, 151 insertions(+), 60 deletions(-) delete mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp (limited to 'src/shader_recompiler/backend') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea46af244..5db4a9082 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } - if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { + if (info.uses_subgroup_invocation_id || + (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { subgroup_local_invocation_id = DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 107403912..cee72f50d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.uses_subgroup_vote && profile.support_vote) { + if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { ctx.AddExtension("SPV_KHR_shader_ballot"); ctx.AddCapability(spv::Capability::SubgroupBallotKHR); if (!profile.warp_size_potentially_larger_than_guest) { @@ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } +void EmitGetInBoundsFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 6d4adafc7..a233a4817 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); Id EmitFPAbs16(EmitContext& ctx, Id value); Id EmitFPAbs32(EmitContext& ctx, Id value); Id EmitFPAbs64(EmitContext& ctx, Id value); @@ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp deleted file mode 100644 index a63677ef2..000000000 --- a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "shader_recompiler/backend/spirv/emit_spirv.h" - -namespace Shader::Backend::SPIRV { -namespace { -Id LargeWarpBallot(EmitContext& ctx, Id ballot) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; - const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; - return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); -} -} // Anonymous namespace - -Id EmitVoteAll(EmitContext& ctx, Id pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpSubgroupAllKHR(ctx.U1, pred); - } - const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; - const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; - const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; - const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; - return ctx.OpIEqual(ctx.U1, lhs, active_mask); -} - -Id EmitVoteAny(EmitContext& ctx, Id pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpSubgroupAnyKHR(ctx.U1, pred); - } - const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; - const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; - const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; - const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; - return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); -} - -Id EmitVoteEqual(EmitContext& ctx, Id pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); - } - const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; - const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; - const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; - const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; - return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), - ctx.OpIEqual(ctx.U1, lhs, active_mask)); -} - -Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { - const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); - } - return LargeWarpBallot(ctx, ballot); -} - -} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..44d8a347f --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -0,0 +1,135 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id LargeWarpBallot(EmitContext& ctx, Id ballot) { + const Id shift{ctx.Constant(ctx.U32[1], 5)}; + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); +} + +void SetInBoundsFlag(IR::Inst* inst, Id result) { + IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + in_bounds->SetDefinition(result); + in_bounds->Invalidate(); +} + +Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { + return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); +} + +Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { + return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, + ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); +} + +Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); +} + +Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { + return ctx.OpSelect(ctx.U32[1], in_range, + ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); +} +} // Anonymous namespace + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return LargeWarpBallot(ctx, ballot); +} + +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; + + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; + const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +} // namespace Shader::Backend::SPIRV -- cgit v1.2.3