diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/decode/half_set_predicate.cpp | 19 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 104 | ||||
-rw-r--r-- | src/video_core/shader/decode/shift.cpp | 19 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 83 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 14 |
5 files changed, 173 insertions, 66 deletions
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index afea33e5f..840694527 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -42,9 +42,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { cond = instr.hsetp2.reg.cond; h_and = instr.hsetp2.reg.h_and; op_b = - UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, - instr.hsetp2.reg.negate_b), - instr.hsetp2.reg.type_b); + GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), + instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); break; default: UNREACHABLE(); @@ -52,22 +51,22 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { } const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred); + const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); const auto Write = [&](u64 dest, Node src) { SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); }; const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred0; - const u64 second = instr.hsetp2.pred39; + const u64 first = instr.hsetp2.pred3; + const u64 second = instr.hsetp2.pred0; if (h_and) { - const Node joined = Operation(OperationCode::LogicalAnd2, comparison); + Node joined = Operation(OperationCode::LogicalAnd2, comparison); Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, joined)); + Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); + Write(first, Operation(OperationCode::LogicalPick2, comparison, 
Immediate(0U))); + Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); } return pc; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 77151a24b..d54fb88c9 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SUST: { UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store @@ -61,56 +60,105 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { } const auto type{instr.sust.image_type}; - const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; + auto& image{instr.sust.is_immediate ? 
GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; + image.MarkWrite(); + MetaImage meta{image, values}; - const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; - bb.push_back(store); + bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); + break; + } + case OpCode::Id::SUATOM: { + UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); + + Node value = GetRegister(instr.gpr0); + + std::vector<Node> coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + const OperationCode operation_code = [instr] { + switch (instr.suatom_d.operation) { + case Tegra::Shader::ImageAtomicOperation::Add: + return OperationCode::AtomicImageAdd; + case Tegra::Shader::ImageAtomicOperation::Min: + return OperationCode::AtomicImageMin; + case Tegra::Shader::ImageAtomicOperation::Max: + return OperationCode::AtomicImageMax; + case Tegra::Shader::ImageAtomicOperation::And: + return OperationCode::AtomicImageAnd; + case Tegra::Shader::ImageAtomicOperation::Or: + return OperationCode::AtomicImageOr; + case Tegra::Shader::ImageAtomicOperation::Xor: + return OperationCode::AtomicImageXor; + case Tegra::Shader::ImageAtomicOperation::Exch: + return OperationCode::AtomicImageExchange; + default: + UNIMPLEMENTED_MSG("Unimplemented operation={}", + static_cast<u32>(instr.suatom_d.operation.Value())); + return OperationCode::AtomicImageAdd; + } + }(); + + const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; + MetaImage meta{image, {std::move(value)}}; + SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); break; } default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); } return pc; } -const Image& 
ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { +Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { const auto offset{static_cast<std::size_t>(image.index.Value())}; - - // If this image has already been used, return the existing mapping. - const auto itr{std::find_if(used_images.begin(), used_images.end(), - [=](const Image& entry) { return entry.GetOffset() == offset; })}; - if (itr != used_images.end()) { - ASSERT(itr->GetType() == type); - return *itr; + if (const auto image = TryUseExistingImage(offset, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - const Image entry{offset, next_index, type}; - return *used_images.emplace(entry).first; + return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; } -const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, - Tegra::Shader::ImageType type) { +Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { const Node image_register{GetRegister(reg)}; const auto [base_image, cbuf_index, cbuf_offset]{ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; - // If this image has already been used, return the existing mapping. - const auto itr{std::find_if(used_images.begin(), used_images.end(), - [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })}; - if (itr != used_images.end()) { - ASSERT(itr->GetType() == type); - return *itr; + if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. 
const std::size_t next_index{used_images.size()}; - const Image entry{cbuf_index, cbuf_offset, next_index, type}; - return *used_images.emplace(entry).first; + return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) + .first->second; +} + +Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + auto it = used_images.find(offset); + if (it == used_images.end()) { + return nullptr; + } + auto& image = it->second; + ASSERT(image.GetType() == type); + + if (size) { + // We know the size, if it's known it has to be the same as before, otherwise we can set it. + if (image.IsSizeKnown()) { + ASSERT(image.GetSize() == size); + } else { + image.SetSize(*size); + } + } + return &image; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 2ac16eeb0..f6ee68a54 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { + Node op_a = GetRegister(instr.gpr8); + Node op_b = [&]() { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { @@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, - instr.shift.is_signed, PRECISE, op_a, op_b); + if (instr.shr.wrap) { + op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f)); + } else { + op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); + op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); + } + + Node value = 
SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, + std::move(op_a), std::move(op_b)); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: { - const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); + const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5db9313c4..b47b201cf 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <optional> #include <string> #include <tuple> #include <utility> @@ -148,7 +149,14 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 - ImageStore, /// (MetaImage, float[N] coords) -> void + ImageStore, /// (MetaImage, int[N] values) -> void + AtomicImageAdd, /// (MetaImage, int[N] coords) -> void + AtomicImageMin, /// (MetaImage, int[N] coords) -> void + AtomicImageMax, /// (MetaImage, int[N] coords) -> void + AtomicImageAnd, /// (MetaImage, int[N] coords) -> void + AtomicImageOr, /// (MetaImage, int[N] coords) -> void + AtomicImageXor, /// (MetaImage, int[N] coords) -> void + AtomicImageExchange, /// (MetaImage, int[N] coords) -> void Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -273,46 +281,85 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 
}; -class Image { +class Image final { public: - explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) - : offset{offset}, index{index}, type{type}, is_bindless{false} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) + : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} - explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) + constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_bindless{true} {} + is_bindless{true}, size{size} {} - explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless) - : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + bool is_bindless, bool is_written, bool is_read, + std::optional<Tegra::Shader::ImageAtomicSize> size) + : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, + is_written{is_written}, is_read{is_read}, size{size} {} - std::size_t GetOffset() const { + void MarkWrite() { + is_written = true; + } + + void MarkRead() { + is_read = true; + } + + void SetSize(Tegra::Shader::ImageAtomicSize size_) { + size = size_; + } + + constexpr std::size_t GetOffset() const { return offset; } - std::size_t GetIndex() const { + constexpr std::size_t GetIndex() const { return index; } - Tegra::Shader::ImageType GetType() const { + constexpr Tegra::Shader::ImageType GetType() const { return type; } - bool IsBindless() const { + constexpr bool IsBindless() const { return is_bindless; } - bool operator<(const Image& rhs) const { - return std::tie(offset, index, 
type, is_bindless) < - std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); + constexpr bool IsWritten() const { + return is_written; + } + + constexpr bool IsRead() const { + return is_read; + } + + constexpr std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + + constexpr bool IsSizeKnown() const { + return size.has_value(); + } + + constexpr Tegra::Shader::ImageAtomicSize GetSize() const { + return size.value(); + } + + constexpr bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, size, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); } private: - std::size_t offset{}; + u64 offset{}; std::size_t index{}; Tegra::Shader::ImageType type{}; bool is_bindless{}; + bool is_written{}; + bool is_read{}; + std::optional<Tegra::Shader::ImageAtomicSize> size{}; }; struct GlobalMemoryBase { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index bcc9b79b6..62816bd56 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -95,7 +95,7 @@ public: return used_samplers; } - const std::set<Image>& GetImages() const { + const std::map<u64, Image>& GetImages() const { return used_images; } @@ -272,10 +272,16 @@ private: bool is_shadow); /// Accesses an image. - const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size = {}); /// Access a bindless image sampler. 
- const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size = {}); + + /// Tries to access an existing image, updating it's state as needed + Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -356,7 +362,7 @@ private: std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; - std::set<Image> used_images; + std::map<u64, Image> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_layer{}; |