diff options
-rw-r--r-- | src/shader_recompiler/environment.h | 2 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/texture_pass.cpp | 80 | ||||
-rw-r--r-- | src/shader_recompiler/shader_info.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 31 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 25 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 32 |
6 files changed, 127 insertions, 49 deletions
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 1c50ae51e..090bc1c08 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -17,7 +17,7 @@ public: [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; - [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e1d5a2ce1..5ac485522 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,9 @@ namespace { struct ConstBufferAddr { u32 index; u32 offset; + u32 secondary_index; + u32 secondary_offset; + bool has_secondary; }; struct TextureInst { @@ -109,9 +112,38 @@ bool IsTextureInstruction(const IR::Inst& inst) { return IndexedInstruction(inst) != IR::Opcode::Void; } +std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst); + +std::optional<ConstBufferAddr> Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); +} + std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + switch (inst->GetOpcode()) { + default: return std::nullopt; + case IR::Opcode::BitwiseOr32: { + std::optional lhs{Track(inst->Arg(0))}; + std::optional rhs{Track(inst->Arg(1))}; + if (!lhs || !rhs) { + return std::nullopt; + } + if (lhs->has_secondary || rhs->has_secondary) { + return std::nullopt; + } + if (lhs->index > rhs->index || lhs->offset > rhs->offset) { + std::swap(lhs, rhs); + } + return ConstBufferAddr{ + .index = lhs->index, + .offset = lhs->offset, + .secondary_index = rhs->index, + .secondary_offset = rhs->offset, + .has_secondary = true, + }; + } + case IR::Opcode::GetCbufU32: + break; } const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; @@ -127,13 +159,12 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { return ConstBufferAddr{ .index{index.U32()}, .offset{offset.U32()}, + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } -std::optional<ConstBufferAddr> Track(const IR::Value& value) { - return IR::BreadthFirstSearch(value, TryGetConstBuffer); -} - TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { @@ -146,6 +177,9 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = ConstBufferAddr{ .index = env.TextureBoundBuffer(), .offset = inst.Arg(0).U32(), + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } return TextureInst{ @@ -155,6 +189,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { }; } +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? cbuf.index : cbuf.secondary_index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.offset : cbuf.secondary_offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTextureType(lhs_raw | rhs_raw); +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -167,8 +209,11 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + return desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -181,8 +226,12 @@ public: u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -247,14 +296,14 @@ void TexturePass(Environment& env, IR::Program& program) { auto flags{inst->Flags<IR::TextureInstInfo>()}; switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: - flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + flags.type.Assign(ReadTextureType(env, cbuf)); inst->SetFlags(flags); break; case IR::Opcode::ImageFetch: if (flags.type != TextureType::Color1D) { break; } - if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + if (ReadTextureType(env, cbuf) == TextureType::Buffer) { // Replace with the bound texture type only when it's a texture buffer // If the instruction is 1D and the bound type is 2D, don't change the code and let // the rasterizer robustness handle it @@ -270,6 +319,9 @@ void TexturePass(Environment& env, IR::Program& program) { switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: case IR::Opcode::ImageWrite: { + if (cbuf.has_secondary) { + throw NotImplementedException("Unexpected separate sampler"); + } const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ @@ -294,16 +346,22 @@ void TexturePass(Environment& env, IR::Program& program) { default: if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ .type = flags.type, .is_depth = flags.is_depth != 0, + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 50b4d1c05..0f45bdfb6 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -61,8 +61,11 @@ struct StorageBufferDescriptor { }; struct TextureBufferDescriptor { + bool has_secondary; u32 cbuf_index; u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; u32 count; }; using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; @@ -79,8 +82,11 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip struct TextureDescriptor { TextureType type; bool is_depth; + bool has_secondary; u32 cbuf_index; u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; u32 count; }; using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3c907ec5a..45d837ca4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -88,23 +88,34 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, boost::container::static_vector<u32, max_elements> image_view_indices; boost::container::static_vector<VkSampler, max_elements> samplers; - const auto& launch_desc{kepler_compute.launch_description}; - const auto& cbufs{launch_desc.const_buffer_config}; - const bool via_header_index{launch_desc.linked_tsc}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read<u32>(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d5e9dae0f..08f00b9ce 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,20 +169,31 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(cbufs[cbuf_index].enabled); - const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read<u32>(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto read_handle{[&](const auto& desc) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e9b93336b..4317b2ac7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -188,9 +188,7 @@ protected: } Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, - u32 cbuf_offset) { - const u32 raw{cbuf_offset < cbuf_size ? gpu_memory->Read<u32>(cbuf_addr + cbuf_offset) : 0}; + u32 raw) { const TextureHandle handle{raw, via_header_index}; const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; @@ -219,7 +217,7 @@ protected: throw Shader::NotImplementedException("Unknown texture type"); } }()}; - texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + texture_types.emplace(raw, result); return result; } @@ -227,7 +225,7 @@ protected: GPUVAddr program_base{}; std::vector<u64> code; - std::unordered_map<u64, Shader::TextureType> texture_types; + std::unordered_map<u32, Shader::TextureType> texture_types; std::unordered_map<u64, u32> cbuf_values; u32 local_memory_size{}; @@ -250,7 +248,7 @@ using Shader::Maxwell::TranslateProgram; // TODO: Move this to a separate file constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{1}; +constexpr u32 CACHE_VERSION{2}; class GraphicsEnvironment final : public GenericEnvironment { public: @@ -308,13 +306,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{maxwell3d->regs}; - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, - cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); } private: @@ -352,13 +347,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, - cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); } private: @@ -421,7 +413,7 @@ public: code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast<char*>(code.get()), code_size); for (size_t i = 0; i < num_texture_types; ++i) { - u64 key; + u32 key; Shader::TextureType type; file.read(reinterpret_cast<char*>(&key), sizeof(key)) .read(reinterpret_cast<char*>(&type), sizeof(type)); @@ -457,8 +449,8 @@ public: return it->second; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + Shader::TextureType ReadTextureType(u32 handle) override { + const auto it{texture_types.find(handle)}; if (it == texture_types.end()) { throw Shader::LogicError("Uncached read texture type"); } @@ -483,7 +475,7 @@ public: private: std::unique_ptr<u64[]> code; - std::unordered_map<u64, Shader::TextureType> texture_types; + std::unordered_map<u32, Shader::TextureType> texture_types; std::unordered_map<u64, u32> cbuf_values; std::array<u32, 3> workgroup_size{}; u32 local_memory_size{}; |