// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include "common/cityhash.h" #include "common/settings.h" // for enum class Settings::ShaderBackend #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { using Shader::ImageBufferDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; size_t ComputePipelineKey::Hash() const noexcept { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof *this)); } bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { return std::memcmp(this, &rhs, sizeof *this) == 0; } ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, ProgramManager& program_manager_, const Shader::Info& info_, std::string code, std::vector code_v) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { case Settings::ShaderBackend::GLSL: source_program = CreateProgram(code, GL_COMPUTE_SHADER); break; case Settings::ShaderBackend::GLASM: assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); break; case Settings::ShaderBackend::SPIRV: source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); const bool is_glasm{assembly_program.handle != 0}; const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)}; use_storage_buffers = !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); writes_global_memory = !use_storage_buffers && std::ranges::any_of(info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); } void ComputePipeline::Configure() { buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); ++ssbo_index; } texture_cache.SynchronizeComputeDescriptors(); boost::container::static_vector views; std::array samplers; std::array textures; std::array images; GLsizei sampler_binding{}; GLsizei texture_binding{}; GLsizei image_binding{}; const auto& qmd{kepler_compute->launch_description}; const auto& cbufs{qmd.const_buffer_config}; const bool via_header_index{qmd.linked_tsc != 0}; const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + secondary_offset}; const u32 lhs_raw{gpu_memory->Read(addr) << desc.shift_left}; const u32 rhs_raw{gpu_memory->Read(separate_addr) << desc.secondary_shift_left}; return TexturePair(lhs_raw | rhs_raw, via_header_index); } } return TexturePair(gpu_memory->Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc, bool blacklist) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({ .index = handle.first, .blacklist = blacklist, .id = {}, }); } }}; for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); samplers[sampler_binding++] = 0; } } for (const auto& desc : info.image_buffer_descriptors) { add_image(desc, false); } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers[sampler_binding++] = sampler->Handle(); } } for (const auto& desc : info.image_descriptors) { add_image(desc, desc.is_written); } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { program_manager.BindComputeProgram(source_program.handle); } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)}; buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); ++texbuf_index; } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); buffer_cache.UpdateComputeBuffers(); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + num_image_buffers}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; u32 texture_scaling_mask{}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding] = image_view.Handle(desc.type); if (texture_cache.IsRescaling(image_view)) { texture_scaling_mask |= 1u << texture_binding; } ++texture_binding; } } u32 image_scaling_mask{}; for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } images[image_binding] = image_view.StorageView(desc.type, desc.format); if (texture_cache.IsRescaling(image_view)) { image_scaling_mask |= 1u << image_binding; } ++image_binding; } } if (info.uses_rescaling_uniform) { const f32 float_texture_scaling_mask{Common::BitCast(texture_scaling_mask)}; const f32 float_image_scaling_mask{Common::BitCast(image_scaling_mask)}; if (assembly_program.handle != 0) { glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask, float_image_scaling_mask, 0.0f, 0.0f); } else { glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask, float_image_scaling_mask, 0.0f, 0.0f); } } if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); glBindSamplers(0, sampler_binding, samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } } } // namespace OpenGL