diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 92 |
1 files changed, 51 insertions, 41 deletions
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5c7b7945..ea66e621e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -27,6 +27,7 @@ #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" +#include "video_core/shader_cache.h" namespace Vulkan { @@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; +constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ @@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, u32 binding = base_binding; AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); - AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); + AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); + AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); return binding; } @@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, - u32 main_offset) - : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, +Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + VideoCommon::Shader::ProgramCode program_code, u32 main_offset) + : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} -CachedShader::~CachedShader() = default; +Shader::~Shader() = default; -Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( - Core::System& system, Tegra::Engines::ShaderType stage) { - if (stage == Tegra::Engines::ShaderType::Compute) { +Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, + Tegra::Engines::ShaderType stage) { + if (stage == ShaderType::Compute) { return system.GPU().KeplerCompute(); } else { return system.GPU().Maxwell3D(); @@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache) - : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, - descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, - renderpass_cache{renderpass_cache} {} + : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, + scheduler{scheduler}, descriptor_pool{descriptor_pool}, + update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} VKPipelineCache::~VKPipelineCache() = default; -std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { +std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { const auto& gpu = system.GPU().Maxwell3D(); - std::array<Shader, Maxwell::MaxShaderProgram> shaders; + std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { const GPUVAddr program_addr{GetShaderAddress(system, program)}; const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); ASSERT(cpu_addr); - auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; - if (!shader) { + + Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); + if (!result) { const auto host_ptr{memory_manager.GetPointer(program_addr)}; // No shader found - create a new one constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); + const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + + auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), + stage_offset); + result = shader.get(); - shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, - std::move(code), stage_offset); if (cpu_addr) { - Register(shader); + Register(std::move(shader), *cpu_addr, size_in_bytes); } else { - null_shader = shader; + null_shader = std::move(shader); } } - shaders[index] = std::move(shader); + shaders[index] = result; } return last_shaders = shaders; } @@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); ASSERT(cpu_addr); - auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; + Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); if (!shader) { // No shader found - create a new one const auto host_ptr = memory_manager.GetPointer(program_addr); ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); - shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, - program_addr, *cpu_addr, std::move(code), - KERNEL_MAIN_OFFSET); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + + auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, + std::move(code), KERNEL_MAIN_OFFSET); + shader = shader_info.get(); + if (cpu_addr) { - Register(shader); + Register(std::move(shader_info), *cpu_addr, size_in_bytes); } else { - null_kernel = shader; + null_kernel = std::move(shader_info); } } @@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } -void VKPipelineCache::Unregister(const Shader& shader) { +void VKPipelineCache::OnShaderRemoval(Shader* shader) { bool finished = false; const auto Finish = [&] { // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and @@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) { Finish(); it = compute_cache.erase(it); } - - RasterizerCache::Unregister(shader); } std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> @@ -312,7 +319,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { ASSERT(point_size != 0.0f); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); + const auto& attribute = fixed_state.vertex_input.attributes[i]; + specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; + specialization.attribute_types[i] = attribute.Type(); } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; @@ -328,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { } const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; - ASSERT(shader); + const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const auto program_type = GetShaderType(program_enum); + const ShaderType program_type = GetShaderType(program_enum); const auto& entries = shader->GetEntries(); program[stage] = { Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), @@ -375,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 return; } - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple uniform texels at once causes the driver to - // crash. + if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || + descriptor_type == STORAGE_TEXEL_BUFFER) { + // Nvidia has a bug where updating multiple texels at once causes the driver to crash. + // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); entry.dstBinding = binding + i; entry.dstArrayElement = 0; entry.descriptorCount = 1; entry.descriptorType = descriptor_type; - entry.offset = offset + i * entry_size; + entry.offset = static_cast<std::size_t>(offset + i * entry_size); entry.stride = entry_size; } } else if (count > 0) { @@ -405,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries( std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); - AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); + AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); + AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); } |