author: ameerj <aj662@drexel.edu> 2020-07-30 21:41:11 +0200
committer: ameerj <aj662@drexel.edu> 2020-08-16 18:02:22 +0200
commit: 4539073ce1d8fd6df03263e826d3805b4909e055 (patch)
tree: d0ed30c327c5b6da9c6d6c8ba256803167f00de2
parent: Vk Async pipeline compilation (diff)
download: yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar
yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.gz
yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.bz2
yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.lz
yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.xz
yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.zst
yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.zip
7 files changed, 116 insertions, 80 deletions
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 39c73a139..d50bd347c 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -54,7 +54,7 @@ public:
         return renderpass;
     }
 
-    const GraphicsPipelineCacheKey& GetCacheKey() {
+    const GraphicsPipelineCacheKey& GetCacheKey() const {
         return m_key;
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 45d4dcb8c..a7ce621ca 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -205,20 +205,20 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     return last_shaders = shaders;
 }
 
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
     const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
     if (last_graphics_pipeline && last_graphics_key == key) {
-        return *last_graphics_pipeline;
+        return last_graphics_pipeline;
     }
     last_graphics_key = key;
 
     if (device.UseAsynchronousShaders()) {
         auto work = async_shaders.GetCompletedWork();
-        for (std::size_t i = 0; i < work.size(); ++i) {
-            auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey());
-            entry = std::move(work[i].pipeline);
+        for (auto& w : work) {
+            auto& entry = graphics_cache.at(w.pipeline->GetCacheKey());
+            entry = std::move(w.pipeline);
         }
         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
         if (is_cache_miss) {
@@ -227,7 +227,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
             async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
                                             key.padding, key.shaders, key.fixed_state);
         }
-        return *(last_graphics_pipeline = graphics_cache.at(key).get());
+        last_graphics_pipeline = graphics_cache.at(key).get();
+        return last_graphics_pipeline;
     }
 
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
@@ -239,7 +240,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
                                                      update_descriptor_queue, renderpass_cache, key,
                                                      bindings, program);
     }
-    return *(last_graphics_pipeline = entry.get());
+    last_graphics_pipeline = entry.get();
+    return last_graphics_pipeline;
 }
 
 VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c70da6da4..404f2b3f7 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -153,31 +153,46 @@ public:
 
     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
                                             VideoCommon::Shader::AsyncShaders& async_shaders);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
-    const VKDevice& GetDevice() {
+    const VKDevice& GetDevice() const {
         return device;
     }
 
     VKScheduler& GetScheduler() {
         return scheduler;
     }
+    const VKScheduler& GetScheduler() const {
+        return scheduler;
+    }
 
     VKDescriptorPool& GetDescriptorPool() {
         return descriptor_pool;
     }
 
+    const VKDescriptorPool& GetDescriptorPool() const {
+        return descriptor_pool;
+    }
+
     VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
         return update_descriptor_queue;
     }
 
+    const VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() const {
+        return update_descriptor_queue;
+    }
+
     VKRenderPassCache& GetRenderpassCache() {
         return renderpass_cache;
     }
 
+    const VKRenderPassCache& GetRenderpassCache() const {
+        return renderpass_cache;
+    }
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 6310e898c..fc1b51a96 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -404,10 +404,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
+        // The following is subject to move into the allocate workers method, to be api agnostic
+
         // Max worker threads we should allow
-        constexpr auto MAX_THREADS = 2u;
+        constexpr u32 MAX_THREADS = 4;
         // Amount of threads we should reserve for other parts of yuzu
-        constexpr auto RESERVED_THREADS = 6u;
+        constexpr u32 RESERVED_THREADS = 6;
         // Get the amount of threads we can use(this can return zero)
         const auto cpu_thread_count =
             std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
@@ -456,16 +458,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
-    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
         system.GPU().TickWork();
         return;
     }
 
-    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 
-    const auto renderpass = pipeline.GetRenderPass();
+    const auto renderpass = pipeline->GetRenderPass();
     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
     scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 
@@ -475,8 +477,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback();
 
-    const auto pipeline_layout = pipeline.GetLayout();
-    const auto descriptor_set = pipeline.CommitDescriptorSet();
+    const auto pipeline_layout = pipeline->GetLayout();
+    const auto descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 27604b9a3..f640ba649 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -287,7 +287,6 @@ private:
     VKMemoryManager& memory_manager;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
-    VideoCommon::Shader::AsyncShaders async_shaders;
 
     VKStagingBufferPool staging_pool;
     VKDescriptorPool descriptor_pool;
@@ -307,6 +306,7 @@ private:
     vk::Buffer default_buffer;
     VKMemoryCommit default_buffer_commit;
     vk::Event wfi_event;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
     View zeta_attachment;
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 335a0d05b..c536b025b 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -111,20 +111,19 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
-                                                    : AsyncShaders::Backend::OpenGL,
-                        &device,
-                        shader_type,
-                        uid,
-                        std::move(code),
-                        std::move(code_b),
-                        main_offset,
-                        compiler_settings,
-                        &registry,
-                        cpu_addr};
-
+    auto p = std::make_unique<WorkerParams>();
+    p->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
+    p->device = &device;
+    p->shader_type = shader_type;
+    p->uid = uid;
+    p->code = std::move(code);
+    p->code_b = std::move(code_b);
+    p->main_offset = main_offset;
+    p->compiler_settings = compiler_settings;
+    p->registry = &registry;
+    p->cpu_address = cpu_addr;
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(p));
     cv.notify_one();
 }
 
@@ -134,19 +133,19 @@ void AsyncShaders::QueueVulkanShader(
     std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
     Vulkan::FixedPipelineState fixed_state) {
 
-    WorkerParams params{
-        .backend = AsyncShaders::Backend::Vulkan,
-        .pp_cache = pp_cache,
-        .bindings = bindings,
-        .program = program,
-        .renderpass_params = renderpass_params,
-        .padding = padding,
-        .shaders = shaders,
-        .fixed_state = fixed_state,
-    };
+    auto p = std::make_unique<WorkerParams>();
+
+    p->backend = Backend::Vulkan;
+    p->pp_cache = pp_cache;
+    p->bindings = bindings;
+    p->program = program;
+    p->renderpass_params = renderpass_params;
+    p->padding = padding;
+    p->shaders = shaders;
+    p->fixed_state = fixed_state;
 
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(p));
     cv.notify_one();
 }
 
@@ -168,64 +167,82 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         if (pending_queue.empty()) {
             continue;
         }
-        // Pull work from queue
-        WorkerParams work = std::move(pending_queue.front());
-        pending_queue.pop_front();
 
+        // Pull work from queue
+        auto work = std::move(pending_queue.front());
+        pending_queue.pop();
         lock.unlock();
 
-        if (work.backend == AsyncShaders::Backend::OpenGL ||
-            work.backend == AsyncShaders::Backend::GLASM) {
-            VideoCommon::Shader::Registry registry = *work.registry;
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
+        if (work->backend == Backend::OpenGL || work->backend == Backend::GLASM) {
+            VideoCommon::Shader::Registry registry = *work->registry;
+            const ShaderIR ir(work->code, work->main_offset, work->compiler_settings, registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
+                OpenGL::BuildShader(*work->device, work->shader_type, work->uid, ir, registry);
             Result result{};
-            result.backend = work.backend;
-            result.cpu_address = work.cpu_address;
-            result.uid = work.uid;
-            result.code = std::move(work.code);
-            result.code_b = std::move(work.code_b);
-            result.shader_type = work.shader_type;
-
-            if (work.backend == AsyncShaders::Backend::OpenGL) {
+            result.backend = work->backend;
+            result.cpu_address = work->cpu_address;
+            result.uid = work->uid;
+            result.code = std::move(work->code);
+            result.code_b = std::move(work->code_b);
+            result.shader_type = work->shader_type;
+            // LOG_CRITICAL(Render_Vulkan, "Shader hast been Compiled \t0x{:016X} id {}",
+            // result.uid, id);
+
+            if (work->backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
-            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+            } else if (work->backend == Backend::GLASM) {
                 result.program.glasm = std::move(program->assembly_program);
             }
+            work.reset();
 
             {
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
-
-        } else if (work.backend == AsyncShaders::Backend::Vulkan) {
+        } else if (work->backend == Backend::Vulkan) {
             Vulkan::GraphicsPipelineCacheKey params_key{
-                work.renderpass_params,
-                work.padding,
-                work.shaders,
-                work.fixed_state,
+                .renderpass_params = work->renderpass_params,
+                .padding = work->padding,
+                .shaders = work->shaders,
+                .fixed_state = work->fixed_state,
             };
+
             {
-                std::unique_lock complete_lock(completed_mutex);
+                std::unique_lock find_lock{completed_mutex};
+                for (size_t i = 0; i < finished_work.size(); ++i) {
+                    // This loop deletes duplicate pipelines in finished_work
+                    // in favor of the pipeline about to be created
+
+                    if (finished_work[i].pipeline &&
+                        finished_work[i].pipeline->GetCacheKey().Hash() == params_key.Hash()) {
+                        LOG_CRITICAL(Render_Vulkan,
+                                     "Pipeliene was already here \t0x{:016X} matches 0x{:016X} ",
+                                     params_key.Hash(),
+                                     finished_work[i].pipeline->GetCacheKey().Hash());
+                        finished_work.erase(finished_work.begin() + i);
+                    }
+                }
+                find_lock.unlock();
+            }
+
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
+                work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
+                work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
 
-                // Duplicate creation of pipelines leads to instability and crashing, caused by a
-                // race condition but band-aid solution is locking the making of the pipeline
-                // results in only one pipeline created at a time.
+            {
+                std::unique_lock complete_lock(completed_mutex);
                 Result result{
-                    .backend = work.backend,
-                    .pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
-                        work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(),
-                        work.pp_cache->GetDescriptorPool(),
-                        work.pp_cache->GetUpdateDescriptorQueue(),
-                        work.pp_cache->GetRenderpassCache(), params_key, work.bindings,
-                        work.program),
+                    .backend = Backend::Vulkan,
+                    .pipeline = std::move(pipeline),
                 };
-
                 finished_work.push_back(std::move(result));
+                complete_lock.unlock();
             }
         }
+        // Give a chance for another thread to get work. Lessens duplicates
+        std::this_thread::yield();
     }
 }
 
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 702026ce2..d4eeb8fb6 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -100,7 +100,7 @@ private:
     bool HasWorkQueued();
 
     struct WorkerParams {
-        AsyncShaders::Backend backend;
+        Backend backend;
         // For OGL
         const OpenGL::Device* device;
         Tegra::Engines::ShaderType shader_type;
@@ -128,7 +128,7 @@ private:
     std::atomic<bool> is_thread_exiting{};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
     std::vector<std::thread> worker_threads;
-    std::deque<WorkerParams> pending_queue;
+    std::queue<std::unique_ptr<WorkerParams>> pending_queue;
     std::vector<AsyncShaders::Result> finished_work;
     Core::Frontend::EmuWindow& emu_window;
 };
author	ameerj <aj662@drexel.edu>	2020-07-30 21:41:11 +0200
committer	ameerj <aj662@drexel.edu>	2020-08-16 18:02:22 +0200
commit	4539073ce1d8fd6df03263e826d3805b4909e055 (patch)
tree	d0ed30c327c5b6da9c6d6c8ba256803167f00de2
parent	Vk Async pipeline compilation (diff)
download	yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.gz yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.bz2 yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.lz yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.xz yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.tar.zst yuzu-4539073ce1d8fd6df03263e826d3805b4909e055.zip