author: David Marcec <dmarcecguzman@gmail.com> 2020-07-10 05:36:38 +0200
committer: David Marcec <dmarcecguzman@gmail.com> 2020-07-17 06:24:57 +0200
commit: 468bd9c1b0f9e74f7c096b127a94a94e4ed7caec (patch)
tree: 50a0f28b7c817222247369400bedf5de1ccc4e19 /src
parent: Merge pull request #4347 from lioncash/logging (diff)
download: yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar
yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.gz
yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.bz2
yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.lz
yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.xz
yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.zst
yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.zip
16 files changed, 598 insertions, 64 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 21c46a567..3cd896a0f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -98,6 +98,8 @@ add_library(video_core STATIC
     sampler_cache.cpp
     sampler_cache.h
     shader_cache.h
+    shader_notify.cpp
+    shader_notify.h
     shader/decode/arithmetic.cpp
     shader/decode/arithmetic_immediate.cpp
     shader/decode/bfe.cpp
@@ -128,6 +130,8 @@ add_library(video_core STATIC
     shader/decode/other.cpp
     shader/ast.cpp
     shader/ast.h
+    shader/async_shaders.cpp
+    shader/async_shaders.h
     shader/compiler_settings.cpp
     shader/compiler_settings.h
     shader/control_flow.cpp
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 758bfe148..8e19c3373 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -20,6 +20,7 @@
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
+#include "video_core/shader_notify.h"
 #include "video_core/video_core.h"
 
 namespace Tegra {
@@ -36,6 +37,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render
     kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+    shader_notify = std::make_unique<VideoCore::ShaderNotify>();
 }
 
 GPU::~GPU() = default;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2c42483bd..8d04d9fd9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -33,6 +33,7 @@ class System;
 
 namespace VideoCore {
 class RendererBase;
+class ShaderNotify;
 } // namespace VideoCore
 
 namespace Tegra {
@@ -207,6 +208,14 @@ public:
         return *renderer;
     }
 
+    VideoCore::ShaderNotify& ShaderNotify() {
+        return *shader_notify;
+    }
+
+    const VideoCore::ShaderNotify& ShaderNotify() const {
+        return *shader_notify;
+    }
+
     // Waits for the GPU to finish working
     virtual void WaitIdle() const = 0;
 
@@ -347,6 +356,8 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    /// Shader build notifier
+    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
 
     std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c1f20f0ab..7c27eed6d 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -233,6 +233,8 @@ Device::Device()
                            GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
                            GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
 
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders;
+
     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
     LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e1d811966..94d38d7d1 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -104,6 +104,10 @@ public:
         return use_assembly_shaders;
     }
 
+    bool UseAsynchronousShaders() const {
+        return use_asynchronous_shaders;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
@@ -127,6 +131,7 @@ private:
     bool has_fast_buffer_sub_data{};
     bool has_nv_viewport_array2{};
     bool use_assembly_shaders{};
+    bool use_asynchronous_shaders{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e960a0ef1..fbd11e28f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -149,7 +149,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
       shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
       buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
       fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
-      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
+      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
+      async_shaders{emu_window} {
     CheckExtensions();
 
     unified_uniform_buffer.Create();
@@ -162,6 +163,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
                                  nullptr, 0);
         }
     }
+
+    if (device.UseAsynchronousShaders()) {
+        // Max worker threads we should allow
+        constexpr auto MAX_THREADS = 8u;
+        // Amount of threads we should reserve for other parts of yuzu
+        constexpr auto RESERVED_THREADS = 6u;
+        // hardware_concurrency() may return 0, so never go below the reserved count
+        const auto cpu_thread_count =
+            std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
+        // Deduce how many "extra" threads we have to use.
+        const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+        // Always allow at least 1 thread regardless of our settings
+        const auto max_worker_count = std::max(1u, max_threads_unused);
+        // Don't use more than MAX_THREADS
+        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
+        async_shaders.AllocateWorkers(worker_count);
+    }
 }
 
 RasterizerOpenGL::~RasterizerOpenGL() {
@@ -336,7 +354,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             continue;
         }
 
-        Shader* const shader = shader_cache.GetStageProgram(program);
+        Shader* shader = shader_cache.GetStageProgram(program, async_shaders);
 
         if (device.UseAssemblyShaders()) {
             // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -353,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         SetupDrawTextures(stage, shader);
         SetupDrawImages(stage, shader);
 
-        const GLuint program_handle = shader->GetHandle();
+        const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
         switch (program) {
         case Maxwell::ShaderProgram::VertexA:
         case Maxwell::ShaderProgram::VertexB:
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4f082592f..a95646936 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -33,6 +33,7 @@
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/async_shaders.h"
 #include "video_core/textures/texture.h"
 
 namespace Core {
@@ -91,6 +92,14 @@ public:
         return num_queued_commands > 0;
     }
 
+    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+        return async_shaders;
+    }
+
+    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+        return async_shaders;
+    }
+
 private:
     /// Configures the color and depth framebuffer states.
     void ConfigureFramebuffers();
@@ -242,6 +251,7 @@ private:
     ScreenInfo& screen_info;
     ProgramManager& program_manager;
     StateTracker& state_tracker;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
 
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f8b322227..b05cb641c 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -177,6 +177,12 @@ public:
         Release();
     }
 
+    OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
+        Release();
+        handle = std::exchange(o.handle, 0);
+        return *this;
+    }
+
     /// Deletes the internal OpenGL resource
     void Release();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c6a3bf3a1..f469ed656 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -31,6 +31,7 @@
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
 
 namespace OpenGL {
 
@@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
     return registry;
 }
 
+std::unordered_set<GLenum> GetSupportedFormats() {
+    GLint num_formats;
+    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+
+    std::vector<GLint> formats(num_formats);
+    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+
+    std::unordered_set<GLenum> supported_formats;
+    for (const GLint format : formats) {
+        supported_formats.insert(static_cast<GLenum>(format));
+    }
+    return supported_formats;
+}
+
+} // Anonymous namespace
+
 ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
-                             const ShaderIR& ir, const Registry& registry,
-                             bool hint_retrievable = false) {
+                             const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
     const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
     LOG_INFO(Render_OpenGL, "{}", shader_id);
 
@@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
     return program;
 }
 
-std::unordered_set<GLenum> GetSupportedFormats() {
-    GLint num_formats;
-    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
-
-    std::vector<GLint> formats(num_formats);
-    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
-
-    std::unordered_set<GLenum> supported_formats;
-    for (const GLint format : formats) {
-        supported_formats.insert(static_cast<GLenum>(format));
-    }
-    return supported_formats;
-}
-
-} // Anonymous namespace
-
 Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
-               ProgramSharedPtr program_)
-    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
+               ProgramSharedPtr program_, bool is_built)
+    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
+      is_built(is_built) {
     handle = program->assembly_program.handle;
     if (handle == 0) {
         handle = program->source_program.handle;
     }
-    ASSERT(handle != 0);
+    if (is_built) {
+        ASSERT(handle != 0);
+    }
 }
 
 Shader::~Shader() = default;
@@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const {
     return handle;
 }
 
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
-                                                      Maxwell::ShaderProgram program_type,
-                                                      ProgramCode code, ProgramCode code_b) {
+bool Shader::IsBuilt() const {
+    return is_built;
+}
+
+void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
+    program->source_program = std::move(new_program);
+    handle = program->source_program.handle;
+    is_built = true;
+}
+
+void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
+    program->assembly_program = std::move(new_program);
+    handle = program->assembly_program.handle;
+    is_built = true;
+}
+
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(
+    const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
+    ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
     const auto shader_type = GetShaderType(program_type);
     const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
-    auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
-    const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
-    // TODO(Rodrigo): Handle VertexA shaders
-    // std::optional<ShaderIR> ir_b;
-    // if (!code_b.empty()) {
-    //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
-    // }
-    auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+    auto& gpu = params.system.GPU();
+    gpu.ShaderNotify().MarkSharderBuilding();
+
+    auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
+    if (!async_shaders.IsShaderAsync(params.system.GPU()) ||
+        !params.device.UseAsynchronousShaders()) {
+        const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+        // TODO(Rodrigo): Handle VertexA shaders
+        // std::optional<ShaderIR> ir_b;
+        // if (!code_b.empty()) {
+        //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
+        // }
+        auto program =
+            BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+        ShaderDiskCacheEntry entry;
+        entry.type = shader_type;
+        entry.code = std::move(code);
+        entry.code_b = std::move(code_b);
+        entry.unique_identifier = params.unique_identifier;
+        entry.bound_buffer = registry->GetBoundBuffer();
+        entry.graphics_info = registry->GetGraphicsInfo();
+        entry.keys = registry->GetKeys();
+        entry.bound_samplers = registry->GetBoundSamplers();
+        entry.bindless_samplers = registry->GetBindlessSamplers();
+        params.disk_cache.SaveEntry(std::move(entry));
+
+        gpu.ShaderNotify().MarkShaderComplete();
+
+        return std::unique_ptr<Shader>(new Shader(std::move(registry),
+                                                  MakeEntries(params.device, ir, shader_type),
+                                                  std::move(program), true));
+    } else {
+        // Required for entries
+        const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+        auto entries = MakeEntries(params.device, ir, shader_type);
 
-    ShaderDiskCacheEntry entry;
-    entry.type = shader_type;
-    entry.code = std::move(code);
-    entry.code_b = std::move(code_b);
-    entry.unique_identifier = params.unique_identifier;
-    entry.bound_buffer = registry->GetBoundBuffer();
-    entry.graphics_info = registry->GetGraphicsInfo();
-    entry.keys = registry->GetKeys();
-    entry.bound_samplers = registry->GetBoundSamplers();
-    entry.bindless_samplers = registry->GetBindlessSamplers();
-    params.disk_cache.SaveEntry(std::move(entry));
+        async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
+                                        std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
+                                        COMPILER_SETTINGS, *registry, cpu_addr);
 
-    return std::unique_ptr<Shader>(new Shader(
-        std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
+        auto program = std::make_shared<ProgramHandle>();
+        return std::unique_ptr<Shader>(
+            new Shader(std::move(registry), std::move(entries), std::move(program), false));
+    }
 }
 
 std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
                                                        ProgramCode code) {
     const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
-    auto& engine = params.system.GPU().KeplerCompute();
+    auto& gpu = params.system.GPU();
+    gpu.ShaderNotify().MarkSharderBuilding();
+
+    auto& engine = gpu.KeplerCompute();
     auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
     const u64 uid = params.unique_identifier;
@@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
+    gpu.ShaderNotify().MarkShaderComplete();
+
     return std::unique_ptr<Shader>(new Shader(std::move(registry),
                                               MakeEntries(params.device, ir, ShaderType::Compute),
                                               std::move(program)));
@@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
     return program;
 }
 
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
+                                           VideoCommon::Shader::AsyncShaders& async_shaders) {
     if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
-        return last_shaders[static_cast<std::size_t>(program)];
+        auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
+        if (last_shader->IsBuilt()) {
+            return last_shader;
+        }
     }
 
     auto& memory_manager{system.GPU().MemoryManager()};
     const GPUVAddr address{GetShaderAddress(system, program)};
 
+    if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
+        auto completed_work = async_shaders.GetCompletedWork();
+        for (auto& work : completed_work) {
+            Shader* shader = TryGet(work.cpu_address);
+            auto& gpu = system.GPU();
+            gpu.ShaderNotify().MarkShaderComplete();
+            if (shader == nullptr) {
+                continue;
+            }
+            using namespace VideoCommon::Shader;
+            if (work.backend == AsyncShaders::Backend::OpenGL) {
+                shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
+            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+                shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+            }
+
+            ShaderDiskCacheEntry entry;
+            entry.type = work.shader_type;
+            entry.code = std::move(work.code);
+            entry.code_b = std::move(work.code_b);
+            entry.unique_identifier = work.uid;
+
+            auto& registry = shader->GetRegistry();
+
+            entry.bound_buffer = registry.GetBoundBuffer();
+            entry.graphics_info = registry.GetGraphicsInfo();
+            entry.keys = registry.GetKeys();
+            entry.bound_samplers = registry.GetBoundSamplers();
+            entry.bindless_samplers = registry.GetBindlessSamplers();
+            disk_cache.SaveEntry(std::move(entry));
+        }
+    }
+
     // Look up shader in the cache based on address
     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
     if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
@@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     std::unique_ptr<Shader> shader;
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
-        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
+        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
+                                               async_shaders, cpu_addr.value_or(0));
     } else {
         shader = Shader::CreateFromCache(params, found->second);
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 994aaeaf2..7528ac686 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -33,6 +33,10 @@ namespace Core::Frontend {
 class EmuWindow;
 }
 
+namespace VideoCommon::Shader {
+class AsyncShaders;
+}
+
 namespace OpenGL {
 
 class Device;
@@ -61,6 +65,11 @@ struct ShaderParameters {
     u64 unique_identifier;
 };
 
+ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
+                             u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
+                             const VideoCommon::Shader::Registry& registry,
+                             bool hint_retrievable = false);
+
 class Shader final {
 public:
     ~Shader();
@@ -68,15 +77,28 @@ public:
     /// Gets the GL program handle for the shader
     GLuint GetHandle() const;
 
+    bool IsBuilt() const;
+
     /// Gets the shader entries for the shader
     const ShaderEntries& GetEntries() const {
         return entries;
     }
 
-    static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
-                                                         Maxwell::ShaderProgram program_type,
-                                                         ProgramCode program_code,
-                                                         ProgramCode program_code_b);
+    const VideoCommon::Shader::Registry& GetRegistry() const {
+        return *registry;
+    }
+
+    /// Mark an OpenGL shader as built
+    void AsyncOpenGLBuilt(OGLProgram new_program);
+
+    /// Mark a GLASM shader as built
+    void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+
+    static std::unique_ptr<Shader> CreateStageFromMemory(
+        const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+        ProgramCode program_code, ProgramCode program_code_b,
+        VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+
     static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
                                                           ProgramCode code);
 
@@ -85,12 +107,13 @@ public:
 
 private:
     explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
-                    ProgramSharedPtr program);
+                    ProgramSharedPtr program, bool is_built = true);
 
     std::shared_ptr<VideoCommon::Shader::Registry> registry;
     ShaderEntries entries;
     ProgramSharedPtr program;
     GLuint handle = 0;
+    bool is_built{};
 };
 
 class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
@@ -104,7 +127,8 @@ public:
                        const VideoCore::DiskResourceLoadCallback& callback);
 
     /// Gets the current specified shader stage program
-    Shader* GetStageProgram(Maxwell::ShaderProgram program);
+    Shader* GetStageProgram(Maxwell::ShaderProgram program,
+                            VideoCommon::Shader::AsyncShaders& async_shaders);
 
     /// Gets a compute kernel in the passed address
     Shader* GetComputeKernel(GPUVAddr code_addr);
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..fb94ac2e7
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,170 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+
+namespace VideoCommon::Shader {
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
+AsyncShaders::~AsyncShaders() {
+    KillWorkers();
+}
+
+void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
+    // If we already have exactly this many workers, or none were requested, do nothing
+    if (num_workers == worker_threads.size() || num_workers == 0) {
+        return;
+    }
+
+    // If workers already exist, clear them
+    if (!worker_threads.empty()) {
+        FreeWorkers();
+    }
+
+    // Create workers
+    for (std::size_t i = 0; i < num_workers; i++) {
+        context_list.push_back(emu_window.CreateSharedContext());
+        worker_threads.push_back(std::move(
+            std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())));
+    }
+}
+
+void AsyncShaders::FreeWorkers() {
+    // Ask every worker to leave its loop, then wait for each one to finish
+    is_thread_exiting.store(true);
+    for (auto& thread : worker_threads) {
+        thread.join();
+    }
+    // All workers have been joined: drop their shared contexts and thread handles
+    context_list.clear();
+    worker_threads.clear();
+    // Re-arm the flag; otherwise workers allocated after this call would exit at once
+    is_thread_exiting.store(false);
+}
+
+void AsyncShaders::KillWorkers() {
+    is_thread_exiting.store(true);
+    for (auto& thread : worker_threads) {
+        thread.detach();
+    }
+    // Clear our shared contexts
+    context_list.clear();
+
+    // Clear our worker threads
+    worker_threads.clear();
+}
+
+bool AsyncShaders::HasWorkQueued() {
+    std::shared_lock lock(queue_mutex);
+    return !pending_queue.empty();
+}
+
+bool AsyncShaders::HasCompletedWork() {
+    std::shared_lock lock(completed_mutex);
+    return !finished_work.empty();
+}
+
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+    const auto& regs = gpu.Maxwell3D().regs;
+
+    // If something is using depth, we can assume that games are not rendering anything which will
+    // be used one time.
+    if (regs.zeta_enable) {
+        return true;
+    }
+
+    // If games are using a small index count, we can assume these are full screen quads. Usually
+    // these shaders are only used once for building textures so we can assume they can't be built
+    // async
+    if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
+        return false;
+    }
+
+    return true;
+}
+
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+    std::vector<AsyncShaders::Result> results;
+    {
+        std::unique_lock lock(completed_mutex);
+        results.assign(std::make_move_iterator(finished_work.begin()),
+                       std::make_move_iterator(finished_work.end()));
+        finished_work.clear();
+    }
+    return results;
+}
+
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+                                     Tegra::Engines::ShaderType shader_type, u64 uid,
+                                     std::vector<u64> code, std::vector<u64> code_b,
+                                     u32 main_offset,
+                                     VideoCommon::Shader::CompilerSettings compiler_settings,
+                                     const VideoCommon::Shader::Registry& registry,
+                                     VAddr cpu_addr) {
+    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
+                                                    : AsyncShaders::Backend::OpenGL,
+                        device,
+                        shader_type,
+                        uid,
+                        std::move(code),
+                        std::move(code_b),
+                        main_offset,
+                        compiler_settings,
+                        registry,
+                        cpu_addr};
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push_back(std::move(params));
+}
+
+// Worker loop: pops queued shaders, builds each while `context` is acquired, stores the result
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+    using namespace std::chrono_literals;
+    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+        // Shared-lock peek; when idle, nap briefly instead of spinning a core at 100%
+        if (!HasWorkQueued()) {
+            std::this_thread::sleep_for(1ms);
+            continue;
+        }
+        // Complete lock for pulling workload
+        queue_mutex.lock();
+        // Another thread beat us, just unlock and wait for the next load
+        if (pending_queue.empty()) {
+            queue_mutex.unlock();
+            continue;
+        }
+        // Pull work from queue
+        WorkerParams work = std::move(pending_queue.front());
+        pending_queue.pop_front();
+        queue_mutex.unlock();
+
+        if (work.backend == AsyncShaders::Backend::OpenGL ||
+            work.backend == AsyncShaders::Backend::GLASM) {
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
+            const auto scope = context->Acquire();
+            auto program =
+                OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
+            Result result{};
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
+
+            if (work.backend == AsyncShaders::Backend::OpenGL) {
+                result.program.opengl = std::move(program->source_program);
+            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+                result.program.glasm = std::move(program->assembly_program);
+            }
+            // Publish the result; the lock is the last local, so it is released first on exit
+            std::unique_lock complete_lock(completed_mutex);
+            finished_work.push_back(std::move(result));
+        }
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..26bc38326
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,107 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <deque>
+#include <memory>
+#include <shared_mutex>
+#include <thread>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+
+namespace Tegra {
+class GPU;
+}
+
+namespace VideoCommon::Shader {
+
+class AsyncShaders {
+public:
+    enum class Backend {
+        OpenGL,
+        GLASM,
+    };
+
+    struct ResultPrograms {
+        OpenGL::OGLProgram opengl;
+        OpenGL::OGLAssemblyProgram glasm;
+    };
+
+    struct Result {
+        u64 uid;
+        VAddr cpu_address;
+        Backend backend;
+        ResultPrograms program;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        Tegra::Engines::ShaderType shader_type;
+    };
+
+    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+    ~AsyncShaders();
+
+    /// Start up shader worker threads
+    void AllocateWorkers(std::size_t num_workers);
+
+    /// Signal all worker threads to exit, join them and release their shared contexts
+    void FreeWorkers();
+
+    /// Signal all worker threads to exit and detach them without waiting for them to finish
+    void KillWorkers();
+
+    /// Check our worker queue to see if we have any work queued already
+    bool HasWorkQueued();
+
+    /// Check to see if any shaders have actually been compiled
+    bool HasCompletedWork();
+
+    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot
+    /// build every shader async as some shaders are only built and executed once. We try to
+    /// "guess" which shader would be used only once
+    bool IsShaderAsync(const Tegra::GPU& gpu) const;
+
+    /// Pulls completed compiled shaders
+    std::vector<Result> GetCompletedWork();
+
+    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+                           VideoCommon::Shader::CompilerSettings compiler_settings,
+                           const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
+
+private:
+    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
+
+    struct WorkerParams {
+        AsyncShaders::Backend backend;
+        OpenGL::Device device;
+        Tegra::Engines::ShaderType shader_type;
+        u64 uid;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        u32 main_offset;
+        VideoCommon::Shader::CompilerSettings compiler_settings;
+        VideoCommon::Shader::Registry registry;
+        VAddr cpu_address;
+    };
+
+    std::shared_mutex queue_mutex;
+    std::shared_mutex completed_mutex;
+    std::atomic<bool> is_thread_exiting{};
+    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
+    std::vector<std::thread> worker_threads;
+    std::deque<WorkerParams> pending_queue;
+    std::vector<AsyncShaders::Result> finished_work;
+    Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
new file mode 100644
index 000000000..46fd0baae
--- /dev/null
+++ b/src/video_core/shader_notify.cpp
@@ -0,0 +1,42 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader_notify.h"
+
+using namespace std::chrono_literals;
+
+namespace VideoCore {
+namespace {
+constexpr auto UPDATE_TICK = 32ms; // Refresh threshold compared against in GetShadersBuilding
+} // namespace
+
+ShaderNotify::ShaderNotify() = default; // Members are initialized in the header
+ShaderNotify::~ShaderNotify() = default;
+
+std::size_t ShaderNotify::GetShadersBuilding() { // Throttled view of accurate_count
+    const auto now = std::chrono::high_resolution_clock::now();
+    if (now - last_update > UPDATE_TICK) { // Re-read the live count at most once per tick
+        std::shared_lock lock(mutex);
+        last_updated_count = accurate_count;
+        last_update = now; // Restart the tick so the next refresh waits UPDATE_TICK
+    }
+    return last_updated_count; // NOTE(review): cache is unguarded; assumes one polling thread
+}
+
+std::size_t ShaderNotify::GetShadersBuildingAccurate() { // Unthrottled read of the live counter
+    const std::shared_lock read_guard{mutex};
+    return accurate_count;
+}
+
+void ShaderNotify::MarkShaderComplete() { // Takes one off the live counter (exclusive lock)
+    const std::unique_lock write_guard{mutex};
+    --accurate_count;
+}
+
+void ShaderNotify::MarkSharderBuilding() { // Adds one to the live counter (exclusive lock)
+    const std::unique_lock write_guard{mutex};
+    ++accurate_count;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
new file mode 100644
index 000000000..a9c92d179
--- /dev/null
+++ b/src/video_core/shader_notify.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <shared_mutex>
+#include "common/common_types.h"
+
+namespace VideoCore {
+class ShaderNotify { // Counter of shaders being built, plus a cached copy for polling
+public:
+    ShaderNotify();
+    ~ShaderNotify();
+
+    std::size_t GetShadersBuilding(); // Cached count; see UPDATE_TICK in the .cpp
+    std::size_t GetShadersBuildingAccurate(); // Live count, read under a shared lock
+
+    void MarkShaderComplete(); // Decrements the live count
+    void MarkSharderBuilding(); // Increments the live count
+
+private:
+    std::size_t last_updated_count{}; // Value handed out by GetShadersBuilding
+    std::size_t accurate_count{}; // Live count; every access in the .cpp holds `mutex`
+    std::shared_mutex mutex;
+    std::chrono::high_resolution_clock::time_point last_update{}; // See GetShadersBuilding
+};
+} // namespace VideoCore
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 9f758605a..6909d65d0 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -94,6 +94,8 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "core/perf_stats.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
+#include "video_core/gpu.h"
+#include "video_core/shader_notify.h"
 #include "yuzu/about_dialog.h"
 #include "yuzu/bootmanager.h"
 #include "yuzu/compatdb.h"
@@ -498,6 +500,8 @@ void GMainWindow::InitializeWidgets() {
     message_label->setAlignment(Qt::AlignLeft);
     statusBar()->addPermanentWidget(message_label, 1);
 
+    shader_building_label = new QLabel();
+    shader_building_label->setToolTip(tr("The amount of shaders currently being built"));
     emu_speed_label = new QLabel();
     emu_speed_label->setToolTip(
         tr("Current emulation speed. Values higher or lower than 100% "
@@ -510,7 +514,8 @@ void GMainWindow::InitializeWidgets() {
         tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For "
            "full-speed emulation this should be at most 16.67 ms."));
 
-    for (auto& label : {emu_speed_label, game_fps_label, emu_frametime_label}) {
+    for (auto& label :
+         {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) {
         label->setVisible(false);
         label->setFrameStyle(QFrame::NoFrame);
         label->setContentsMargins(4, 0, 4, 0);
@@ -1176,6 +1181,7 @@ void GMainWindow::ShutdownGame() {
 
     // Disable status bar updates
     status_bar_update_timer.stop();
+    shader_building_label->setVisible(false);
     emu_speed_label->setVisible(false);
     game_fps_label->setVisible(false);
     emu_frametime_label->setVisible(false);
@@ -2186,6 +2192,17 @@ void GMainWindow::UpdateStatusBar() {
     }
 
     auto results = Core::System::GetInstance().GetAndResetPerfStats();
+    auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify();
+    const auto shaders_building = shader_notify.GetShadersBuilding();
+
+    if (shaders_building != 0) {
+        shader_building_label->setText(
+            tr("Building: %1 shader").arg(shaders_building) +
+            (shaders_building != 1 ? QString::fromStdString("s") : QString::fromStdString("")));
+        shader_building_label->setVisible(true);
+    } else {
+        shader_building_label->setVisible(false);
+    }
 
     if (Settings::values.use_frame_limit.GetValue()) {
         emu_speed_label->setText(tr("Speed: %1% / %2%")
@@ -2315,9 +2332,12 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
     if (behavior == ReinitializeKeyBehavior::Warning) {
         const auto res = QMessageBox::information(
             this, tr("Confirm Key Rederivation"),
-            tr("You are about to force rederive all of your keys. \nIf you do not know what this "
-               "means or what you are doing, \nthis is a potentially destructive action. \nPlease "
-               "make sure this is what you want \nand optionally make backups.\n\nThis will delete "
+            tr("You are about to force rederive all of your keys. \nIf you do not know what "
+               "this "
+               "means or what you are doing, \nthis is a potentially destructive action. "
+               "\nPlease "
+               "make sure this is what you want \nand optionally make backups.\n\nThis will "
+               "delete "
                "your autogenerated key files and re-run the key derivation module."),
             QMessageBox::StandardButtons{QMessageBox::Ok, QMessageBox::Cancel});
 
@@ -2628,8 +2648,8 @@ int main(int argc, char* argv[]) {
 
 #ifdef __APPLE__
     // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/".
-    // But since we require the working directory to be the executable path for the location of the
-    // user folder in the Qt Frontend, we need to cd into that working directory
+    // But since we require the working directory to be the executable path for the location of
+    // the user folder in the Qt Frontend, we need to cd into that working directory
     const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + "..";
     chdir(bin_path.c_str());
 #endif
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index adff65fb5..59d9073ae 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -248,6 +248,7 @@ private:
 
     // Status bar elements
     QLabel* message_label = nullptr;
+    QLabel* shader_building_label = nullptr;
     QLabel* emu_speed_label = nullptr;
     QLabel* game_fps_label = nullptr;
     QLabel* emu_frametime_label = nullptr;
author	David Marcec <dmarcecguzman@gmail.com>	2020-07-10 05:36:38 +0200
committer	David Marcec <dmarcecguzman@gmail.com>	2020-07-17 06:24:57 +0200
commit	468bd9c1b0f9e74f7c096b127a94a94e4ed7caec (patch)
tree	50a0f28b7c817222247369400bedf5de1ccc4e19 /src
parent	Merge pull request #4347 from lioncash/logging (diff)
download	yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.gz yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.bz2 yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.lz yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.xz yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.zst yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.zip