diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 181 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 36 |
7 files changed, 206 insertions, 58 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c1f20f0ab..630acb73b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -233,6 +233,8 @@ Device::Device() GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e1d811966..94d38d7d1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -104,6 +104,10 @@ public: return use_assembly_shaders; } + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -127,6 +131,7 @@ private: bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool use_assembly_shaders{}; + bool use_asynchronous_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e960a0ef1..c3fad563c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -149,7 +149,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, - screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { + screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, + async_shaders{emu_window} { CheckExtensions(); unified_uniform_buffer.Create(); @@ -162,6 +163,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind nullptr, 0); } } + + if (device.UseAsynchronousShaders()) { + // Max worker threads we should allow + constexpr auto MAX_THREADS = 2u; + // Amount of threads we should reserve for other parts of yuzu + constexpr auto RESERVED_THREADS = 6u; + // Get the amount of threads we can use(this can return zero) + const auto cpu_thread_count = + std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); + // Deduce how many "extra" threads we have to use. + const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1u, max_threads_unused); + // Don't use more than MAX_THREADS + const auto worker_count = std::min(max_worker_count, MAX_THREADS); + async_shaders.AllocateWorkers(worker_count); + } } RasterizerOpenGL::~RasterizerOpenGL() { @@ -336,7 +354,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - Shader* const shader = shader_cache.GetStageProgram(program); + Shader* shader = shader_cache.GetStageProgram(program, async_shaders); if (device.UseAssemblyShaders()) { // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this @@ -353,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawTextures(stage, shader); SetupDrawImages(stage, shader); - const GLuint program_handle = shader->GetHandle(); + const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f082592f..a95646936 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -33,6 +33,7 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core { @@ -91,6 +92,14 @@ public: return num_queued_commands > 0; } + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -242,6 +251,7 @@ private: ScreenInfo& screen_info; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCommon::Shader::AsyncShaders async_shaders; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f8b322227..b05cb641c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -177,6 +177,12 @@ public: Release(); } + OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c6a3bf3a1..f469ed656 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace OpenGL { @@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { return registry; } +std::unordered_set<GLenum> GetSupportedFormats() { + GLint num_formats; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector<GLint> formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { + supported_formats.insert(static_cast<GLenum>(format)); + } + return supported_formats; +} + +} // Anonymous namespace + ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, - bool hint_retrievable = false) { + const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { const std::string shader_id = MakeShaderID(unique_identifier, shader_type); LOG_INFO(Render_OpenGL, "{}", shader_id); @@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); - } - return supported_formats; -} - -} // Anonymous namespace - Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { + ProgramSharedPtr program_, bool is_built) + : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, + is_built(is_built) { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; } - ASSERT(handle != 0); + if (is_built) { + ASSERT(handle != 0); + } } Shader::~Shader() = default; @@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const { return handle; } -std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode code, ProgramCode code_b) { +bool Shader::IsBuilt() const { + return is_built; +} + +void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { + program->source_program = std::move(new_program); + handle = program->source_program.handle; + is_built = true; +} + +void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { + program->assembly_program = std::move(new_program); + handle = program->assembly_program.handle; + is_built = true; +} + +std::unique_ptr<Shader> Shader::CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, + ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); + if (!async_shaders.IsShaderAsync(params.system.GPU()) || + !params.device.UseAsynchronousShaders()) { + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!code_b.empty()) { + // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); + // } + auto program = + BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + gpu.ShaderNotify().MarkShaderComplete(); + + return std::unique_ptr<Shader>(new Shader(std::move(registry), + MakeEntries(params.device, ir, shader_type), + std::move(program), true)); + } else { + // Required for entries + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + auto entries = MakeEntries(params.device, ir, shader_type); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); + async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, + std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, + COMPILER_SETTINGS, *registry, cpu_addr); - return std::unique_ptr<Shader>(new Shader( - std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); + auto program = std::make_shared<ProgramHandle>(); + return std::unique_ptr<Shader>( + new Shader(std::move(registry), std::move(entries), std::move(program), false)); + } } std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto& engine = params.system.GPU().KeplerCompute(); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto& engine = gpu.KeplerCompute(); auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; @@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); + gpu.ShaderNotify().MarkShaderComplete(); + return std::unique_ptr<Shader>(new Shader(std::move(registry), MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); @@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( return program; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { +Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders) { if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { - return last_shaders[static_cast<std::size_t>(program)]; + auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; + if (last_shader->IsBuilt()) { + return last_shader; + } } auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr address{GetShaderAddress(system, program)}; + if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { + auto completed_work = async_shaders.GetCompletedWork(); + for (auto& work : completed_work) { + Shader* shader = TryGet(work.cpu_address); + auto& gpu = system.GPU(); + gpu.ShaderNotify().MarkShaderComplete(); + if (shader == nullptr) { + continue; + } + using namespace VideoCommon::Shader; + if (work.backend == AsyncShaders::Backend::OpenGL) { + shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } + + ShaderDiskCacheEntry entry; + entry.type = work.shader_type; + entry.code = std::move(work.code); + entry.code_b = std::move(work.code_b); + entry.unique_identifier = work.uid; + + auto& registry = shader->GetRegistry(); + + entry.bound_buffer = registry.GetBoundBuffer(); + entry.graphics_info = registry.GetGraphicsInfo(); + entry.keys = registry.GetKeys(); + entry.bound_samplers = registry.GetBoundSamplers(); + entry.bindless_samplers = registry.GetBindlessSamplers(); + disk_cache.SaveEntry(std::move(entry)); + } + } + // Look up shader in the cache based on address const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { @@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); + shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), + async_shaders, cpu_addr.value_or(0)); } else { shader = Shader::CreateFromCache(params, found->second); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 994aaeaf2..7528ac686 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -33,6 +33,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class AsyncShaders; +} + namespace OpenGL { class Device; @@ -61,6 +65,11 @@ struct ShaderParameters { u64 unique_identifier; }; +ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, + u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + bool hint_retrievable = false); + class Shader final { public: ~Shader(); @@ -68,15 +77,28 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; + bool IsBuilt() const; + /// Gets the shader entries for the shader const ShaderEntries& GetEntries() const { return entries; } - static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, - ProgramCode program_code_b); + const VideoCommon::Shader::Registry& GetRegistry() const { + return *registry; + } + + /// Mark a OpenGL shader as built + void AsyncOpenGLBuilt(OGLProgram new_program); + + /// Mark a GLASM shader as built + void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + + static std::unique_ptr<Shader> CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b, + VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); @@ -85,12 +107,13 @@ public: private: explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program); + ProgramSharedPtr program, bool is_built = true); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; ProgramSharedPtr program; GLuint handle = 0; + bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { @@ -104,7 +127,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program); + Shader* GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders); /// Gets a compute kernel in the passed address Shader* GetComputeKernel(GPUVAddr code_addr); |