From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:23:57 -0400 Subject: glsl: Address rest of feedback --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ .../renderer_opengl/gl_graphics_pipeline.cpp | 32 ++++++++++------------ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 22 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index bf08a6d93..5838fc02f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -162,6 +162,7 @@ Device::Device() { has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; + need_fastmath_off = is_nvidia; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0b59c9df0..0c9d6fe31 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -136,6 +136,10 @@ public: return warp_size_potentially_larger_than_guest; } + bool NeedsFastmathOff() const { + return need_fastmath_off; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -171,6 +175,7 @@ private: bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; + bool need_fastmath_off{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d27a3cf46..8d11fbc55 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); + } + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } + } else { AttachShader(Stage(stage), program.handle, code); } - LinkProgram(program.handle); } - if (shader_notify) { - shader_notify->MarkShaderComplete(); + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } is_built.store(true, std::memory_order_relaxed); }}; if (thread_worker) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, -- cgit v1.2.3