summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 5
-rw-r--r-- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 32
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1
4 files changed, 22 insertions, 17 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index bf08a6d93..5838fc02f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -162,6 +162,7 @@ Device::Device() {
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
+ need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 0b59c9df0..0c9d6fe31 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -136,6 +136,10 @@ public:
return warp_size_potentially_larger_than_guest;
}
+ bool NeedsFastmathOff() const {
+ return need_fastmath_off;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -171,6 +175,7 @@ private:
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
+ bool need_fastmath_off{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index d27a3cf46..8d11fbc55 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable {
- if (device.UseAssemblyShaders()) {
- for (size_t stage = 0; stage < 5; ++stage) {
- const auto code{sources[stage]};
- if (code.empty()) {
- continue;
- }
+ if (!device.UseAssemblyShaders()) {
+ program.handle = glCreateProgram();
+ }
+ for (size_t stage = 0; stage < 5; ++stage) {
+ const auto code{sources[stage]};
+ if (code.empty()) {
+ continue;
+ }
+ if (device.UseAssemblyShaders()) {
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
- }
- } else {
- program.handle = glCreateProgram();
- for (size_t stage = 0; stage < 5; ++stage) {
- const auto code{sources[stage]};
- if (code.empty()) {
- continue;
- }
+ } else {
AttachShader(Stage(stage), program.handle, code);
}
- LinkProgram(program.handle);
}
- if (shader_notify) {
- shader_notify->MarkShaderComplete();
+ if (!device.UseAssemblyShaders()) {
+ LinkProgram(program.handle);
}
u32 num_textures{};
u32 num_images{};
@@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
if (assembly_shaders && xfb_state) {
GenerateTransformFeedbackState(*xfb_state);
}
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
is_built.store(true, std::memory_order_relaxed);
}};
if (thread_worker) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index fedbce2f0..620666622 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.lower_left_origin_mode = true,
.need_declared_frag_colors = true,
+ .need_fastmath_off = device.NeedsFastmathOff(),
.has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true,