diff options
-rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.cpp | 44 | ||||
-rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.h | 2 | ||||
-rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | 2 | ||||
-rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 22 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/texture_pass.cpp | 11 | ||||
-rw-r--r-- | src/shader_recompiler/profile.h | 2 | ||||
-rw-r--r-- | src/shader_recompiler/shader_info.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 32 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 |
11 files changed, 86 insertions, 38 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0e8fe017d..d224c4d84 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) { } } +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + std::string_view GetTessMode(TessPrimitive primitive) { switch (primitive) { case TessPrimitive::Triangles: @@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { - header += "#pragma optionNV(fastmath off)\n"; + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } SetupExtensions(); stage = program.stage; switch (program.stage) { @@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { + if (info.stores_generics[index] || StageInitializesVaryings()) { DefineGenericOutput(index, program.invocations); } } @@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } +bool EmitContext::StageInitializesVaryings() const noexcept { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + return true; + default: + return false; + } +} + void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; @@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_atomic_float : enable\n"; } if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { - header += "#extension NV_shader_atomic_fp16_vector : enable\n"; + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { if (profile.support_gl_nv_gpu_shader_5) { @@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() { if (info.stores_viewport_mask && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } - if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { + if (info.uses_typeless_image_reads) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } if (info.uses_derivatives && profile.support_gl_derivative_control) { @@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); } - write_func += "}"; - write_func_64 += "}"; - write_func_128 += "}"; + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; load_func += "return 0u;}"; load_func_64 += "return uvec2(0);}"; load_func_128 += "return uvec4(0);}"; @@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) { for (const auto& desc : info.image_buffer_descriptors) { image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, - format, bindings.image, array_decorator); + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); bindings.image += desc.count; } images.reserve(info.image_descriptors.size()); @@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) { images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; - const auto qualifier{desc.is_written ? "" : "readonly "}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, qualifier, image_type, bindings.image, array_decorator); diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8fa87c02c..4a50556e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,6 +136,8 @@ public: code += '\n'; } + [[nodiscard]] bool StageInitializesVaryings() const noexcept; + std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index edeecc26e..a241d18fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::FogCoordinate: - ctx.Add("gl_FogFragCoord.x={};", value); + ctx.Add("gl_FogFragCoord={};", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index cfef58d79..59ca52f07 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -10,6 +10,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void InitializeVaryings(EmitContext& ctx) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + // TODO: Properly resolve attribute issues + for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { + if (!ctx.info.stores_generics[index]) { + ctx.Add("out_attr{}=vec4(0,0,0,1);", index); + } + } +} +} // Anonymous namespace void EmitPhi(EmitContext& ctx, IR::Inst& phi) { const size_t num_args{phi.NumArgs()}; @@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { - ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { - ctx.Add("out_attr{}=vec4(0,0,0,1);", index); - } - } + if (ctx.StageInitializesVaryings()) { + InitializeVaryings(ctx); } } @@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e9098239d..737f186ab 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -312,11 +312,14 @@ public: } u32 Add(const ImageBufferDescriptor& desc) { - return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; } u32 Add(const TextureDescriptor& desc) { @@ -339,6 +342,7 @@ public: desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; + image_descriptors[index].is_read |= desc.is_read; return index; } @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { .type = flags.type, .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e8cfc03af..a3c412a0f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -97,6 +97,8 @@ struct Profile { /// Fragment outputs have to be declared even if they are not written to avoid undefined values. /// See Ori and the Blind Forest's main menu for reference. bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 74d7a6a94..e9ebc16a4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes struct ImageBufferDescriptor { ImageFormat format; bool is_written; + bool is_read; u32 cbuf_index; u32 cbuf_offset; u32 count; @@ -99,6 +100,7 @@ struct ImageDescriptor { TextureType type; ImageFormat format; bool is_written; + bool is_read; u32 cbuf_index; u32 cbuf_offset; u32 count; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index bf08a6d93..5838fc02f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -162,6 +162,7 @@ Device::Device() { has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; + need_fastmath_off = is_nvidia; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0b59c9df0..0c9d6fe31 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -136,6 +136,10 @@ public: return warp_size_potentially_larger_than_guest; } + bool NeedsFastmathOff() const { + return need_fastmath_off; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -171,6 +175,7 @@ private: bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; + bool need_fastmath_off{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d27a3cf46..8d11fbc55 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); + } + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } + } else { AttachShader(Stage(stage), program.handle, code); } - LinkProgram(program.handle); } - if (shader_notify) { - shader_notify->MarkShaderComplete(); + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } is_built.store(true, std::memory_order_relaxed); }}; if (thread_worker) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, |