From 4e35177e23069ad7a4cb0fdfa2ad5b34300c44f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Aug 2019 23:50:21 -0300 Subject: shader_ir: Implement VOTE Implement VOTE using Nvidia's intrinsics. Documentation about these can be found here https://developer.nvidia.com/reading-between-threads-shader-intrinsics Instead of using portable ARB instructions I opted to use Nvidia intrinsics because these are the closest we have to how Tegra X1 hardware renders. To stub VOTE on non-Nvidia drivers (including nouveau) this commit simulates a GPU with a warp size of one, returning what is meaningful for the instruction being emulated: * anyThreadNV(value) -> value * allThreadsNV(value) -> value * allThreadsEqualNV(value) -> true ballotARB, also known as "uint64_t(activeThreadsNV())", emits VOTE.ANY Rd, PT, PT; on nouveau's compiler. This doesn't match exactly to Nvidia's code VOTE.ALL Rd, PT, PT; Which is emulated with activeThreadsNV() by this commit. In theory this shouldn't really matter since .ANY, .ALL and .EQ affect the predicates (set to PT on those cases) and not the registers. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl/gl_shader_cache.cpp') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1c90facc3..a32a7e984 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -212,7 +212,9 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn const auto texture_buffer_usage{variant.texture_buffer_usage}; std::string source = "#version 430 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n"; + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_NV_gpu_shader5 : enable\n" + "#extension GL_NV_shader_thread_group : enable\n"; if (entries.shader_viewport_layer_array) { source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } -- cgit v1.2.3