From 1e65da971bf6edd5611e6e409ba1cc4f99e58655 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 20 Jun 2020 07:41:55 -0400 Subject: gl_device: Check for GL_EXT_texture_shadow_lod --- src/video_core/renderer_opengl/gl_device.cpp | 2 ++ src/video_core/renderer_opengl/gl_device.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b31d604e4..1011c7738 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -216,6 +216,7 @@ Device::Device() has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); + has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod"); has_astc = IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; @@ -245,6 +246,7 @@ Device::Device(std::nullptr_t) { has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; + has_texture_shadow_lod = true; has_variable_aoffi = true; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 145347943..c86e709b1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -68,6 +68,10 @@ public: return has_image_load_formatted; } + bool HasTextureShadowLod() const { + return has_texture_shadow_lod; + } + bool HasASTC() const { return has_astc; } @@ -110,6 +114,7 @@ private: bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; + bool has_texture_shadow_lod{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; -- cgit v1.2.3 From f77c897b8d5287adb64e08f65e494dac45033de3 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 20 Jun 2020 07:43:04 -0400 Subject: gl_shader_decompiler: Enable GL_EXT_texture_shadow_lod if available Enable GL_EXT_texture_shadow_lod if available. If this extension is not available, such as on Intel/AMD proprietary drivers, use textureGrad as a workaround. --- .../renderer_opengl/gl_shader_decompiler.cpp | 50 +++++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d6e30b321..2c49aeaac 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; +using Tegra::Shader::TextureType; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -526,6 +527,9 @@ private: if (device.HasImageLoadFormatted()) { code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); } + if (device.HasTextureShadowLod()) { + code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); + } if (device.HasWarpIntrinsics()) { code.AddLine("#extension GL_NV_gpu_shader5 : require"); code.AddLine("#extension GL_NV_shader_thread_group : require"); @@ -909,13 +913,13 @@ private: return "samplerBuffer"; } switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: + case TextureType::Texture1D: return "sampler1D"; - case Tegra::Shader::TextureType::Texture2D: + case TextureType::Texture2D: return "sampler2D"; - case Tegra::Shader::TextureType::Texture3D: + case TextureType::Texture3D: return "sampler3D"; - case Tegra::Shader::TextureType::TextureCube: + case TextureType::TextureCube: return "samplerCube"; default: UNREACHABLE(); @@ -1380,8 +1384,19 @@ private: const std::size_t count = operation.GetOperandsCount(); const bool has_array = meta->sampler.is_array; const bool has_shadow = meta->sampler.is_shadow; + const bool workaround_lod_array_shadow_as_grad = + !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube); + + std::string expr = "texture"; + + if (workaround_lod_array_shadow_as_grad) { + expr += "Grad"; + } else { + expr += function_suffix; + } - std::string expr = "texture" + function_suffix; if (!meta->aoffi.empty()) { expr += "Offset"; } else if (!meta->ptp.empty()) { @@ -1415,6 +1430,16 @@ private: expr += ')'; } + if (workaround_lod_array_shadow_as_grad) { + switch (meta->sampler.type) { + case TextureType::Texture2D: + return expr + ", vec2(0.0), vec2(0.0))"; + case TextureType::TextureCube: + return expr + ", vec3(0.0), vec3(0.0))"; + } + UNREACHABLE(); + } + for (const auto& variant : extras) { if (const auto argument = std::get_if(&variant)) { expr += GenerateTextureArgument(*argument); @@ -2041,8 +2066,19 @@ private: const auto meta = std::get_if(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture( - operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + std::string expr{}; + + if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube)) { + LOG_ERROR(Render_OpenGL, + "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); + expr = GenerateTexture(operation, "Lod", {}); + } else { + expr = GenerateTexture(operation, "Lod", + {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + } + if (meta->sampler.is_shadow) { expr = "vec4(" + expr + ')'; } -- cgit v1.2.3 From 9f54cd4dad58c2c99874a9fe6bb4c34052a65555 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Jun 2020 22:51:03 -0300 Subject: gl_shader_cache: Avoid use after move for program size All programs had a size of zero due to this bug, skipping invalidations. While we are at it, remove some unused forward declarations. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 +++++++----- src/video_core/renderer_opengl/gl_shader_cache.h | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 46e780a06..c6a3bf3a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -460,8 +460,9 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const u8* host_ptr_b = memory_manager.GetPointer(address_b); code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); } + const std::size_t code_size = code.size() * sizeof(u64); - const auto unique_identifier = GetUniqueIdentifier( + const u64 unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); const ShaderParameters params{system, disk_cache, device, @@ -477,7 +478,7 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Shader* const result = shader.get(); if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); + Register(std::move(shader), *cpu_addr, code_size); } else { null_shader = std::move(shader); } @@ -495,8 +496,9 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one - auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; - const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; + ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; + const std::size_t code_size{code.size() * sizeof(u64)}; + const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const ShaderParameters params{system, disk_cache, device, *cpu_addr, host_ptr, unique_identifier}; @@ -511,7 +513,7 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { Shader* const result = kernel.get(); if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); + Register(std::move(kernel), *cpu_addr, code_size); } else { null_kernel = std::move(kernel); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6848f1388..994aaeaf2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -37,7 +37,6 @@ namespace OpenGL { class Device; class RasterizerOpenGL; -struct UnspecializedShader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -- cgit v1.2.3 From da79ec9565f670bcf1f09fdf7d9ae0241d97a241 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 11 May 2020 16:18:53 -0300 Subject: gl_stream_buffer: Always use persistent memory maps yuzu no longer supports platforms without persistent maps. --- .../renderer_opengl/gl_stream_buffer.cpp | 40 +++++++--------------- src/video_core/renderer_opengl/gl_stream_buffer.h | 4 +-- 2 files changed, 14 insertions(+), 30 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 932a2f69e..9cf0f6b46 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -14,8 +14,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, - bool use_persistent) +OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent) : buffer_size(size) { gl_buffer.Create(); @@ -29,23 +28,16 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p allocate_size *= 2; } - if (use_persistent) { - persistent = true; - coherent = prefer_coherent; - const GLbitfield flags = - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); - mapped_ptr = static_cast(glMapNamedBufferRange( - gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); - } else { - glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); - } + coherent = prefer_coherent; + const GLbitfield flags = + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + mapped_ptr = static_cast(glMapNamedBufferRange( + gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); } OGLStreamBuffer::~OGLStreamBuffer() { - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } + glUnmapNamedBuffer(gl_buffer.handle); gl_buffer.Release(); } @@ -63,16 +55,14 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a buffer_pos = 0; invalidate = true; - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } + glUnmapNamedBuffer(gl_buffer.handle); } - if (invalidate || !persistent) { + if (invalidate) { MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | - (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | - (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); + const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | + GL_MAP_INVALIDATE_BUFFER_BIT | + (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT); mapped_ptr = static_cast( glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); mapped_offset = buffer_pos; @@ -88,10 +78,6 @@ void OGLStreamBuffer::Unmap(GLsizeiptr size) { glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); } - if (!persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } - buffer_pos += size; } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 866da3594..65c3da93f 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -13,8 +13,7 @@ namespace OpenGL { class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, - bool use_persistent = true); + explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false); ~OGLStreamBuffer(); /* @@ -41,7 +40,6 @@ private: OGLBuffer gl_buffer; bool coherent = false; - bool persistent = false; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; -- cgit v1.2.3 From 00c66a728958c3b2804131ce5baf44880119e018 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 11 May 2020 16:21:08 -0300 Subject: gl_stream_buffer: Always use a non-coherent buffer --- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 20 +++++++++----------- src/video_core/renderer_opengl/gl_stream_buffer.h | 4 +--- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 9cf0f6b46..aeafcfbfe 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -4,6 +4,7 @@ #include #include + #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" @@ -14,8 +15,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent) - : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { gl_buffer.Create(); GLsizeiptr allocate_size = size; @@ -28,12 +28,10 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p allocate_size *= 2; } - coherent = prefer_coherent; - const GLbitfield flags = - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); - mapped_ptr = static_cast(glMapNamedBufferRange( - gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); + mapped_ptr = static_cast( + glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); } OGLStreamBuffer::~OGLStreamBuffer() { @@ -59,10 +57,10 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a } if (invalidate) { + static const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | + GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT; + MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | - GL_MAP_INVALIDATE_BUFFER_BIT | - (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT); mapped_ptr = static_cast( glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); mapped_offset = buffer_pos; @@ -74,7 +72,7 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent && size > 0) { + if (size > 0) { glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 65c3da93f..826c2e361 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -13,7 +13,7 @@ namespace OpenGL { class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false); + explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); /* @@ -39,8 +39,6 @@ public: private: OGLBuffer gl_buffer; - bool coherent = false; - GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; GLintptr mapped_offset = 0; -- cgit v1.2.3 From 73fb3a304b215abce3cfb1c0c5eb2b43740b65ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 18 Jun 2020 03:54:13 -0300 Subject: gl_device: Expose NV_vertex_buffer_unified_memory except on Turing Expose NV_vertex_buffer_unified_memory when the driver supports it. This commit adds a function the determine if a GL_RENDERER is a Turing GPU. This is required because on Turing GPUs Nvidia's driver crashes when the buffer is marked as resident or on DeleteBuffers. Without a synchronous debug output (single threaded driver), it's likely that the driver will crash in the first blocking call. --- src/video_core/renderer_opengl/gl_device.cpp | 26 +++++++++++++++++++++++++- src/video_core/renderer_opengl/gl_device.h | 5 +++++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1011c7738..447a19595 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -188,16 +188,32 @@ bool IsASTCSupported() { return true; } +/// @brief Returns true when a GL_RENDERER is a Turing GPU +/// @param renderer GL_RENDERER string +bool IsTuring(std::string_view renderer) { + static constexpr std::array TURING_GPUS = { + "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070", + "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000", + "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4", + }; + return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(), + [renderer](std::string_view candidate) { + return renderer.find(candidate) != std::string_view::npos; + }); +} + } // Anonymous namespace Device::Device() : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast(glGetString(GL_VENDOR)); + const std::string_view renderer = reinterpret_cast(glGetString(GL_RENDERER)); const std::string_view version = reinterpret_cast(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; + const bool is_turing = is_nvidia && IsTuring(renderer); bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -221,8 +237,16 @@ Device::Device() has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + + // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive + // uniform buffers as "push constants" + has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; + + // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on + // DeleteBuffers. Disable unified memory on these devices. + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing; + use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index c86e709b1..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -72,6 +72,10 @@ public: return has_texture_shadow_lod; } + bool HasVertexBufferUnifiedMemory() const { + return has_vertex_buffer_unified_memory; + } + bool HasASTC() const { return has_astc; } @@ -115,6 +119,7 @@ private: bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_texture_shadow_lod{}; + bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; -- cgit v1.2.3 From 32485917ba7cb7b2f0cad766c0897365294650a7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 11 May 2020 16:35:04 -0300 Subject: gl_buffer_cache: Mark buffers as resident Make stream buffer and cached buffers as resident and query their address. This allows us to use GPU addresses for several proprietary Nvidia extensions. --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 24 ++++++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 20 +++++++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 44 +++++++++++----------- .../renderer_opengl/gl_stream_buffer.cpp | 11 +++++- src/video_core/renderer_opengl/gl_stream_buffer.h | 11 +++++- 5 files changed, 70 insertions(+), 40 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..e09b47f57 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,21 +22,28 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } Buffer::~Buffer() = default; OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, std::make_unique(stream_size, true)} { + const Device& device_, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, + std::make_unique(device_, stream_size, true)}, + device{device_} { if (!device.HasFastBufferSubData()) { return; } - static constexpr auto size = static_cast(Maxwell::MaxConstBufferSize); + static constexpr GLsizeiptr size = static_cast(Maxwell::MaxConstBufferSize); glCreateBuffers(static_cast(std::size(cbufs)), std::data(cbufs)); for (const GLuint cbuf : cbufs) { glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,11 +55,11 @@ OGLBufferCache::~OGLBufferCache() { } std::shared_ptr OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared(cpu_addr, size); + return std::make_shared(device, cpu_addr, size); } -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { - return 0; +OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { + return {0, 0, 0}; } void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, @@ -79,8 +86,9 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast(size), raw_pointer); - return {cbuf, 0}; + return {cbuf, 0, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..6462cfae5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,20 @@ class RasterizerOpenGL; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(VAddr cpu_addr, const std::size_t size); + explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - GLuint Handle() const { + GLuint Handle() const noexcept { return gl_buffer.handle; } + u64 Address() const noexcept { + return gpu_address; + } + private: OGLBuffer gl_buffer; + u64 gpu_address = 0; }; using GenericBufferCache = VideoCommon::BufferCache; @@ -43,7 +48,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - GLuint GetEmptyBuffer(std::size_t) override; + BufferInfo GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -64,10 +69,13 @@ protected: BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: + static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + + const Device& device; + std::size_t cbuf_cursor = 0; - std::array - cbufs; + std::array cbufs{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..7cb378a71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -253,8 +253,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { glBindVertexBuffer(static_cast(index), 0, 0, vertex_array.stride); continue; } - const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast(index), vertex_buffer, vertex_buffer_offset, + const auto info = buffer_cache.UploadMemory(start, size); + glBindVertexBuffer(static_cast(index), info.handle, info.offset, vertex_array.stride); } } @@ -285,9 +285,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); - const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); - return offset; + const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); + return info.offset; } void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -643,9 +643,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; ubo.SetFromRegs(gpu); - const auto [buffer, offset] = + const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, static_cast(sizeof(ubo))); } @@ -956,8 +956,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, if (device.UseAssemblyShaders()) { glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); } return; } @@ -970,24 +969,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); const GPUVAddr gpu_addr = buffer.address; - auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); if (device.UseAssemblyShaders()) { UNIMPLEMENTED_IF(use_unified); - if (offset != 0) { + if (info.offset != 0) { const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); + info.handle = staging_cbuf; + info.offset = 0; } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); + glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); return; } if (use_unified) { - glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); + glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, + unified_offset, size); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); } } @@ -1023,9 +1023,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast(size)); } @@ -1712,8 +1711,9 @@ void RasterizerOpenGL::EndTransformFeedback() { const GLuint handle = transform_feedback_buffers[index].handle; const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; - const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast(size)); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, + static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index aeafcfbfe..164df4feb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,12 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -15,7 +16,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) + : buffer_size(size) { gl_buffer.Create(); GLsizeiptr allocate_size = size; @@ -32,6 +34,11 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buff glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); mapped_ptr = static_cast( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } OGLStreamBuffer::~OGLStreamBuffer() { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 826c2e361..e67a82980 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,9 +11,11 @@ namespace OpenGL { +class Device; + class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); /* @@ -32,13 +34,18 @@ public: return gl_buffer.handle; } - GLsizeiptr Size() const { + u64 Address() const { + return gpu_address; + } + + GLsizeiptr Size() const noexcept { return buffer_size; } private: OGLBuffer gl_buffer; + GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; GLintptr mapped_offset = 0; -- cgit v1.2.3 From 41a4090320ee52e914e8b4c789dfe14210794fed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 18 Jun 2020 03:56:31 -0300 Subject: gl_rasterizer: Use NV_vertex_buffer_unified_memory for vertex buffer robustness Switch games are allowed to bind less data than what they use in a vertex buffer, the expected behavior here is that these values are read as zero. At the moment of writing this only D3D12, OpenGL and NVN through NV_vertex_buffer_unified_memory support vertex buffer with a size limit. In theory this could be emulated on Vulkan creating a new VkBuffer for each (handle, offset, length) tuple and binding the expected data to it. This is likely going to be slow and memory expensive when used on the vertex buffer and we have to do it on all draws because we can't know without analyzing indices when a game is going to read vertex data out of bounds. This is not a problem on OpenGL's BufferAddressRangeNV because it takes a length parameter, unlike Vulkan's CmdBindVertexBuffers that only takes buffers and offsets (the length is implicit in VkBuffer). It isn't a problem on D3D12 either, because D3D12_VERTEX_BUFFER_VIEW on IASetVertexBuffers takes SizeInBytes as a parameter (although I am not familiar with robustness on D3D12). Currently this only implements buffer ranges for vertex buffers, although indices can also be affected. A KHR_robustness profile is not created, but Nvidia's driver reads out of bound vertex data as zero anyway, this might have to be changed in the future. - Fixes SMO random triangles when capturing an enemy, getting hit, or looking at the environment on certain maps. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 +++++++++++++++------- src/video_core/renderer_opengl/renderer_opengl.cpp | 17 ++++++++++++- src/video_core/renderer_opengl/renderer_opengl.h | 3 +++ 3 files changed, 39 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7cb378a71..362457ffe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NumSupportedVertexAttributes = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; template Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, @@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() { // avoid OpenGL errors. // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't // assume every shader uses them all. - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexFormat0 + index]) { continue; } @@ -231,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() { MICROPROFILE_SCOPE(OpenGL_VB); + const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); + // Upload all guest vertex arrays sequentially to our buffer const auto& regs = gpu.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { if (!flags[Dirty::VertexBuffer0 + index]) { continue; } @@ -246,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() { const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end >= start); + + const GLuint gl_index = static_cast(index); const u64 size = end - start; if (size == 0) { - glBindVertexBuffer(static_cast(index), 0, 0, vertex_array.stride); + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + if (use_unified_memory) { + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); + } continue; } const auto info = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast(index), info.handle, info.offset, - vertex_array.stride); + if (use_unified_memory) { + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, + info.address + info.offset, size); + } else { + glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); + } } } @@ -268,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() { flags[Dirty::VertexInstances] = false; const auto& regs = gpu.regs; - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexInstance0 + index]) { continue; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6214fcbc3..c40adb6e7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } void RendererOpenGL::AddTelemetryFields() { @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } glBindTextureUnit(0, screen_info.display_texture); glBindSampler(0, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -107,6 +107,9 @@ private: OGLPipeline pipeline; OGLFramebuffer screenshot_framebuffer; + // GPU address of the vertex buffer + GLuint64EXT vertex_buffer_address = 0; + /// Display information for Switch screen ScreenInfo screen_info; -- cgit v1.2.3 From 39c97f1b652898dbd0e5e6d028de2ba4b9fa94a0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 18 Jun 2020 21:53:47 -0300 Subject: gl_stream_buffer: Use InvalidateBufferData instead unmap and map Making the stream buffer resident increases GPU usage significantly on some games. This seems to be addressed invalidating the stream buffer with InvalidateBufferData instead of using a Unmap + Map (with invalidation flags). --- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 19 +++++-------------- src/video_core/renderer_opengl/gl_stream_buffer.h | 1 - 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 164df4feb..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -57,30 +57,21 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a bool invalidate = false; if (buffer_pos + size > buffer_size) { + MICROPROFILE_SCOPE(OpenGL_StreamBuffer); + glInvalidateBufferData(gl_buffer.handle); + buffer_pos = 0; invalidate = true; - - glUnmapNamedBuffer(gl_buffer.handle); - } - - if (invalidate) { - static const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | - GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT; - - MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - mapped_ptr = static_cast( - glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); - mapped_offset = buffer_pos; } - return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); + return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); if (size > 0) { - glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); + glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); } buffer_pos += size; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index e67a82980..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -48,7 +48,6 @@ private: GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; - GLintptr mapped_offset = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; -- cgit v1.2.3 From 32a2dcd4153f4e2aea7b5f88c85d8a352f647f12 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Jun 2020 20:47:48 -0300 Subject: buffer_cache: Use buffer methods instead of cache virtual methods --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 38 ++++++++++------------ src/video_core/renderer_opengl/gl_buffer_cache.h | 16 ++++----- 2 files changed, 25 insertions(+), 29 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e09b47f57..d9f7b4cc6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,6 +34,24 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { + glNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), + data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { + MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glGetNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), + data); +} + +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const { + glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast(src_offset), + static_cast(dst_offset), static_cast(size)); +} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, const Device& device_, std::size_t stream_size) : GenericBufferCache{rasterizer, system, @@ -62,26 +80,6 @@ OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { return {0, 0, 0}; } -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer.Handle(), static_cast(offset), - static_cast(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer.Handle(), static_cast(offset), - static_cast(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast(src_offset), - static_cast(dst_offset), static_cast(size)); -} - OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6462cfae5..59d95adbc 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -28,6 +28,13 @@ public: explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); + void Upload(std::size_t offset, std::size_t size, const u8* data) const; + + void Download(std::size_t offset, std::size_t size, u8* data) const; + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const; + GLuint Handle() const noexcept { return gl_buffer.handle; } @@ -57,15 +64,6 @@ public: protected: std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: -- cgit v1.2.3 From bc8d3b8f82c06e5d0b5a7c1640ef00b83e826dbf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 25 Jun 2020 01:28:45 -0300 Subject: gl_device: Enable NV_vertex_buffer_unified_memory on Turing devices Once we make sure not to corrupt Nvidia's driver, we can safely use resident buffers on Turing devices. See GitHub pull request #4156 --- src/video_core/renderer_opengl/gl_device.cpp | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 447a19595..bb1375f82 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -188,20 +188,6 @@ bool IsASTCSupported() { return true; } -/// @brief Returns true when a GL_RENDERER is a Turing GPU -/// @param renderer GL_RENDERER string -bool IsTuring(std::string_view renderer) { - static constexpr std::array TURING_GPUS = { - "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070", - "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000", - "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4", - }; - return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(), - [renderer](std::string_view candidate) { - return renderer.find(candidate) != std::string_view::npos; - }); -} - } // Anonymous namespace Device::Device() @@ -213,7 +199,6 @@ Device::Device() const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_turing = is_nvidia && IsTuring(renderer); bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -238,15 +223,12 @@ Device::Device() has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on - // DeleteBuffers. Disable unified memory on these devices. - has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing; - use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; -- cgit v1.2.3 From a927d8be52c343bc1025e5df822c56470eb27919 Mon Sep 17 00:00:00 2001 From: David Marcec Date: Thu, 25 Jun 2020 19:12:56 +1000 Subject: gl_device: Fix IsASTCSupported Other targets were never actually checked --- src/video_core/renderer_opengl/gl_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 447a19595..b6b6659c1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -178,7 +178,7 @@ bool IsASTCSupported() { for (const GLenum format : formats) { for (const GLenum support : required_support) { GLint value; - glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); + glGetInternalformativ(target, format, support, 1, &value); if (value != GL_FULL_SUPPORT) { return false; } -- cgit v1.2.3 From 6481d91e4a5b5fbae899c3a7924af0b132c16bc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Jun 2020 16:58:40 -0300 Subject: gl_buffer_cache: Copy to buffers created as STREAM_READ before downloading After marking buffers as resident, Nvidia's driver seems to take a slow path. To workaround this issue, copy to a STREAM_READ buffer and then call GetNamedBufferSubData on it. This is a temporary solution until we have asynchronous flushing. --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 17 ++++++++++++----- src/video_core/renderer_opengl/gl_buffer_cache.h | 7 ++++--- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index d9f7b4cc6..e461e4c70 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,20 +34,27 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { glNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), data); } -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + const GLsizeiptr gl_size = static_cast(size); + const GLintptr gl_offset = static_cast(offset); + if (read_buffer.handle == 0) { + read_buffer.Create(); + glNamedBufferData(read_buffer.handle, static_cast(Size()), nullptr, + GL_STREAM_READ); + } glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(Handle(), static_cast(offset), static_cast(size), - data); + glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); + glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) const { + std::size_t size) { glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast(src_offset), static_cast(dst_offset), static_cast(size)); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 59d95adbc..88fdc0536 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -28,12 +28,12 @@ public: explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data) const; + void Upload(std::size_t offset, std::size_t size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data) const; + void Download(std::size_t offset, std::size_t size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) const; + std::size_t size); GLuint Handle() const noexcept { return gl_buffer.handle; @@ -45,6 +45,7 @@ public: private: OGLBuffer gl_buffer; + OGLBuffer read_buffer; u64 gpu_address = 0; }; -- cgit v1.2.3 From 78d80d99a07893b79cecefbb613bf326c8e783eb Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 28 Jun 2020 02:48:14 -0400 Subject: maxwell_to_gl: Add 32 bit component sizes to (un)signed scaled formats Add 32 bit component sizes to (un)signed scaled formats and group (un)signed normalized, scaled, and integer formats together. --- src/video_core/renderer_opengl/maxwell_to_gl.h | 34 +++----------------------- 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 35e329240..8f3871e90 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -26,8 +26,9 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.type) { - case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::UnsignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT_2_10_10_10_REV; } break; - case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -84,34 +86,6 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_UNSIGNED_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_UNSIGNED_SHORT; - } - break; - case Maxwell::VertexAttribute::Type::SignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_SHORT; - } - break; } UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), attrib.SizeString()); -- cgit v1.2.3 From 10eca7f651d0dc407c7c4076d11e0b960d9dedd4 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Mon, 29 Jun 2020 11:48:38 -0400 Subject: maxwell_to_gl: Rename VertexType() to VertexFormat() --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +++-- src/video_core/renderer_opengl/maxwell_to_gl.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 362457ffe..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -213,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() { if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { glVertexAttribIFormat(gl_index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); + MaxwellToGL::VertexFormat(attrib), attrib.offset); } else { - glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + glVertexAttribFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexFormat(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); } glVertexAttribBinding(gl_index, attrib.buffer); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 8f3871e90..774e70a5b 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,7 +24,7 @@ namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -inline GLenum VertexType(Maxwell::VertexAttribute attrib) { +inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: case Maxwell::VertexAttribute::Type::UnsignedScaled: @@ -87,7 +87,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { } break; } - UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(), attrib.SizeString()); return {}; } -- cgit v1.2.3 From 1b31755ba6eb3940d2ec0661337ef21913f9a756 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 13 Jun 2020 11:21:27 -0400 Subject: maxwell_to_gl: Implement MirrorOnceClampOGL using GL_MIRROR_CLAMP_EXT Like MirrorOnceBorder, this requires the GL_EXT_texture_mirror_clamp extension. This extension is unfortunately not available on Intel's drivers (both Windows proprietary and Linux Mesa). Use GL_MIRROR_CLAMP_TO_EDGE as a fallback if the extension is unavailable. --- src/video_core/renderer_opengl/maxwell_to_gl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 774e70a5b..fe9bd4b5a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -191,6 +191,12 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { } else { return GL_MIRROR_CLAMP_TO_EDGE; } + case Tegra::Texture::WrapMode::MirrorOnceClampOGL: + if (GL_EXT_texture_mirror_clamp) { + return GL_MIRROR_CLAMP_EXT; + } else { + return GL_MIRROR_CLAMP_TO_EDGE; + } } UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast(wrap_mode)); return GL_REPEAT; -- cgit v1.2.3