From cb0a41090705f974d5bec009c571344bf72aa375 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 May 2023 22:04:16 -0400 Subject: gl_staging_buffers: Optimization to reduce fence waiting --- .../renderer_opengl/gl_staging_buffer_pool.cpp | 24 ++++++++++++++++++---- .../renderer_opengl/gl_staging_buffer_pool.h | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp index 72b1dbb32..bbb06e51f 100644 --- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -9,8 +9,12 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/bit_util.h" +#include "common/microprofile.h" #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" +MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); + namespace OpenGL { StagingBufferMap::~StagingBufferMap() { @@ -25,8 +29,11 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) StagingBuffers::~StagingBuffers() = default; StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { + MICROPROFILE_SCOPE(OpenGL_BufferRequest); + const size_t index = RequestBuffer(requested_size); OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; + sync_indices[index] = insert_fence ? ++current_sync_index : 0; return StagingBufferMap{ .mapped_span = std::span(maps[index], requested_size), .sync = sync, @@ -41,13 +48,14 @@ size_t StagingBuffers::RequestBuffer(size_t requested_size) { OGLBuffer& buffer = buffers.emplace_back(); buffer.Create(); - glNamedBufferStorage(buffer.handle, requested_size, nullptr, + const auto next_pow2_size = Common::NextPow2(requested_size); + glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, storage_flags | GL_MAP_PERSISTENT_BIT); - maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, requested_size, + maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, map_flags | GL_MAP_PERSISTENT_BIT))); - syncs.emplace_back(); - sizes.push_back(requested_size); + sync_indices.emplace_back(); + sizes.push_back(next_pow2_size); ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && maps.size() == sizes.size()); @@ -56,6 +64,7 @@ size_t StagingBuffers::RequestBuffer(size_t requested_size) { } std::optional StagingBuffers::FindBuffer(size_t requested_size) { + size_t known_unsignaled_index = current_sync_index + 1; size_t smallest_buffer = std::numeric_limits::max(); std::optional found; const size_t num_buffers = sizes.size(); @@ -65,7 +74,14 @@ std::optional StagingBuffers::FindBuffer(size_t requested_size) { continue; } if (syncs[index].handle != 0) { + if (sync_indices[index] >= known_unsignaled_index) { + // This fence is later than a fence that is known to not be signaled + continue; + } if (!syncs[index].IsSignaled()) { + // Since this fence hasn't been signaled, it's safe to assume all later + // fences haven't been signaled either + known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); continue; } syncs[index].Release(); diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 2c467be3d..60f72d3a0 100644 --- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -42,8 +42,10 @@ struct StagingBuffers { std::vector buffers; std::vector maps; std::vector sizes; + std::vector sync_indices; GLenum storage_flags; GLenum map_flags; + size_t current_sync_index = 0; }; class StreamBuffer { -- cgit v1.2.3