diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
8 files changed, 106 insertions, 60 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 9d5209e97..e6c70fb34 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -145,8 +145,12 @@ StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { return staging_buffer_pool.RequestUploadBuffer(size); } -StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { - return staging_buffer_pool.RequestDownloadBuffer(size); +StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { + return staging_buffer_pool.RequestDownloadBuffer(size, deferred); +} + +void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { + staging_buffer_pool.FreeDeferredStagingBuffer(buffer); } u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { @@ -177,13 +181,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, } void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, - std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + std::span<const VideoCommon::BufferCopy> copies, bool barrier, + bool) { CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); } void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span<const VideoCommon::BufferCopy> copies) { - CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); + std::span<const VideoCommon::BufferCopy> copies, bool) { + CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true); } void BufferCacheRuntime::PreCopyBarrier() { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8613037eb..71cd45d35 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -30,6 +30,8 @@ public: void MakeResident(GLenum access) noexcept; + void MarkUsage(u64 offset, u64 size) {} + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { @@ -64,24 +66,33 @@ public: [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false); + + void FreeDeferredStagingBuffer(StagingBufferMap& buffer); + + bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) { + return false; + } void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, - std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + std::span<const VideoCommon::BufferCopy> copies, bool barrier); void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, - std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + std::span<const VideoCommon::BufferCopy> copies, bool barrier); void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, - std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + std::span<const VideoCommon::BufferCopy> copies, bool barrier, + bool can_reorder_upload = false); void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span<const VideoCommon::BufferCopy> copies); + std::span<const VideoCommon::BufferCopy> copies, bool); void PreCopyBarrier(); void PostCopyBarrier(); void Finish(); + void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {} + void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); @@ -182,6 +193,10 @@ public: return device.CanReportMemoryUsage(); } + u32 GetStorageBufferAlignment() const { + return static_cast<u32>(device.GetShaderStorageBufferAlignment()); + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -232,7 +247,7 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; - static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 46d88c664..a6c93068f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -264,33 +264,33 @@ std::string Device::GetVendorName() const { if (vendor_name == "Intel") { // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris. // Simply return `INTEL` for those as well as the Windows driver. - return "INTEL"; + return "Intel"; } if (vendor_name == "Intel Open Source Technology Center") { - return "I965"; + return "i965"; } if (vendor_name == "Mesa Project") { - return "I915"; + return "i915"; } if (vendor_name == "Mesa/X.org") { // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return // MESA instead of one of those driver names. - return "MESA"; + return "Mesa"; } if (vendor_name == "AMD") { - return "RADEONSI"; + return "RadeonSI"; } if (vendor_name == "nouveau") { - return "NOUVEAU"; + return "Nouveau"; } if (vendor_name == "X.Org") { return "R600"; } if (vendor_name == "Collabora Ltd") { - return "ZINK"; + return "Zink"; } if (vendor_name == "Intel Corporation") { - return "OPENSWR"; + return "OpenSWR"; } if (vendor_name == "Microsoft Corporation") { return "D3D12"; @@ -299,7 +299,7 @@ std::string Device::GetVendorName() const { // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default // strategy would have returned `NVIDIA` here for this driver, the same result as the // proprietary driver. - return "TEGRA"; + return "Tegra"; } return vendor_name; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2888e0238..26f2d0ea7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_bool_ref_bug = device.HasBoolRefBug(), .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), + .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), }, host_info{ .support_float64 = true, @@ -240,6 +241,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), + .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_conditional_barrier = device.SupportsConditionalBarriers(), } { diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp index bbb06e51f..cadad6507 100644 --- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -28,63 +28,69 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) StagingBuffers::~StagingBuffers() = default; -StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { +StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence, + bool deferred) { MICROPROFILE_SCOPE(OpenGL_BufferRequest); const size_t index = RequestBuffer(requested_size); - OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; - sync_indices[index] = insert_fence ? ++current_sync_index : 0; + OGLSync* const sync = insert_fence ? &allocs[index].sync : nullptr; + allocs[index].sync_index = insert_fence ? ++current_sync_index : 0; + allocs[index].deferred = deferred; return StagingBufferMap{ - .mapped_span = std::span(maps[index], requested_size), + .mapped_span = std::span(allocs[index].map, requested_size), .sync = sync, - .buffer = buffers[index].handle, + .buffer = allocs[index].buffer.handle, + .index = index, }; } +void StagingBuffers::FreeDeferredStagingBuffer(size_t index) { + ASSERT(allocs[index].deferred); + allocs[index].deferred = false; +} + size_t StagingBuffers::RequestBuffer(size_t requested_size) { if (const std::optional<size_t> index = FindBuffer(requested_size); index) { return *index; } - - OGLBuffer& buffer = buffers.emplace_back(); - buffer.Create(); + StagingBufferAlloc alloc; + alloc.buffer.Create(); const auto next_pow2_size = Common::NextPow2(requested_size); - glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, + glNamedBufferStorage(alloc.buffer.handle, next_pow2_size, nullptr, storage_flags | GL_MAP_PERSISTENT_BIT); - maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, - map_flags | GL_MAP_PERSISTENT_BIT))); - syncs.emplace_back(); - sync_indices.emplace_back(); - sizes.push_back(next_pow2_size); - - ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && - maps.size() == sizes.size()); - - return buffers.size() - 1; + alloc.map = static_cast<u8*>(glMapNamedBufferRange(alloc.buffer.handle, 0, next_pow2_size, + map_flags | GL_MAP_PERSISTENT_BIT)); + alloc.size = next_pow2_size; + allocs.emplace_back(std::move(alloc)); + return allocs.size() - 1; } std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { size_t known_unsignaled_index = current_sync_index + 1; size_t smallest_buffer = std::numeric_limits<size_t>::max(); std::optional<size_t> found; - const size_t num_buffers = sizes.size(); + const size_t num_buffers = allocs.size(); for (size_t index = 0; index < num_buffers; ++index) { - const size_t buffer_size = sizes[index]; + StagingBufferAlloc& alloc = allocs[index]; + const size_t buffer_size = alloc.size; if (buffer_size < requested_size || buffer_size >= smallest_buffer) { continue; } - if (syncs[index].handle != 0) { - if (sync_indices[index] >= known_unsignaled_index) { + if (alloc.deferred) { + continue; + } + if (alloc.sync.handle != 0) { + if (alloc.sync_index >= known_unsignaled_index) { // This fence is later than a fence that is known to not be signaled continue; } - if (!syncs[index].IsSignaled()) { + if (!alloc.sync.IsSignaled()) { // Since this fence hasn't been signaled, it's safe to assume all later // fences haven't been signaled either - known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); + known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index); continue; } - syncs[index].Release(); + alloc.sync.Release(); } smallest_buffer = buffer_size; found = index; @@ -143,8 +149,12 @@ StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { return upload_buffers.RequestMap(size, true); } -StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { - return download_buffers.RequestMap(size, false); +StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size, bool deferred) { + return download_buffers.RequestMap(size, false, deferred); +} + +void StagingBufferPool::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { + download_buffers.FreeDeferredStagingBuffer(buffer.index); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 60f72d3a0..07a56b4d2 100644 --- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -26,23 +26,30 @@ struct StagingBufferMap { size_t offset = 0; OGLSync* sync; GLuint buffer; + size_t index; }; struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); ~StagingBuffers(); - StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); + StagingBufferMap RequestMap(size_t requested_size, bool insert_fence, bool deferred = false); + + void FreeDeferredStagingBuffer(size_t index); size_t RequestBuffer(size_t requested_size); std::optional<size_t> FindBuffer(size_t requested_size); - std::vector<OGLSync> syncs; - std::vector<OGLBuffer> buffers; - std::vector<u8*> maps; - std::vector<size_t> sizes; - std::vector<size_t> sync_indices; + struct StagingBufferAlloc { + OGLSync sync; + OGLBuffer buffer; + u8* map; + size_t size; + size_t sync_index; + bool deferred; + }; + std::vector<StagingBufferAlloc> allocs; GLenum storage_flags; GLenum map_flags; size_t current_sync_index = 0; @@ -85,7 +92,8 @@ public: ~StagingBufferPool() = default; StagingBufferMap RequestUploadBuffer(size_t size); - StagingBufferMap RequestDownloadBuffer(size_t size); + StagingBufferMap RequestDownloadBuffer(size_t size, bool deferred = false); + void FreeDeferredStagingBuffer(StagingBufferMap& buffer); private: StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 512eef575..66a5ca03e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -557,8 +557,12 @@ StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { return staging_buffer_pool.RequestUploadBuffer(size); } -StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return staging_buffer_pool.RequestDownloadBuffer(size); +StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { + return staging_buffer_pool.RequestDownloadBuffer(size, deferred); +} + +void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { + staging_buffer_pool.FreeDeferredStagingBuffer(buffer); } u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e71b87e99..34870c81f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -74,7 +74,9 @@ public: StagingBufferMap UploadStagingBuffer(size_t size); - StagingBufferMap DownloadStagingBuffer(size_t size); + StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false); + + void FreeDeferredStagingBuffer(StagingBufferMap& buffer); u64 GetDeviceLocalMemory() const { return device_access_memory; @@ -359,7 +361,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; @@ -367,7 +369,7 @@ struct TextureCacheParams { using ImageView = OpenGL::ImageView; using Sampler = OpenGL::Sampler; using Framebuffer = OpenGL::Framebuffer; - using AsyncBuffer = u32; + using AsyncBuffer = OpenGL::StagingBufferMap; using BufferType = GLuint; }; |