From 82c2601555b59a94d7160f2fd686cb63d32dd423 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 16 Jan 2021 20:48:58 -0300 Subject: video_core: Reimplement the buffer cache Reimplement the buffer cache using cached bindings and page-level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache. - Bindings are cached, allowing work to be skipped when the game changes few bits between draws. - OpenGL Assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers; instead, GL_EXT_memory_object is used to alias sub-buffers within the same allocation. - OpenGL Assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table resolve inside the driver compared to glBufferSubData. - A new OpenGL stream buffer is implemented based on fences for drivers that are not Nvidia's proprietary, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (that some games use a lot). - Most optimizations are shared between APIs now, allowing Vulkan to cache more bindings than before, skipping unnecessary work. This commit adds the necessary infrastructure to use Vulkan objects from OpenGL. Overall, it improves performance and fixes some bugs present in the old cache. There are still some edge cases hit by some games that harm performance on some vendors; these are planned to be fixed in later commits. 
--- src/video_core/renderer_opengl/gl_buffer_cache.h | 168 +++++++++++++++++------ 1 file changed, 128 insertions(+), 40 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_buffer_cache.h') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 17ee90316..f4d8871a9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -5,79 +5,167 @@ #pragma once #include -#include +#include +#include "common/alignment.h" #include "common/common_types.h" +#include "common/dynamic_library.h" #include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" -namespace Core { -class System; -} +namespace Vulkan { +class Device; +class MemoryAllocator; +} // namespace Vulkan namespace OpenGL { -class Device; -class OGLStreamBuffer; -class RasterizerOpenGL; -class StateTracker; +class BufferCacheRuntime; -class Buffer : public VideoCommon::BufferBlock { +class Buffer : public VideoCommon::BufferBase { public: - explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); - ~Buffer(); + explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, + u64 size_bytes); + explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); - void Upload(std::size_t offset, std::size_t data_size, const u8* data); + void ImmediateUpload(size_t offset, std::span data) noexcept; - void Download(std::size_t offset, std::size_t data_size, u8* data); + void ImmediateDownload(size_t offset, std::span data) noexcept; - void CopyFrom(const Buffer& src, std::size_t 
src_offset, std::size_t dst_offset, - std::size_t copy_size); + void MakeResident(GLenum access) noexcept; - GLuint Handle() const noexcept { - return gl_buffer.handle; + [[nodiscard]] GLuint SubBuffer(u32 offset); + + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { + return address; } - u64 Address() const noexcept { - return gpu_address; + [[nodiscard]] GLuint Handle() const noexcept { + return buffer.handle; } private: - OGLBuffer gl_buffer; - OGLBuffer read_buffer; - u64 gpu_address = 0; + void CreateMemoryObjects(BufferCacheRuntime& runtime); + + GLuint64EXT address = 0; + Vulkan::MemoryCommit memory_commit; + OGLBuffer buffer; + GLenum current_residency_access = GL_NONE; + std::vector> subs; }; -using GenericBufferCache = VideoCommon::BufferCache; -class OGLBufferCache final : public GenericBufferCache { +class BufferCacheRuntime { + friend Buffer; + public: - explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device, OGLStreamBuffer& stream_buffer, - StateTracker& state_tracker); - ~OGLBufferCache(); + static constexpr u8 INVALID_BINDING = std::numeric_limits::max(); + + explicit BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, + Vulkan::MemoryAllocator* vulkan_memory_allocator_); + + void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, + std::span copies); + + void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); + + void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + + void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); + + void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size); + + void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, + bool is_written); + + void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, + bool 
is_written); - BufferInfo GetEmptyBuffer(std::size_t) override; + void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); - void Acquire() noexcept { - cbuf_cursor = 0; + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + if (use_assembly_shaders) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast(size); + glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); + } else { + const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_UNIFORM_BUFFER, binding, + fast_uniforms[stage][binding_index].handle, 0, + static_cast(size)); + } } -protected: - std::shared_ptr CreateBlock(VAddr cpu_addr, std::size_t size) override; + void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span data) { + if (use_assembly_shaders) { + glProgramBufferParametersIuivNV( + PABO_LUT[stage], binding_index, 0, + static_cast(data.size_bytes() / sizeof(GLuint)), + reinterpret_cast(data.data())); + } else { + glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0, + static_cast(data.size_bytes()), data.data()); + } + } + + std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { + const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); + const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), + static_cast(offset), static_cast(size)); + return mapped_span; + } - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; + [[nodiscard]] const GLvoid* IndexOffset() const noexcept { + return reinterpret_cast(static_cast(index_buffer_offset)); + } + + [[nodiscard]] bool HasFastBufferSubData() const noexcept { + return 
device.HasFastBufferSubData(); + } private: - static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * - Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + static constexpr std::array PABO_LUT{ + GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, + GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, + GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, + }; const Device& device; + const Vulkan::Device* vulkan_device; + Vulkan::MemoryAllocator* vulkan_memory_allocator; + std::optional stream_buffer; + + u32 max_attributes = 0; - std::size_t cbuf_cursor = 0; - std::array cbufs{}; + bool use_assembly_shaders = false; + bool has_unified_vertex_buffers = false; + + std::array, + VideoCommon::NUM_STAGES> + fast_uniforms; + + u32 index_buffer_offset = 0; +}; + +struct BufferCacheParams { + using Runtime = OpenGL::BufferCacheRuntime; + using Buffer = OpenGL::Buffer; + + static constexpr bool IS_OPENGL = true; + static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; + static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; + static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; + static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; + static constexpr bool USE_MEMORY_MAPS = false; }; +using BufferCache = VideoCommon::BufferCache; + } // namespace OpenGL -- cgit v1.2.3