summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_buffer_cache.h
blob: 2c18de16690689ff2ccfedb371c2d1470f698acb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include <span>
#include <unordered_map>

#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"

namespace OpenGL {

class BufferCacheRuntime;

class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
    explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
                    u64 size_bytes);
    explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);

    void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;

    void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;

    void MakeResident(GLenum access) noexcept;

    void MarkUsage(u64 offset, u64 size) {}

    [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);

    [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
        return address;
    }

    [[nodiscard]] GLuint Handle() const noexcept {
        return buffer.handle;
    }

private:
    struct BufferView {
        u32 offset;
        u32 size;
        VideoCore::Surface::PixelFormat format;
        OGLTexture texture;
    };

    GLuint64EXT address = 0;
    OGLBuffer buffer;
    GLenum current_residency_access = GL_NONE;
    std::vector<BufferView> views;
};

class BufferCacheRuntime {
    friend Buffer;

public:
    static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();

    explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);

    [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);

    [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);

    void FreeDeferredStagingBuffer(StagingBufferMap& buffer);

    bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
        return false;
    }

    void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
                    std::span<const VideoCommon::BufferCopy> copies, bool barrier);

    void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
                    std::span<const VideoCommon::BufferCopy> copies, bool barrier);

    void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
                    bool can_reorder_upload = false);

    void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
                    std::span<const VideoCommon::BufferCopy> copies, bool);

    void PreCopyBarrier();
    void PostCopyBarrier();
    void Finish();

    void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}

    void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);

    void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);

    void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);

    void BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings);

    void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);

    void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);

    void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
                           bool is_written);

    void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
                                  bool is_written);

    void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);

    void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);

    void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
                           VideoCore::Surface::PixelFormat format);

    void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
                         VideoCore::Surface::PixelFormat format);

    void BindTransformFeedbackObject(GPUVAddr tfb_object_addr);
    GLuint GetTransformFeedbackObject(GPUVAddr tfb_object_addr);

    u64 GetDeviceMemoryUsage() const;

    void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
        const GLuint handle = fast_uniforms[stage][binding_index].handle;
        const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
        if (use_assembly_shaders) {
            glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
        } else {
            const GLuint base_binding = graphics_base_uniform_bindings[stage];
            const GLuint binding = base_binding + binding_index;
            glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
        }
    }

    void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
        if (use_assembly_shaders) {
            glProgramBufferParametersIuivNV(
                PABO_LUT[stage], binding_index, 0,
                static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
                reinterpret_cast<const GLuint*>(data.data()));
        } else {
            glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
                                 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
        }
    }

    std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
        const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
        const GLuint base_binding = graphics_base_uniform_bindings[stage];
        const GLuint binding = base_binding + binding_index;
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
        return mapped_span;
    }

    [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
        return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
    }

    [[nodiscard]] bool HasFastBufferSubData() const noexcept {
        return has_fast_buffer_sub_data;
    }

    [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
        return !use_assembly_shaders;
    }

    void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
        graphics_base_uniform_bindings = bindings;
    }

    void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
        graphics_base_storage_bindings = bindings;
    }

    void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
        texture_handles = texture_handles_;
        image_handles = image_handles_;
    }

    void SetEnableStorageBuffers(bool use_storage_buffers_) {
        use_storage_buffers = use_storage_buffers_;
    }

    u64 GetDeviceLocalMemory() const {
        return device_access_memory;
    }

    bool CanReportMemoryUsage() const {
        return device.CanReportMemoryUsage();
    }

    u32 GetStorageBufferAlignment() const {
        return static_cast<u32>(device.GetShaderStorageBufferAlignment());
    }

private:
    static constexpr std::array PABO_LUT{
        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
    };

    const Device& device;
    StagingBufferPool& staging_buffer_pool;

    bool has_fast_buffer_sub_data = false;
    bool use_assembly_shaders = false;
    bool has_unified_vertex_buffers = false;

    bool use_storage_buffers = false;

    u32 max_attributes = 0;

    std::array<GLuint, 5> graphics_base_uniform_bindings{};
    std::array<GLuint, 5> graphics_base_storage_bindings{};
    GLuint* texture_handles = nullptr;
    GLuint* image_handles = nullptr;

    std::optional<StreamBuffer> stream_buffer;

    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
               VideoCommon::NUM_STAGES>
        fast_uniforms;
    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
               VideoCommon::NUM_STAGES>
        copy_uniforms;
    std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;

    u32 index_buffer_offset = 0;

    u64 device_access_memory;
    std::unordered_map<GPUVAddr, OGLTransformFeedback> tfb_objects;
};

struct BufferCacheParams {
    using Runtime = OpenGL::BufferCacheRuntime;
    using Buffer = OpenGL::Buffer;
    using Async_Buffer = OpenGL::StagingBufferMap;
    using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;

    static constexpr bool IS_OPENGL = true;
    static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
    static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
    static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
    static constexpr bool USE_MEMORY_MAPS = true;
    static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;

    // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
};

using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

} // namespace OpenGL