summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 27
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 18
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp 66
-rw-r--r-- src/video_core/renderer_opengl/gl_staging_buffer_pool.h 22
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 8
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h 8
8 files changed, 106 insertions, 60 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 9d5209e97..e6c70fb34 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -145,8 +145,12 @@ StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestUploadBuffer(size);
}
-StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
- return staging_buffer_pool.RequestDownloadBuffer(size);
+StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+ return staging_buffer_pool.RequestDownloadBuffer(size, deferred);
+}
+
+void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
+ staging_buffer_pool.FreeDeferredStagingBuffer(buffer);
}
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
@@ -177,13 +181,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
- std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+ bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
- std::span<const VideoCommon::BufferCopy> copies) {
- CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+ std::span<const VideoCommon::BufferCopy> copies, bool) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
}
void BufferCacheRuntime::PreCopyBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8613037eb..71cd45d35 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -30,6 +30,8 @@ public:
void MakeResident(GLenum access) noexcept;
+ void MarkUsage(u64 offset, u64 size) {}
+
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@@ -64,24 +66,33 @@ public:
[[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
- [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
+ [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
+
+ void FreeDeferredStagingBuffer(StagingBufferMap& buffer);
+
+ bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
+ return false;
+ }
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
- std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
- std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
- std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+ bool can_reorder_upload = false);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
- std::span<const VideoCommon::BufferCopy> copies);
+ std::span<const VideoCommon::BufferCopy> copies, bool);
void PreCopyBarrier();
void PostCopyBarrier();
void Finish();
+ void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
+
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@@ -182,6 +193,10 @@ public:
return device.CanReportMemoryUsage();
}
+ u32 GetStorageBufferAlignment() const {
+ return static_cast<u32>(device.GetShaderStorageBufferAlignment());
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -232,7 +247,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
- static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+ static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 46d88c664..a6c93068f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -264,33 +264,33 @@ std::string Device::GetVendorName() const {
if (vendor_name == "Intel") {
// For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
- // Simply return `INTEL` for those as well as the Windows driver.
+ // Simply return `Intel` for those as well as the Windows driver.
- return "INTEL";
+ return "Intel";
}
if (vendor_name == "Intel Open Source Technology Center") {
- return "I965";
+ return "i965";
}
if (vendor_name == "Mesa Project") {
- return "I915";
+ return "i915";
}
if (vendor_name == "Mesa/X.org") {
// This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
- // MESA instead of one of those driver names.
+ // Mesa instead of one of those driver names.
- return "MESA";
+ return "Mesa";
}
if (vendor_name == "AMD") {
- return "RADEONSI";
+ return "RadeonSI";
}
if (vendor_name == "nouveau") {
- return "NOUVEAU";
+ return "Nouveau";
}
if (vendor_name == "X.Org") {
return "R600";
}
if (vendor_name == "Collabora Ltd") {
- return "ZINK";
+ return "Zink";
}
if (vendor_name == "Intel Corporation") {
- return "OPENSWR";
+ return "OpenSWR";
}
if (vendor_name == "Microsoft Corporation") {
return "D3D12";
@@ -299,7 +299,7 @@ std::string Device::GetVendorName() const {
// Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
// strategy would have returned `NVIDIA` here for this driver, the same result as the
// proprietary driver.
- return "TEGRA";
+ return "Tegra";
}
return vendor_name;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 2888e0238..26f2d0ea7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.has_gl_bool_ref_bug = device.HasBoolRefBug(),
.ignore_nan_fp_comparisons = true,
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
+ .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(),
},
host_info{
.support_float64 = true,
@@ -240,6 +241,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
+ .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
} {
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
index bbb06e51f..cadad6507 100644
--- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
@@ -28,63 +28,69 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
StagingBuffers::~StagingBuffers() = default;
-StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
+StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence,
+ bool deferred) {
MICROPROFILE_SCOPE(OpenGL_BufferRequest);
const size_t index = RequestBuffer(requested_size);
- OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
- sync_indices[index] = insert_fence ? ++current_sync_index : 0;
+ OGLSync* const sync = insert_fence ? &allocs[index].sync : nullptr;
+ allocs[index].sync_index = insert_fence ? ++current_sync_index : 0;
+ allocs[index].deferred = deferred;
return StagingBufferMap{
- .mapped_span = std::span(maps[index], requested_size),
+ .mapped_span = std::span(allocs[index].map, requested_size),
.sync = sync,
- .buffer = buffers[index].handle,
+ .buffer = allocs[index].buffer.handle,
+ .index = index,
};
}
+void StagingBuffers::FreeDeferredStagingBuffer(size_t index) {
+ ASSERT(allocs[index].deferred);
+ allocs[index].deferred = false;
+}
+
size_t StagingBuffers::RequestBuffer(size_t requested_size) {
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
return *index;
}
-
- OGLBuffer& buffer = buffers.emplace_back();
- buffer.Create();
+ StagingBufferAlloc alloc;
+ alloc.buffer.Create();
const auto next_pow2_size = Common::NextPow2(requested_size);
- glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
+ glNamedBufferStorage(alloc.buffer.handle, next_pow2_size, nullptr,
storage_flags | GL_MAP_PERSISTENT_BIT);
- maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
- map_flags | GL_MAP_PERSISTENT_BIT)));
- syncs.emplace_back();
- sync_indices.emplace_back();
- sizes.push_back(next_pow2_size);
-
- ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
- maps.size() == sizes.size());
-
- return buffers.size() - 1;
+ alloc.map = static_cast<u8*>(glMapNamedBufferRange(alloc.buffer.handle, 0, next_pow2_size,
+ map_flags | GL_MAP_PERSISTENT_BIT));
+ alloc.size = next_pow2_size;
+ allocs.emplace_back(std::move(alloc));
+ return allocs.size() - 1;
}
std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
size_t known_unsignaled_index = current_sync_index + 1;
size_t smallest_buffer = std::numeric_limits<size_t>::max();
std::optional<size_t> found;
- const size_t num_buffers = sizes.size();
+ const size_t num_buffers = allocs.size();
for (size_t index = 0; index < num_buffers; ++index) {
- const size_t buffer_size = sizes[index];
+ StagingBufferAlloc& alloc = allocs[index];
+ const size_t buffer_size = alloc.size;
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
continue;
}
- if (syncs[index].handle != 0) {
- if (sync_indices[index] >= known_unsignaled_index) {
+ if (alloc.deferred) {
+ continue;
+ }
+ if (alloc.sync.handle != 0) {
+ if (alloc.sync_index >= known_unsignaled_index) {
// This fence is later than a fence that is known to not be signaled
continue;
}
- if (!syncs[index].IsSignaled()) {
+ if (!alloc.sync.IsSignaled()) {
// Since this fence hasn't been signaled, it's safe to assume all later
// fences haven't been signaled either
- known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
+ known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index);
continue;
}
- syncs[index].Release();
+ alloc.sync.Release();
}
smallest_buffer = buffer_size;
found = index;
@@ -143,8 +149,12 @@ StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
-StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
- return download_buffers.RequestMap(size, false);
+StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size, bool deferred) {
+ return download_buffers.RequestMap(size, false, deferred);
+}
+
+void StagingBufferPool::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
+ download_buffers.FreeDeferredStagingBuffer(buffer.index);
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
index 60f72d3a0..07a56b4d2 100644
--- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
@@ -26,23 +26,30 @@ struct StagingBufferMap {
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
+ size_t index;
};
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
- StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
+ StagingBufferMap RequestMap(size_t requested_size, bool insert_fence, bool deferred = false);
+
+ void FreeDeferredStagingBuffer(size_t index);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
- std::vector<OGLSync> syncs;
- std::vector<OGLBuffer> buffers;
- std::vector<u8*> maps;
- std::vector<size_t> sizes;
- std::vector<size_t> sync_indices;
+ struct StagingBufferAlloc {
+ OGLSync sync;
+ OGLBuffer buffer;
+ u8* map;
+ size_t size;
+ size_t sync_index;
+ bool deferred;
+ };
+ std::vector<StagingBufferAlloc> allocs;
GLenum storage_flags;
GLenum map_flags;
size_t current_sync_index = 0;
@@ -85,7 +92,8 @@ public:
~StagingBufferPool() = default;
StagingBufferMap RequestUploadBuffer(size_t size);
- StagingBufferMap RequestDownloadBuffer(size_t size);
+ StagingBufferMap RequestDownloadBuffer(size_t size, bool deferred = false);
+ void FreeDeferredStagingBuffer(StagingBufferMap& buffer);
private:
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 512eef575..66a5ca03e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -557,8 +557,12 @@ StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestUploadBuffer(size);
}
-StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
- return staging_buffer_pool.RequestDownloadBuffer(size);
+StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+ return staging_buffer_pool.RequestDownloadBuffer(size, deferred);
+}
+
+void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
+ staging_buffer_pool.FreeDeferredStagingBuffer(buffer);
}
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index e71b87e99..34870c81f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -74,7 +74,9 @@ public:
StagingBufferMap UploadStagingBuffer(size_t size);
- StagingBufferMap DownloadStagingBuffer(size_t size);
+ StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
+
+ void FreeDeferredStagingBuffer(StagingBufferMap& buffer);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
@@ -359,7 +361,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
- static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+ static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
@@ -367,7 +369,7 @@ struct TextureCacheParams {
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
- using AsyncBuffer = u32;
+ using AsyncBuffer = OpenGL::StagingBufferMap;
using BufferType = GLuint;
};