summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp232
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h160
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_device.h6
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h9
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp579
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h66
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp61
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h2
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp25
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h32
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp94
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h60
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp38
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h32
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp51
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h18
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp30
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h9
22 files changed, 624 insertions, 904 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5772cad87..6da3906a4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <memory>
+#include <span>
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+namespace {
+struct BindlessSSBO {
+ GLuint64EXT address;
+ GLsizei length;
+ GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
+
+constexpr std::array PROGRAM_LUT{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+ buffer.Create();
+ const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
+ glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
+ glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
+
+ if (runtime.has_unified_vertex_buffers) {
+ glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
+ }
+}
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
+ glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
-MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
+void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
+ glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
-Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
- : BufferBlock{cpu_addr_, size_} {
- gl_buffer.Create();
- glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
- if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
- glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
- glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+void Buffer::MakeResident(GLenum access) noexcept {
+ // Abuse GLenum's order to exit early
+ // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
+ if (access <= current_residency_access || buffer.handle == 0) {
+ return;
+ }
+ if (std::exchange(current_residency_access, access) != GL_NONE) {
+ // If the buffer is already resident, remove its residency before promoting it
+ glMakeNamedBufferNonResidentNV(buffer.handle);
}
+ glMakeNamedBufferResidentNV(buffer.handle, access);
}
-Buffer::~Buffer() = default;
-
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
- glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
- static_cast<GLsizeiptr>(data_size), data);
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
+ : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+ use_assembly_shaders{device.UseAssemblyShaders()},
+ has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
+ stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
+ GLint gl_max_attributes;
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
+ max_attributes = static_cast<u32>(gl_max_attributes);
+ for (auto& stage_uniforms : fast_uniforms) {
+ for (OGLBuffer& buffer : stage_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
+ }
+ }
+ for (auto& stage_uniforms : copy_uniforms) {
+ for (OGLBuffer& buffer : stage_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+ }
+ }
+ for (OGLBuffer& buffer : copy_compute_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+ }
}
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
- MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
- const GLintptr gl_offset = static_cast<GLintptr>(offset);
- if (read_buffer.handle == 0) {
- read_buffer.Create();
- glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
- GL_STREAM_READ);
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ for (const VideoCommon::BufferCopy& copy : copies) {
+ glCopyNamedBufferSubData(
+ src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
+ static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
- glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
- glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
- glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
}
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size) {
- glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
+void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
+ if (has_unified_vertex_buffers) {
+ buffer.MakeResident(GL_READ_ONLY);
+ glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
+ index_buffer_offset = offset;
+ }
}
-OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- const Device& device_, OGLStreamBuffer& stream_buffer_,
- StateTracker& state_tracker)
- : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
- if (!device.HasFastBufferSubData()) {
+void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
+ u32 stride) {
+ if (index >= max_attributes) {
return;
}
-
- static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
- glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
- for (const GLuint cbuf : cbufs) {
- glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+ if (has_unified_vertex_buffers) {
+ buffer.MakeResident(GL_READ_ONLY);
+ glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
+ buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
+ } else {
+ glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
+ static_cast<GLsizei>(stride));
}
}
-OGLBufferCache::~OGLBufferCache() {
- glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+ u32 offset, u32 size) {
+ if (use_assembly_shaders) {
+ GLuint handle;
+ if (offset != 0) {
+ handle = copy_uniforms[stage][binding_index].handle;
+ glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+ } else {
+ handle = buffer.Handle();
+ }
+ glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<Buffer>(device, cpu_addr, size);
+void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+ u32 size) {
+ if (use_assembly_shaders) {
+ GLuint handle;
+ if (offset != 0) {
+ handle = copy_compute_uniforms[binding_index].handle;
+ glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+ } else {
+ handle = buffer.Handle();
+ }
+ glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
- return {0, 0, 0};
+void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+ u32 offset, u32 size, bool is_written) {
+ if (use_assembly_shaders) {
+ const BindlessSSBO ssbo{
+ .address = buffer.HostGpuAddr() + offset,
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
+ buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+ glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
- std::size_t size) {
- DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
- const GLuint cbuf = cbufs[cbuf_cursor++];
+void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+ u32 size, bool is_written) {
+ if (use_assembly_shaders) {
+ const BindlessSSBO ssbo{
+ .address = buffer.HostGpuAddr() + offset,
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
+ buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+ glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
+ } else if (size == 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
+}
- glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
- return {cbuf, 0, 0};
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
+ u32 size) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 17ee90316..d8b20a9af 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,79 +5,157 @@
#pragma once
#include <array>
-#include <memory>
+#include <span>
+#include "common/alignment.h"
#include "common/common_types.h"
+#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-namespace Core {
-class System;
-}
-
namespace OpenGL {
-class Device;
-class OGLStreamBuffer;
-class RasterizerOpenGL;
-class StateTracker;
+class BufferCacheRuntime;
-class Buffer : public VideoCommon::BufferBlock {
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
- explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
- ~Buffer();
+ explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
+ u64 size_bytes);
+ explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
- void Upload(std::size_t offset, std::size_t data_size, const u8* data);
+ void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
- void Download(std::size_t offset, std::size_t data_size, u8* data);
+ void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
- void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size);
+ void MakeResident(GLenum access) noexcept;
- GLuint Handle() const noexcept {
- return gl_buffer.handle;
+ [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
+ return address;
}
- u64 Address() const noexcept {
- return gpu_address;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
}
private:
- OGLBuffer gl_buffer;
- OGLBuffer read_buffer;
- u64 gpu_address = 0;
+ GLuint64EXT address = 0;
+ OGLBuffer buffer;
+ GLenum current_residency_access = GL_NONE;
};
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
-class OGLBufferCache final : public GenericBufferCache {
+class BufferCacheRuntime {
+ friend Buffer;
+
public:
- explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
- Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
- const Device& device, OGLStreamBuffer& stream_buffer,
- StateTracker& state_tracker);
- ~OGLBufferCache();
+ static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
+
+ explicit BufferCacheRuntime(const Device& device_);
+
+ void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies);
+
+ void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
+
+ void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+
+ void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+ bool is_written);
+
+ void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+ bool is_written);
+
+ void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+ if (use_assembly_shaders) {
+ const GLuint handle = fast_uniforms[stage][binding_index].handle;
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+ glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+ fast_uniforms[stage][binding_index].handle, 0,
+ static_cast<GLsizeiptr>(size));
+ }
+ }
- BufferInfo GetEmptyBuffer(std::size_t) override;
+ void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
+ if (use_assembly_shaders) {
+ glProgramBufferParametersIuivNV(
+ PABO_LUT[stage], binding_index, 0,
+ static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
+ reinterpret_cast<const GLuint*>(data.data()));
+ } else {
+ glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+ }
+ }
- void Acquire() noexcept {
- cbuf_cursor = 0;
+ std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
+ const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ return mapped_span;
}
-protected:
- std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+ [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
+ return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
+ }
- BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+ [[nodiscard]] bool HasFastBufferSubData() const noexcept {
+ return has_fast_buffer_sub_data;
+ }
private:
- static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+ static constexpr std::array PABO_LUT{
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+ };
const Device& device;
- std::size_t cbuf_cursor = 0;
- std::array<GLuint, NUM_CBUFS> cbufs{};
+ bool has_fast_buffer_sub_data = false;
+ bool use_assembly_shaders = false;
+ bool has_unified_vertex_buffers = false;
+
+ u32 max_attributes = 0;
+
+ std::optional<StreamBuffer> stream_buffer;
+
+ std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+ VideoCommon::NUM_STAGES>
+ fast_uniforms;
+ std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+ VideoCommon::NUM_STAGES>
+ copy_uniforms;
+ std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
+
+ u32 index_buffer_offset = 0;
+};
+
+struct BufferCacheParams {
+ using Runtime = OpenGL::BufferCacheRuntime;
+ using Buffer = OpenGL::Buffer;
+
+ static constexpr bool IS_OPENGL = true;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
+ static constexpr bool USE_MEMORY_MAPS = false;
};
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 04c267ee4..48d5c4a5e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
-
namespace {
-
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
@@ -197,11 +195,13 @@ bool IsASTCSupported() {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
-
} // Anonymous namespace
-Device::Device()
- : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
+Device::Device() {
+ if (!GLAD_GL_VERSION_4_6) {
+ LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
+ throw std::runtime_error{"Insufficient version"};
+ }
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
@@ -217,6 +217,9 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
+
+ max_uniform_buffers = BuildMaxUniformBuffers();
+ base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9141de635..ee053776d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@
namespace OpenGL {
-static constexpr u32 EmulationUniformBlockBinding = 0;
-
-class Device final {
+class Device {
public:
- struct BaseBindings final {
+ struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 3e9c922f5..151290101 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
- OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
+ BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 30dbee613..e714aa115 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -32,14 +32,13 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
- QueryCache& query_cache_);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ea4ca9a82..ecffc6abf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -44,28 +44,14 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
-constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
- NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr size_t TOTAL_CONST_BUFFER_BYTES =
- NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
struct TextureHandle {
constexpr TextureHandle(u32 data, bool via_header_index) {
@@ -101,20 +87,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
-std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry) {
- if (!entry.IsIndirect()) {
- return entry.GetSize();
- }
- if (buffer.size > Maxwell::MaxConstBufferSize) {
- LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
- Maxwell::MaxConstBufferSize);
- return Maxwell::MaxConstBufferSize;
- }
-
- return buffer.size;
-}
-
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -147,14 +119,6 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
- if (num_ssbos == 0) {
- return;
- }
- glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
- reinterpret_cast<const GLuint*>(ssbos));
-}
-
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::Buffer;
@@ -201,44 +165,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
- stream_buffer(device, state_tracker),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ buffer_cache_runtime(device),
+ buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
- buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
- unified_uniform_buffer.Create();
- glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
-
- if (device.UseAssemblyShaders()) {
- glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
- for (const GLuint cbuf : staging_cbufs) {
- glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
- nullptr, 0);
- }
- }
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
}
-RasterizerOpenGL::~RasterizerOpenGL() {
- if (device.UseAssemblyShaders()) {
- glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
- }
-}
+RasterizerOpenGL::~RasterizerOpenGL() = default;
-void RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SyncVertexFormats() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
flags[Dirty::VertexFormats] = false;
- MICROPROFILE_SCOPE(OpenGL_VAO);
-
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
// the first 16 vertex attributes always, as we don't know which ones are actually used until
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -274,55 +222,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
}
-void RasterizerOpenGL::SetupVertexBuffer() {
- auto& flags = maxwell3d.dirty.flags;
- if (!flags[Dirty::VertexBuffers]) {
- return;
- }
- flags[Dirty::VertexBuffers] = false;
-
- MICROPROFILE_SCOPE(OpenGL_VB);
-
- const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
-
- // Upload all guest vertex arrays sequentially to our buffer
- const auto& regs = maxwell3d.regs;
- for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
- if (!flags[Dirty::VertexBuffer0 + index]) {
- continue;
- }
- flags[Dirty::VertexBuffer0 + index] = false;
-
- const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled()) {
- continue;
- }
-
- const GPUVAddr start = vertex_array.StartAddress();
- const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
- ASSERT(end >= start);
-
- const GLuint gl_index = static_cast<GLuint>(index);
- const u64 size = end - start;
- if (size == 0) {
- glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
- if (use_unified_memory) {
- glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
- }
- continue;
- }
- const auto info = buffer_cache.UploadMemory(start, size);
- if (use_unified_memory) {
- glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
- glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
- info.address + info.offset, size);
- } else {
- glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
- }
- }
-}
-
-void RasterizerOpenGL::SetupVertexInstances() {
+void RasterizerOpenGL::SyncVertexInstances() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
@@ -343,17 +243,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
}
}
-GLintptr RasterizerOpenGL::SetupIndexBuffer() {
- MICROPROFILE_SCOPE(OpenGL_Index);
- const auto& regs = maxwell3d.regs;
- const std::size_t size = CalculateIndexBufferSize();
- const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
- return info.offset;
-}
-
-void RasterizerOpenGL::SetupShaders() {
- MICROPROFILE_SCOPE(OpenGL_Shader);
+void RasterizerOpenGL::SetupShaders(bool is_indexed) {
u32 clip_distances = 0;
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -410,11 +300,19 @@ void RasterizerOpenGL::SetupShaders() {
const size_t stage = index == 0 ? 0 : index - 1;
shaders[stage] = shader;
- SetupDrawConstBuffers(stage, shader);
- SetupDrawGlobalMemory(stage, shader);
SetupDrawTextures(shader, stage);
SetupDrawImages(shader, stage);
+ buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
+
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ u32 ssbo_index = 0;
+ for (const auto& buffer : shader->GetEntries().global_memory_entries) {
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+ buffer.cbuf_offset, buffer.is_written);
+ ++ssbo_index;
+ }
+
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -430,43 +328,26 @@ void RasterizerOpenGL::SetupShaders() {
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
const Shader* const shader = shaders[stage];
- if (shader) {
- const auto base = device.GetBaseBindings(stage);
- BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
- texture_index, image_index);
- }
- }
-}
-
-std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = maxwell3d.regs;
-
- std::size_t size = 0;
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!regs.vertex_array[index].IsEnabled())
+ if (!shader) {
continue;
-
- const GPUVAddr start = regs.vertex_array[index].StartAddress();
- const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-
- size += end - start;
- ASSERT(end >= start);
+ }
+ buffer_cache.BindHostStageBuffers(stage);
+ const auto& base = device.GetBaseBindings(stage);
+ BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+ texture_index, image_index);
}
-
- return size;
-}
-
-std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
- static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -475,6 +356,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
}
void RasterizerOpenGL::Clear() {
+ MICROPROFILE_SCOPE(OpenGL_Clears);
if (!maxwell3d.ShouldExecute()) {
return;
}
@@ -525,11 +407,9 @@ void RasterizerOpenGL::Clear() {
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- {
- auto lock = texture_cache.AcquireLock();
- texture_cache.UpdateRenderTargets(true);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- }
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UpdateRenderTargets(true);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -541,7 +421,6 @@ void RasterizerOpenGL::Clear() {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
-
++num_queued_commands;
}
@@ -550,75 +429,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
- SyncViewport();
- SyncRasterizeEnable();
- SyncPolygonModes();
- SyncColorMask();
- SyncFragmentColorClampState();
- SyncMultiSampleState();
- SyncDepthTestState();
- SyncDepthClamp();
- SyncStencilTestState();
- SyncBlendState();
- SyncLogicOpState();
- SyncCullMode();
- SyncPrimitiveRestart();
- SyncScissorTest();
- SyncPointState();
- SyncLineState();
- SyncPolygonOffset();
- SyncAlphaTest();
- SyncFramebufferSRGB();
-
- buffer_cache.Acquire();
- current_cbuf = 0;
-
- std::size_t buffer_size = CalculateVertexArraysSize();
-
- // Add space for index buffer
- if (is_indexed) {
- buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
- }
-
- // Uniform space for the 5 shader stages
- buffer_size =
- Common::AlignUp<std::size_t>(buffer_size, 4) +
- (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
-
- // Add space for at least 18 constant buffers
- buffer_size += Maxwell::MaxConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-
- // Prepare the vertex array.
- buffer_cache.Map(buffer_size);
-
- // Prepare vertex array format.
- SetupVertexFormat();
-
- // Upload vertex and index data.
- SetupVertexBuffer();
- SetupVertexInstances();
- GLintptr index_buffer_offset = 0;
- if (is_indexed) {
- index_buffer_offset = SetupIndexBuffer();
- }
-
- // Setup emulation uniform buffer.
- if (!device.UseAssemblyShaders()) {
- MaxwellUniformData ubo;
- ubo.SetFromRegs(maxwell3d);
- const auto info =
- buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
- glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
- }
+ SyncState();
// Setup shaders and their used resources.
- auto lock = texture_cache.AcquireLock();
- SetupShaders();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ SetupShaders(is_indexed);
- // Signal the buffer cache that we are not going to upload more things.
- buffer_cache.Unmap();
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
@@ -632,7 +448,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
- const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
+ const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
@@ -672,22 +488,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- buffer_cache.Acquire();
- current_cbuf = 0;
-
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
BindComputeTextures(kernel);
- const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
- buffer_cache.Map(buffer_size);
-
- SetupComputeConstBuffers(kernel);
- SetupComputeGlobalMemory(kernel);
-
- buffer_cache.Unmap();
+ const auto& entries = kernel->GetEntries();
+ buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+ buffer_cache.UnbindComputeStorageBuffers();
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_memory_entries) {
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+ buffer.is_written);
+ ++ssbo_index;
+ }
+ buffer_cache.UpdateComputeBuffers();
+ buffer_cache.BindHostComputeBuffers();
const auto& launch_desc = kepler_compute.launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -703,6 +519,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
+void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -711,19 +533,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
- buffer_cache.FlushRegion(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.DownloadMemory(addr, size);
+ }
query_cache.FlushRegion(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.MustFlushRegion(addr, size);
+ return buffer_cache.IsRegionGpuModified(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
- buffer_cache.MustFlushRegion(addr, size);
+ buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -732,11 +558,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
shader_cache.InvalidateRegion(addr, size);
- buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -745,26 +574,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
+ shader_cache.OnCPUWrite(addr, size);
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
- shader_cache.OnCPUWrite(addr, size);
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.CachedWriteMemory(addr, size);
+ }
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.FlushCachedWrites();
+ }
}
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
shader_cache.OnCPUWrite(addr, size);
}
@@ -799,14 +637,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::WaitForIdle() {
- // Place a barrier on everything that is not framebuffer related.
- // This is related to another flag that is not currently implemented.
- glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
- GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
- GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
- GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
- GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
- GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
void RasterizerOpenGL::FragmentBarrier() {
@@ -831,18 +662,21 @@ void RasterizerOpenGL::TickFrame() {
num_queued_commands = 0;
fence_manager.TickFrame();
- buffer_cache.TickFrame();
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.TickFrame();
+ }
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@@ -854,7 +688,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
if (!image_view) {
return false;
@@ -921,166 +755,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
}
}
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
- static constexpr std::array PARAMETER_LUT{
- GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
- GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
- GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
- };
- MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& stages = maxwell3d.state.shader_stages;
- const auto& shader_stage = stages[stage_index];
- const auto& entries = shader->GetEntries();
- const bool use_unified = entries.use_unified_uniforms;
- const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
-
- const auto base_bindings = device.GetBaseBindings(stage_index);
- u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
- for (const auto& entry : entries.const_buffers) {
- const u32 index = entry.GetIndex();
- const auto& buffer = shader_stage.const_buffers[index];
- SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
- base_unified_offset + index * Maxwell::MaxConstBufferSize);
- ++binding;
- }
- if (use_unified) {
- const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
- entries.global_memory_entries.size());
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
- base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
- }
-}
-
-void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
- MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& launch_desc = kepler_compute.launch_description;
- const auto& entries = kernel->GetEntries();
- const bool use_unified = entries.use_unified_uniforms;
-
- u32 binding = 0;
- for (const auto& entry : entries.const_buffers) {
- const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
- Tegra::Engines::ConstBufferInfo buffer;
- buffer.address = config.Address();
- buffer.size = config.size;
- buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
- use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
- ++binding;
- }
- if (use_unified) {
- const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
- NUM_CONST_BUFFERS_BYTES_PER_STAGE);
- }
-}
-
-void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
- const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry, bool use_unified,
- std::size_t unified_offset) {
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- if (device.UseAssemblyShaders()) {
- glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
- } else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
- }
- return;
- }
-
- // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
- // UBO alignment requirements.
- const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
-
- const bool fast_upload = !use_unified && device.HasFastBufferSubData();
-
- const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
- const GPUVAddr gpu_addr = buffer.address;
- auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
-
- if (device.UseAssemblyShaders()) {
- UNIMPLEMENTED_IF(use_unified);
- if (info.offset != 0) {
- const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
- glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
- info.handle = staging_cbuf;
- info.offset = 0;
- }
- glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
- return;
- }
-
- if (use_unified) {
- glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
- unified_offset, size);
- } else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
- }
-}
-
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
- static constexpr std::array TARGET_LUT = {
- GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
- GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
- };
- const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
- const auto& entries{shader->GetEntries().global_memory_entries};
-
- std::array<BindlessSSBO, 32> ssbos;
- ASSERT(entries.size() < ssbos.size());
-
- const bool assembly_shaders = device.UseAssemblyShaders();
- u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : entries) {
- const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
- const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
- const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
- ++binding;
- }
- if (assembly_shaders) {
- UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
- }
-}
-
-void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
- const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
- const auto& entries{kernel->GetEntries().global_memory_entries};
-
- std::array<BindlessSSBO, 32> ssbos;
- ASSERT(entries.size() < ssbos.size());
-
- u32 binding = 0;
- for (const auto& entry : entries) {
- const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
- const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
- const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
- ++binding;
- }
- if (device.UseAssemblyShaders()) {
- UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
- }
-}
-
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
- const size_t alignment{device.GetShaderStorageBufferAlignment()};
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
- if (device.UseAssemblyShaders()) {
- *ssbo = BindlessSSBO{
- .address = static_cast<GLuint64EXT>(info.address + info.offset),
- .length = static_cast<GLsizei>(size),
- .padding = 0,
- };
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
- static_cast<GLsizeiptr>(size));
- }
-}
-
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1128,6 +802,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
}
}
+void RasterizerOpenGL::SyncState() {
+ SyncViewport();
+ SyncRasterizeEnable();
+ SyncPolygonModes();
+ SyncColorMask();
+ SyncFragmentColorClampState();
+ SyncMultiSampleState();
+ SyncDepthTestState();
+ SyncDepthClamp();
+ SyncStencilTestState();
+ SyncBlendState();
+ SyncLogicOpState();
+ SyncCullMode();
+ SyncPrimitiveRestart();
+ SyncScissorTest();
+ SyncPointState();
+ SyncLineState();
+ SyncPolygonOffset();
+ SyncAlphaTest();
+ SyncFramebufferSRGB();
+ SyncVertexFormats();
+ SyncVertexInstances();
+}
+
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@@ -1163,9 +861,11 @@ void RasterizerOpenGL::SyncViewport() {
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
- glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
- regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
- : GL_NEGATIVE_ONE_TO_ONE);
+ const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
+ const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+ const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
+ state_tracker.ClipControl(origin, depth);
+ state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
if (dirty_viewport) {
@@ -1649,36 +1349,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
if (regs.tfb_enabled == 0) {
return;
}
-
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
-
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
- for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
- const auto& binding = regs.tfb_bindings[index];
- if (!binding.buffer_enable) {
- if (enabled_transform_feedback_buffers[index]) {
- glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
- 0);
- }
- enabled_transform_feedback_buffers[index] = false;
- continue;
- }
- enabled_transform_feedback_buffers[index] = true;
-
- auto& tfb_buffer = transform_feedback_buffers[index];
- tfb_buffer.Create();
-
- const GLuint handle = tfb_buffer.handle;
- const std::size_t size = binding.buffer_size;
- glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
- glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
- static_cast<GLsizeiptr>(size));
- }
+ UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1692,23 +1369,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
-
glEndTransformFeedback();
-
- for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
- const auto& binding = regs.tfb_bindings[index];
- if (!binding.buffer_enable) {
- continue;
- }
- UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
- const GLuint handle = transform_feedback_buffers[index].handle;
- const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
- static_cast<GLsizeiptr>(size));
- }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82e03e677..3745cf637 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@@ -72,6 +71,7 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -119,27 +119,6 @@ private:
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
- /// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
-
- /// Configures the current constbuffers to use for the kernel invocation.
- void SetupComputeConstBuffers(Shader* kernel);
-
- /// Configures a constant buffer.
- void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry, bool use_unified,
- std::size_t unified_offset);
-
- /// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
-
- /// Configures the current global memory entries to use for the kernel invocation.
- void SetupComputeGlobalMemory(Shader* kernel);
-
- /// Configures a global memory buffer.
- void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- size_t size, BindlessSSBO* ssbo);
-
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(const Shader* shader, size_t stage_index);
@@ -152,6 +131,9 @@ private:
/// Configures images in a compute shader.
void SetupComputeImages(const Shader* shader);
+ /// Syncs state to match guest's
+ void SyncState();
+
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -215,6 +197,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Syncs vertex formats to match the guest state
+ void SyncVertexFormats();
+
+ /// Syncs vertex instances to match the guest state
+ void SyncVertexInstances();
+
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
@@ -225,19 +213,7 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- std::size_t CalculateVertexArraysSize() const;
-
- std::size_t CalculateIndexBufferSize() const;
-
- /// Updates the current vertex format
- void SetupVertexFormat();
-
- void SetupVertexBuffer();
- void SetupVertexInstances();
-
- GLintptr SetupIndexBuffer();
-
- void SetupShaders();
+ void SetupShaders(bool is_indexed);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +225,12 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
- OGLStreamBuffer stream_buffer;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
+ BufferCacheRuntime buffer_cache_runtime;
+ BufferCache buffer_cache;
ShaderCacheOpenGL shader_cache;
QueryCache query_cache;
- OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +238,8 @@ private:
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
- std::array<GLuint, MAX_TEXTURES> texture_handles;
- std::array<GLuint, MAX_IMAGES> image_handles;
-
- std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
- transform_feedback_buffers;
- std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
- enabled_transform_feedback_buffers;
-
- static constexpr std::size_t NUM_CONSTANT_BUFFERS =
- Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
- std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
- std::size_t current_cbuf = 0;
- OGLBuffer unified_uniform_buffer;
+ std::array<GLuint, MAX_TEXTURES> texture_handles{};
+ std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Resetted on flush.
std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0e34a0f20..3428e5e21 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
handle = 0;
}
-void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
- ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
-
- glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
-}
-
void OGLSync::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f48398669..552d79db4 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -234,9 +234,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
- // Converts the buffer into a stream copy buffer with a fixed size
- void MakeStreamCopy(std::size_t buffer_size);
-
GLuint handle = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c35b71b6b..ac78d344c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
@@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-
-layout (std140, binding = {}) uniform vs_config {{
- float y_direction;
-}};
)";
class ShaderWriter final {
@@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
-bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
- const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
- // We waste one UBO for emulation
- const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
- return num_ubos > num_available_ubos;
-}
-
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@@ -420,9 +409,8 @@ public:
explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
ShaderType stage_, std::string_view identifier_,
std::string_view suffix_)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
- suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
- UseUnifiedUniforms(device_, ir_, stage_)} {
+ : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
+ identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -516,7 +504,8 @@ private:
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
- code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
+ const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
+ code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -542,7 +531,7 @@ private:
code.AddNewLine();
- code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
+ code.AddLine(COMMON_DECLARATIONS);
}
void DeclareVertex() {
@@ -865,17 +854,6 @@ private:
}
void DeclareConstantBuffers() {
- if (use_unified_uniforms) {
- const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
- static_cast<u32>(ir.GetGlobalMemory().size());
- code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
- binding);
- code.AddLine(" uint cbufs[];");
- code.AddLine("}};");
- code.AddNewLine();
- return;
- }
-
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
for (const auto& [index, info] : ir.GetConstantBuffers()) {
const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
@@ -1081,29 +1059,17 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
- const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- if (use_unified_uniforms) {
- return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
- Type::Uint};
- } else {
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
- }
-
- // Indirect access
- if (use_unified_uniforms) {
- return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
- Visit(offset).AsUint()),
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
+ // Indirect access
const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
@@ -2293,7 +2259,6 @@ private:
}
}
}
-
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@@ -2337,7 +2302,8 @@ private:
}
Expression YNegate(Operation operation) {
- return {"y_direction", Type::Float};
+ // Y_NEGATE is mapped to this uniform value
+ return {"gl_FrontMaterial.ambient.a", Type::Float};
}
template <u32 element>
@@ -2787,7 +2753,6 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
- const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
+ for (const auto& buffer : entries.const_buffers) {
+ entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+ }
entries.shader_length = ir.GetLength();
- entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index be68994bb..0397a000c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -55,7 +55,7 @@ struct ShaderEntries {
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
- bool use_unified_uniforms{};
+ u32 enabled_uniform_buffers{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 60e6fa39f..dbdf5230f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
-void SetupDirtyVertexArrays(Tables& tables) {
- static constexpr std::size_t num_array = 3;
+void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
- const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
-
- FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
- FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
-
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
- SetupDirtyRenderTargets(tables);
+ SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
- SetupDirtyVertexArrays(tables);
+ SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
-
- auto& store = dirty.on_write_stores;
- store[VertexBuffers] = true;
- for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
- store[VertexBuffer0 + i] = true;
- }
-}
-
-void StateTracker::InvalidateStreamBuffer() {
- flags[Dirty::VertexBuffers] = true;
- for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
- flags[index] = true;
- }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 574615d3c..94c905116 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -28,10 +28,6 @@ enum : u8 {
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
- VertexBuffers,
- VertexBuffer0,
- VertexBuffer31 = VertexBuffer0 + 31,
-
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
@@ -92,8 +88,6 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
- void InvalidateStreamBuffer();
-
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@@ -110,13 +104,32 @@ public:
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
+ void ClipControl(GLenum new_origin, GLenum new_depth) {
+ if (new_origin == origin && new_depth == depth) {
+ return;
+ }
+ origin = new_origin;
+ depth = new_depth;
+ glClipControl(origin, depth);
+ }
+
+ void SetYNegate(bool new_y_negate) {
+ if (new_y_negate == y_negate) {
+ return;
+ }
+ // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
+ y_negate = new_y_negate;
+ const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
+ glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
+ }
+
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
- flags[OpenGL::Dirty::VertexBuffers] = true;
- flags[OpenGL::Dirty::VertexBuffer0] = true;
+ flags[VideoCommon::Dirty::VertexBuffers] = true;
+ flags[VideoCommon::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -202,6 +215,9 @@ private:
GLuint framebuffer = 0;
GLuint index_buffer = 0;
+ GLenum origin = GL_LOWER_LEFT;
+ GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
+ bool y_negate = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e0819cdf2..bfb992a79 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,70 +1,64 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <tuple>
-#include <vector>
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
- MP_RGB(128, 128, 192));
-
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
- : state_tracker{state_tracker_} {
- gl_buffer.Create();
-
- static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
- glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
- mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
-
- if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
- glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
- glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+StreamBuffer::StreamBuffer() {
+ static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+ buffer.Create();
+ glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+ glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+ mapped_pointer =
+ static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+ for (OGLSync& sync : fences) {
+ sync.Create();
}
}
-OGLStreamBuffer::~OGLStreamBuffer() {
- glUnmapNamedBuffer(gl_buffer.handle);
- gl_buffer.Release();
-}
-
-std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
- ASSERT(size <= BUFFER_SIZE);
- ASSERT(alignment <= BUFFER_SIZE);
- mapped_size = size;
-
- if (alignment > 0) {
- buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+ ASSERT(size < REGION_SIZE);
+ for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+ ++region) {
+ fences[region].Create();
}
+ used_iterator = iterator;
- if (buffer_pos + size > BUFFER_SIZE) {
- MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
- glInvalidateBufferData(gl_buffer.handle);
- state_tracker.InvalidateStreamBuffer();
-
- buffer_pos = 0;
+ for (size_t region = Region(free_iterator) + 1,
+ region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+ region < region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
}
-
- return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
-}
-
-void OGLStreamBuffer::Unmap(GLsizeiptr size) {
- ASSERT(size <= mapped_size);
-
- if (size > 0) {
- glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
+ if (iterator + size > free_iterator) {
+ free_iterator = iterator + size;
}
-
- buffer_pos += size;
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+ fences[region].Create();
+ }
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index dd9cf67eb..6dbb6bfba 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,9 +1,12 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
+#include <array>
+#include <memory>
+#include <span>
#include <utility>
#include <glad/glad.h>
@@ -13,48 +16,35 @@
namespace OpenGL {
-class Device;
-class StateTracker;
+class StreamBuffer {
+ static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
+ static constexpr size_t NUM_SYNCS = 16;
+ static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
+ static constexpr size_t MAX_ALIGNMENT = 256;
+ static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
+ static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
+ static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
-class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
- ~OGLStreamBuffer();
-
- /*
- * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
- * and the optional alignment requirement.
- * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
- * The return values are the pointer to the new chunk, and the offset within the buffer.
- * The actual used size must be specified on unmapping the chunk.
- */
- std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
-
- void Unmap(GLsizeiptr size);
-
- GLuint Handle() const {
- return gl_buffer.handle;
- }
+ explicit StreamBuffer();
- u64 Address() const {
- return gpu_address;
- }
+ [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
- GLsizeiptr Size() const noexcept {
- return BUFFER_SIZE;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
}
private:
- static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
-
- StateTracker& state_tracker;
-
- OGLBuffer gl_buffer;
+ [[nodiscard]] static size_t Region(size_t offset) noexcept {
+ return offset / REGION_SIZE;
+ }
- GLuint64EXT gpu_address = 0;
- GLintptr buffer_pos = 0;
- GLsizeiptr mapped_size = 0;
- u8* mapped_ptr = nullptr;
+ size_t iterator = 0;
+ size_t used_iterator = 0;
+ size_t free_iterator = 0;
+ u8* mapped_pointer = nullptr;
+ OGLBuffer buffer;
+ std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 546cb6d00..31eb54123 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
} // Anonymous namespace
-ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
- : span(map, size), sync{sync_}, handle{handle_} {}
-
ImageBufferMap::~ImageBufferMap() {
if (sync) {
sync->Create();
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
-ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
+ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
-ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
@@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
}
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
- size_t buffer_offset,
std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) {
case ImageType::e2D:
- return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+ return util_shaders.BlockLinearUpload2D(image, map, swizzles);
case ImageType::e3D:
- return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+ return util_shaders.BlockLinearUpload3D(image, map, swizzles);
case ImageType::Linear:
- return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+ return util_shaders.PitchUpload(image, map, swizzles);
default:
UNREACHABLE();
break;
@@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
- return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
+ return ImageBufferMap{
+ .mapped_span = std::span(maps[index], requested_size),
+ .sync = sync,
+ .buffer = buffers[index].handle,
+ };
}
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
}
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
- glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
+ glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
}
- CopyBufferToImage(copy, buffer_offset);
+ CopyBufferToImage(copy, map.offset);
}
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
+ glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
copy.dst_offset, copy.size);
}
}
-void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
- glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
u32 current_row_length = std::numeric_limits<u32>::max();
@@ -759,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
}
- CopyImageToBuffer(copy, buffer_offset);
+ CopyImageToBuffer(copy, map.offset);
}
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 15b7c3676..874cf54f4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -31,23 +31,13 @@ using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
-class ImageBufferMap {
-public:
- explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
+struct ImageBufferMap {
~ImageBufferMap();
- GLuint Handle() const noexcept {
- return handle;
- }
-
- std::span<u8> Span() const noexcept {
- return span;
- }
-
-private:
- std::span<u8> span;
+ std::span<u8> mapped_span;
+ size_t offset = 0;
OGLSync* sync;
- GLuint handle;
+ GLuint buffer;
};
struct FormatProperties {
@@ -69,9 +59,9 @@ public:
void Finish();
- ImageBufferMap MapUploadBuffer(size_t size);
+ ImageBufferMap UploadStagingBuffer(size_t size);
- ImageBufferMap MapDownloadBuffer(size_t size);
+ ImageBufferMap DownloadStagingBuffer(size_t size);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
@@ -89,7 +79,7 @@ public:
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
@@ -148,14 +138,12 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
- std::span<const VideoCommon::BufferCopy> copies);
+ void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
- void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
- std::span<const VideoCommon::BufferImageCopy> copies);
+ void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint Handle() const noexcept {
return texture.handle;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 21159e498..9d2acd4d9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -29,9 +29,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
break;
}
}
-
} // Anonymous namespace
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
@@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
- emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
+ program_manager{device},
+ rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
+ if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
+ glEnable(GL_DEBUG_OUTPUT);
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+ glDebugMessageCallback(DebugHandler, nullptr);
+ }
+ AddTelemetryFields();
+ InitOpenGLObjects();
+}
RendererOpenGL::~RendererOpenGL() = default;
@@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
++m_current_frame;
- rasterizer->TickFrame();
+ rasterizer.TickFrame();
context->SwapBuffers();
render_window.OnFrameDisplayed();
@@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
- if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
return;
}
@@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+ glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() {
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
-void RendererOpenGL::CreateRasterizer() {
- if (rasterizer) {
- return;
- }
- rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
- screen_info, program_manager, state_tracker);
-}
-
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
@@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.BindHostPipeline(pipeline.handle);
+ state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
@@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
- glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
@@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_requested = false;
}
-bool RendererOpenGL::Init() {
- if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
- glEnable(GL_DEBUG_OUTPUT);
- glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
- glDebugMessageCallback(DebugHandler, nullptr);
- }
-
- AddTelemetryFields();
-
- if (!GLAD_GL_VERSION_4_6) {
- return false;
- }
-
- InitOpenGLObjects();
- CreateRasterizer();
-
- return true;
-}
-
-void RendererOpenGL::ShutDown() {}
-
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 44e109794..cc19a110f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -63,18 +64,18 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
- bool Init() override;
- void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+ VideoCore::RasterizerInterface* ReadRasterizer() override {
+ return &rasterizer;
+ }
+
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
- void CreateRasterizer();
-
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
@@ -98,8 +99,10 @@ private:
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
- const Device device;
- StateTracker state_tracker{gpu};
+ Device device;
+ StateTracker state_tracker;
+ ProgramManager program_manager;
+ RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
@@ -115,9 +118,6 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
- /// Global dummy shader pipeline
- ProgramManager program_manager;
-
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index eb849cbf2..1b58e8617 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
@@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
@@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
program_manager.RestoreGuestCompute();
}
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
@@ -167,13 +167,13 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 359997255..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -15,21 +15,22 @@
namespace OpenGL {
class Image;
-class ImageBufferMap;
class ProgramManager;
+struct ImageBufferMap;
+
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
- void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,