summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_opengl/blit_image.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp77
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h41
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache_base.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp47
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h15
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_device.h14
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_fsr.cpp101
-rw-r--r--src/video_core/renderer_opengl/gl_fsr.h43
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp40
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h7
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp83
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h20
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_context.h7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp150
-rw-r--r--src/video_core/renderer_opengl/gl_staging_buffer_pool.h95
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h51
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp299
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h82
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp90
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp42
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h15
36 files changed, 1106 insertions, 393 deletions
diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp
index 9a560a73b..3b03e8d5a 100644
--- a/src/video_core/renderer_opengl/blit_image.cpp
+++ b/src/video_core/renderer_opengl/blit_image.cpp
@@ -22,7 +22,7 @@ BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler,
const Region2D& dst_region, const Region2D& src_region,
const Extent3D& src_size) {
- glEnable(GL_CULL_FACE);
+ glDisable(GL_CULL_FACE);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
@@ -31,7 +31,6 @@ void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, G
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
- glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthRangeIndexed(0, 0.0, 0.0);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6af4ae793..38d553d3c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
return views.back().texture.handle;
}
-BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
- : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
+ StagingBufferPool& staging_buffer_pool_)
+ : device{device_}, staging_buffer_pool{staging_buffer_pool_},
+ has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@@ -117,7 +119,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
- glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
+ glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr,
GL_STREAM_DRAW);
}
}
@@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
}();
}
+StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestUploadBuffer(size);
+}
+
+StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestDownloadBuffer(size);
+}
+
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
return 2_GiB;
}
-void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
- std::span<const VideoCommon::BufferCopy> copies) {
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ if (barrier) {
+ PreCopyBarrier();
+ }
for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(
- src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
- static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
+ glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
+ static_cast<GLintptr>(copy.dst_offset),
+ static_cast<GLsizeiptr>(copy.size));
}
+ if (barrier) {
+ PostCopyBarrier();
+ }
+}
+
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+}
+
+void BufferCacheRuntime::PreCopyBarrier() {
+ // TODO: finer grained barrier?
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
+}
+
+void BufferCacheRuntime::PostCopyBarrier() {
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+}
+
+void BufferCacheRuntime::Finish() {
+ glFinish();
}
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
@@ -188,6 +232,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
}
}
+void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
+ for (u32 index = 0; index < bindings.buffers.size(); ++index) {
+ BindVertexBuffer(bindings.min_index + index, *bindings.buffers[index],
+ static_cast<u32>(bindings.offsets[index]),
+ static_cast<u32>(bindings.sizes[index]),
+ static_cast<u32>(bindings.strides[index]));
+ }
+}
+
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size) {
if (use_assembly_shaders) {
@@ -276,6 +329,14 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
+void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
+ for (u32 index = 0; index < bindings.buffers.size(); ++index) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, bindings.buffers[index]->Handle(),
+ static_cast<GLintptr>(bindings.offsets[index]),
+ static_cast<GLsizeiptr>(bindings.sizes[index]));
+ }
+}
+
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
PixelFormat format) {
*texture_handles++ = buffer.View(offset, size, format);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index bb1962073..41b746f3b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -7,11 +7,12 @@
#include <span>
#include "common/common_types.h"
-#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/buffer_cache/buffer_cache_base.h"
+#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
namespace OpenGL {
@@ -59,17 +60,36 @@ class BufferCacheRuntime {
public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
- explicit BufferCacheRuntime(const Device& device_);
+ explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
+
+ [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
+
+ [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
+
+ void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void PreCopyBarrier();
+ void PostCopyBarrier();
+ void Finish();
+
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+ void BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings);
+
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
@@ -82,6 +102,8 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+ void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
+
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
@@ -160,10 +182,6 @@ public:
return device.CanReportMemoryUsage();
}
- u32 GetStorageBufferAlignment() const {
- return static_cast<u32>(device.GetShaderStorageBufferAlignment());
- }
-
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -172,6 +190,7 @@ private:
};
const Device& device;
+ StagingBufferPool& staging_buffer_pool;
bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false;
@@ -204,14 +223,20 @@ private:
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
+ using Async_Buffer = OpenGL::StagingBufferMap;
+ using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
static constexpr bool IS_OPENGL = true;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
- static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
+ static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+
+ // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
new file mode 100644
index 000000000..f15ae8e25
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+
+namespace VideoCommon {
+template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>;
+}
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 26d066004..f9ca55c36 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -30,7 +30,7 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code,
- std::vector<u32> code_v)
+ std::vector<u32> code_v, bool force_context_flush)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
@@ -63,6 +63,16 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
+ uses_local_memory = info.uses_local_memory;
+ if (force_context_flush) {
+ std::scoped_lock lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
+ }
}
void ComputePipeline::Configure() {
@@ -78,7 +88,8 @@ void ComputePipeline::Configure() {
texture_cache.SynchronizeComputeDescriptors();
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
- std::array<GLuint, MAX_TEXTURES> samplers;
+ boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> gl_samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
@@ -122,7 +133,6 @@ void ComputePipeline::Configure() {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- samplers[sampler_binding++] = 0;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
@@ -133,8 +143,8 @@ void ComputePipeline::Configure() {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
- samplers[sampler_binding++] = sampler->Handle();
+ VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
+ samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
@@ -142,6 +152,9 @@ void ComputePipeline::Configure() {
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
+ if (!is_built) {
+ WaitForBuild();
+ }
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
@@ -174,10 +187,17 @@ void ComputePipeline::Configure() {
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
u32 texture_scaling_mask{};
+
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ gl_samplers[sampler_binding++] = 0;
+ }
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -186,6 +206,12 @@ void ComputePipeline::Configure() {
texture_scaling_mask |= 1u << texture_binding;
}
++texture_binding;
+
+ const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ gl_samplers[sampler_binding++] =
+ use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
}
}
u32 image_scaling_mask{};
@@ -216,11 +242,20 @@ void ComputePipeline::Configure() {
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
- glBindSamplers(0, sampler_binding, samplers.data());
+ glBindSamplers(0, sampler_binding, gl_samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
}
+void ComputePipeline::WaitForBuild() {
+ if (built_fence.handle == 0) {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+ is_built = true;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 6534dec32..c26b4fa5e 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -50,7 +50,8 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
- const Shader::Info& info_, std::string code, std::vector<u32> code_v);
+ const Shader::Info& info_, std::string code, std::vector<u32> code_v,
+ bool force_context_flush = false);
void Configure();
@@ -58,6 +59,10 @@ public:
return writes_global_memory;
}
+ [[nodiscard]] bool UsesLocalMemory() const noexcept {
+ return uses_local_memory;
+ }
+
void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_,
Tegra::MemoryManager* gpu_memory_) {
kepler_compute = kepler_compute_;
@@ -65,6 +70,8 @@ public:
}
private:
+ void WaitForBuild();
+
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
@@ -81,6 +88,12 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
+ bool uses_local_memory{};
+
+ std::mutex built_mutex;
+ std::condition_variable built_condvar;
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 22ed16ebf..33e63c17d 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -108,7 +108,8 @@ bool IsASTCSupported() {
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
+ return nsight || HasExtension(extensions, "GL_EXT_debug_tool") ||
+ Settings::values.renderer_debug.GetValue();
}
} // Anonymous namespace
@@ -193,6 +194,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_bool_ref_bug = true;
}
}
+ has_lmem_perf_bug = is_nvidia;
strict_context_required = emu_window.StrictContextRequired();
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
@@ -200,6 +202,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux)) && !strict_context_required;
use_driver_cache = is_nvidia;
+ supports_conditional_barriers = !is_intel;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 3ff8cad83..a5a6bbbba 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -176,6 +176,10 @@ public:
return vendor_name == "ATI Technologies Inc.";
}
+ bool IsIntel() const {
+ return vendor_name == "Intel";
+ }
+
bool CanReportMemoryUsage() const {
return can_report_memory;
}
@@ -184,6 +188,14 @@ public:
return strict_context_required;
}
+ bool SupportsConditionalBarriers() const {
+ return supports_conditional_barriers;
+ }
+
+ bool HasLmemPerfBug() const {
+ return has_lmem_perf_bug;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -229,6 +241,8 @@ private:
bool has_bool_ref_bug{};
bool can_report_memory{};
bool strict_context_required{};
+ bool supports_conditional_barriers{};
+ bool has_lmem_perf_bug{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 91463f854..5326172af 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -27,9 +27,7 @@ bool GLInnerFence::IsSignaled() const {
return true;
}
ASSERT(sync_object.handle != 0);
- GLint sync_status;
- glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
- return sync_status == GL_SIGNALED;
+ return sync_object.IsSignaled();
}
void GLInnerFence::Wait() {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index f1446e732..e21b19dcc 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -30,7 +30,17 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
+
+struct FenceManagerParams {
+ using FenceType = Fence;
+ using BufferCacheType = BufferCache;
+ using TextureCacheType = TextureCache;
+ using QueryCacheType = QueryCache;
+
+ static constexpr bool HAS_ASYNC_CHECK = false;
+};
+
+using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp
new file mode 100644
index 000000000..77262dcf1
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.cpp
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/settings.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+using namespace FSR;
+
+using FsrConstants = std::array<u32, 4 * 4>;
+
+FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
+ std::string_view fsr_rcas_source)
+ : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)},
+ fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)},
+ fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} {
+ glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f);
+ glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f);
+}
+
+FSR::~FSR() = default;
+
+void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+ u32 input_image_width, u32 input_image_height,
+ const Common::Rectangle<int>& crop_rect) {
+
+ const auto output_image_width = screen.GetWidth();
+ const auto output_image_height = screen.GetHeight();
+
+ if (fsr_intermediate_tex.handle) {
+ GLint fsr_tex_width, fsr_tex_height;
+ glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH,
+ &fsr_tex_width);
+ glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT,
+ &fsr_tex_height);
+ if (static_cast<u32>(fsr_tex_width) != output_image_width ||
+ static_cast<u32>(fsr_tex_height) != output_image_height) {
+ fsr_intermediate_tex.Release();
+ }
+ }
+ if (!fsr_intermediate_tex.handle) {
+ fsr_intermediate_tex.Create(GL_TEXTURE_2D);
+ glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width,
+ output_image_height);
+ glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0,
+ fsr_intermediate_tex.handle, 0);
+ }
+
+ GLint old_draw_fb;
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+
+ glFrontFace(GL_CW);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width),
+ static_cast<GLfloat>(output_image_height));
+
+ FsrConstants constants;
+ FsrEasuConOffset(
+ constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12,
+
+ static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
+ static_cast<f32>(input_image_width), static_cast<f32>(input_image_height),
+ static_cast<f32>(output_image_width), static_cast<f32>(output_image_height),
+ static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+
+ glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants));
+
+ program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle);
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+ glBindTextureUnit(0, fsr_intermediate_tex.handle);
+
+ const float sharpening =
+ static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
+
+ FsrRcasCon(constants.data(), sharpening);
+ glProgramUniform4uiv(fsr_rcas_frag.handle, 0, sizeof(constants), std::data(constants));
+}
+
+void FSR::InitBuffers() {
+ fsr_framebuffer.Create();
+}
+
+void FSR::ReleaseBuffers() {
+ fsr_framebuffer.Release();
+ fsr_intermediate_tex.Release();
+}
+
+const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept {
+ return fsr_rcas_frag;
+}
+
+bool FSR::AreBuffersInitialized() const noexcept {
+ return fsr_framebuffer.handle;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h
new file mode 100644
index 000000000..1f6ae3115
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.h
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <string_view>
+
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+class ProgramManager;
+
+class FSR {
+public:
+ explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
+ std::string_view fsr_rcas_source);
+ ~FSR();
+
+ void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+ u32 input_image_width, u32 input_image_height,
+ const Common::Rectangle<int>& crop_rect);
+
+ void InitBuffers();
+
+ void ReleaseBuffers();
+
+ [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept;
+
+ [[nodiscard]] bool AreBuffersInitialized() const noexcept;
+
+private:
+ OGLFramebuffer fsr_framebuffer;
+ OGLProgram fsr_vertex;
+ OGLProgram fsr_easu_frag;
+ OGLProgram fsr_rcas_frag;
+ OGLTexture fsr_intermediate_tex;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index c115dabe1..23a48c6fe 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -176,7 +176,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_)
+ const GraphicsPipelineKey& key_, bool force_context_flush)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
state_tracker{state_tracker_}, key{key_} {
if (shader_notify) {
@@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
+ uses_local_memory |= info.uses_local_memory;
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
@@ -231,7 +232,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
const bool in_parallel = thread_worker != nullptr;
const auto backend = device.GetShaderBackend();
auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
- shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
+ shader_notify, backend, in_parallel,
+ force_context_flush](ShaderContext::Context*) mutable {
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
case Settings::ShaderBackend::GLSL:
@@ -251,7 +253,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
break;
}
}
- if (in_parallel) {
+ if (force_context_flush || in_parallel) {
std::scoped_lock lock{built_mutex};
built_fence.Create();
// Flush this context to ensure compilation commands and fence are in the GPU pipe.
@@ -274,9 +276,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
- std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
size_t views_index{};
- GLsizei sampler_binding{};
+ size_t samplers_index{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -336,7 +338,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
- samplers[sampler_binding++] = 0;
}
}
}
@@ -350,8 +351,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
- samplers[sampler_binding++] = sampler->Handle();
+ VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
+ samplers[samplers_index++] = sampler;
}
}
if constexpr (Spec::has_images) {
@@ -444,10 +445,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
program_manager.BindSourcePrograms(source_programs);
}
const VideoCommon::ImageViewInOut* views_it{views.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
+ GLsizei sampler_binding{};
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
+ std::array<GLuint, MAX_TEXTURES> gl_samplers;
const auto prepare_stage{[&](size_t stage) {
buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
buffer_cache.BindHostStageBuffers(stage);
@@ -464,6 +468,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
u32 stage_image_binding{};
const auto& info{stage_infos[stage]};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ gl_samplers[sampler_binding++] = 0;
+ }
+ }
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -473,6 +484,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
++texture_binding;
++stage_texture_binding;
+
+ const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ gl_samplers[sampler_binding++] =
+ use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
}
}
for (const auto& desc : info.image_descriptors) {
@@ -533,7 +550,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
- glBindSamplers(0, sampler_binding, samplers.data());
+ glBindSamplers(0, sampler_binding, gl_samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
@@ -620,10 +637,7 @@ bool GraphicsPipeline::IsBuilt() noexcept {
if (built_fence.handle == 0) {
return false;
}
- // Timeout of zero means this is non-blocking
- const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
- ASSERT(sync_status != GL_WAIT_FAILED);
- is_built = sync_status != GL_TIMEOUT_EXPIRED;
+ is_built = built_fence.IsSignaled();
return is_built;
}
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 1c06b3655..7b3d7eae8 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -78,7 +78,7 @@ public:
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_);
+ const GraphicsPipelineKey& key_, bool force_context_flush = false);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
@@ -98,6 +98,10 @@ public:
return writes_global_memory;
}
+ [[nodiscard]] bool UsesLocalMemory() const noexcept {
+ return uses_local_memory;
+ }
+
[[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
@@ -146,6 +150,7 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
+ bool uses_local_memory{};
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 5070db441..99d7347f5 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
- : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
+ : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
glEndQuery(GetTarget(type));
}
-u64 HostCounter::BlockingQuery() const {
+u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
return *this;
}
-void CachedQuery::Flush() {
+u64 CachedQuery::Flush([[maybe_unused]] bool async) {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
@@ -106,11 +106,13 @@ void CachedQuery::Flush() {
stream.Update(false);
}
- VideoCommon::CachedQueryBase<HostCounter>::Flush();
+ auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
+
+ return result;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 14ce59990..872513f22 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(RasterizerOpenGL& rasterizer_);
+ explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
void EndQuery();
private:
- u64 BlockingQuery() const override;
+ u64 BlockingQuery(bool async = false) const override;
QueryCache& cache;
const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
- void Flush() override;
+ u64 Flush(bool async = false) override;
private:
QueryCache* cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7bced675c..edf527f2d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -58,12 +59,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
program_manager(program_manager_), state_tracker(state_tracker_),
- texture_cache_runtime(device, program_manager, state_tracker),
- texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+ texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
+ texture_cache(texture_cache_runtime, *this),
+ buffer_cache_runtime(device, staging_buffer_pool),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
- query_cache(*this), accelerate_dma(buffer_cache),
+ query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
blit_image(program_manager_) {}
@@ -220,6 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
gpu.TickWork();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ if (pipeline->UsesLocalMemory()) {
+ program_manager.LocalMemoryWarmup();
+ }
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
@@ -357,6 +362,7 @@ void RasterizerOpenGL::DrawTexture() {
.y = static_cast<s32>(draw_texture_state.src_y1)}};
blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
sampler->Handle(), dst_region, src_region, texture.size);
+ state_tracker.InvalidateState();
}
++num_queued_commands;
@@ -368,6 +374,9 @@ void RasterizerOpenGL::DispatchCompute() {
if (!pipeline) {
return;
}
+ if (pipeline->UsesLocalMemory()) {
+ program_manager.LocalMemoryWarmup();
+ }
pipeline->SetEngine(kepler_compute, gpu_memory);
pipeline->Configure();
const auto& qmd{kepler_compute->launch_description};
@@ -432,6 +441,29 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
return false;
}
+VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ auto area = texture_cache.GetFlushArea(addr, size);
+ if (area) {
+ return *area;
+ }
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ auto area = buffer_cache.GetFlushArea(addr, size);
+ if (area) {
+ return *area;
+ }
+ }
+ VideoCore::RasterizerDownloadArea new_area{
+ .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
+ .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE),
+ .preemtive = true,
+ };
+ return new_area;
+}
+
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
@@ -576,7 +608,7 @@ bool RasterizerOpenGL::AccelerateConditionalRendering() {
// Reimplement Host conditional rendering.
return false;
}
- // Medium / Low Hack: stub any checks on queries writen into the buffer cache.
+ // Medium / Low Hack: stub any checks on queries written into the buffer cache.
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
@@ -1262,7 +1294,8 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
-AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_)
+ : buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
std::scoped_lock lock{buffer_cache.mutex};
@@ -1274,4 +1307,44 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
return buffer_cache.DMAClear(src_address, amount, value);
}
+template <bool IS_IMAGE_UPLOAD>
+bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::BufferOperand& buffer_operand,
+ const Tegra::DMA::ImageOperand& image_operand) {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ const auto image_id = texture_cache.DmaImageId(image_operand, IS_IMAGE_UPLOAD);
+ if (image_id == VideoCommon::NULL_IMAGE_ID) {
+ return false;
+ }
+ const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
+ static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
+ const auto [buffer, offset] =
+ buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
+
+ const auto [image, copy] = texture_cache.DmaBufferImageCopy(
+ copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD);
+ const std::span copy_span{&copy, 1};
+
+ if constexpr (IS_IMAGE_UPLOAD) {
+ image->UploadMemory(buffer->Handle(), offset, copy_span);
+ } else {
+ texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
+ buffer_operand.address, buffer_size);
+ }
+ return true;
+}
+
+bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::ImageOperand& image_operand,
+ const Tegra::DMA::BufferOperand& buffer_operand) {
+ return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand);
+}
+
+bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::BufferOperand& buffer_operand,
+ const Tegra::DMA::ImageOperand& image_operand) {
+ return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand);
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 0c45832ae..a73ad15c1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -50,14 +50,26 @@ static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
public:
- explicit AccelerateDMA(BufferCache& buffer_cache);
+ explicit AccelerateDMA(BufferCache& buffer_cache, TextureCache& texture_cache);
bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+ bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src,
+ const Tegra::DMA::BufferOperand& dst) override;
+
+ bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src,
+ const Tegra::DMA::ImageOperand& dst) override;
+
private:
+ template <bool IS_IMAGE_UPLOAD>
+ bool DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
+ const Tegra::DMA::BufferOperand& src,
+ const Tegra::DMA::ImageOperand& dst);
+
BufferCache& buffer_cache;
+ TextureCache& texture_cache;
};
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated,
@@ -83,6 +95,7 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
bool MustFlushRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override;
@@ -150,7 +163,7 @@ private:
/// Syncs the cull mode to match the guest state
void SyncCullMode();
- /// Syncs the primitve restart to match the guest state
+ /// Syncs the primitive restart to match the guest state
void SyncPrimitiveRestart();
/// Syncs the depth test state to match the guest state
@@ -217,6 +230,7 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
+ StagingBufferPool staging_buffer_pool;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
@@ -234,7 +248,7 @@ private:
std::array<GLuint, MAX_TEXTURES> texture_handles{};
std::array<GLuint, MAX_IMAGES> image_handles{};
- /// Number of commands queued to the OpenGL driver. Resetted on flush.
+ /// Number of commands queued to the OpenGL driver. Reset on flush.
size_t num_queued_commands = 0;
bool has_written_global_memory = false;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3a664fdec..eae8fd110 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -3,6 +3,7 @@
#include <string_view>
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -158,6 +159,15 @@ void OGLSync::Release() {
handle = 0;
}
+bool OGLSync::IsSignaled() const noexcept {
+ // At least on Nvidia, glClientWaitSync with a timeout of 0
+ // is faster than glGetSynciv of GL_SYNC_STATUS.
+ // Timeout of 0 means this check is non-blocking.
+ const auto sync_status = glClientWaitSync(handle, 0, 0);
+ ASSERT(sync_status != GL_WAIT_FAILED);
+ return sync_status != GL_TIMEOUT_EXPIRED;
+}
+
void OGLFramebuffer::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index bc05ba4bd..77362acd2 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -263,6 +263,9 @@ public:
/// Deletes the internal OpenGL resource
void Release();
+ /// Checks if the sync has been signaled
+ bool IsSignaled() const noexcept;
+
GLsync handle = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7dd854e0f..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Shader::Stage::VertexB:
case Shader::Stage::Geometry:
if (!use_assembly_shaders && key.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
break;
case Shader::Stage::TessellationEval:
@@ -218,6 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.lower_left_origin_mode = true,
.need_declared_frag_colors = true,
.need_fastmath_off = device.NeedsFastmathOff(),
+ .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(),
.has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true,
@@ -231,13 +234,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
},
host_info{
+ .support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
- .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
+ .support_conditional_barrier = device.SupportsConditionalBarriers(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();
@@ -286,7 +290,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
file.read(reinterpret_cast<char*>(&key), sizeof(key));
queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
ctx->pools.ReleaseContents();
- auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env, true)};
std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
@@ -307,7 +311,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
env_ptrs.push_back(&env);
}
ctx->pools.ReleaseContents();
- auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false, true)};
std::scoped_lock lock{state.mutex};
if (pipeline) {
graphics_cache.emplace(key, std::move(pipeline));
@@ -439,7 +443,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
- std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ std::span<Shader::Environment* const> envs, bool use_shader_workers,
+ bool force_context_flush) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
size_t env_index{};
u32 total_storage_buffers{};
@@ -531,10 +536,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
}
previous_program = &program;
}
- auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
+ auto* const thread_worker{use_shader_workers ? workers.get() : nullptr};
return std::make_unique<GraphicsPipeline>(device, texture_cache, buffer_cache, program_manager,
state_tracker, thread_worker, &shader_notify, sources,
- sources_spirv, infos, key);
+ sources_spirv, infos, key, force_context_flush);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -559,8 +564,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
- ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
- Shader::Environment& env) try {
+ ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env,
+ bool force_context_flush) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@@ -589,7 +594,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, program_manager,
- program.info, code, code_spirv);
+ program.info, code, code_spirv, force_context_flush);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
return nullptr;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index f82420592..6b9732fca 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -50,14 +50,16 @@ private:
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
- std::span<Shader::Environment* const> envs, bool build_in_parallel);
+ std::span<Shader::Environment* const> envs, bool use_shader_workers,
+ bool force_context_flush = false);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
- Shader::Environment& env);
+ Shader::Environment& env,
+ bool force_context_flush = false);
std::unique_ptr<ShaderWorker> CreateWorkers() const;
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
index ca2bd8e8e..d12cd06fa 100644
--- a/src/video_core/renderer_opengl/gl_shader_context.h
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -4,6 +4,7 @@
#pragma once
#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
@@ -15,9 +16,9 @@ struct ShaderPools {
inst.ReleaseContents();
}
- Shader::ObjectPool<Shader::IR::Inst> inst;
- Shader::ObjectPool<Shader::IR::Block> block;
- Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+ Shader::ObjectPool<Shader::IR::Inst> inst{8192};
+ Shader::ObjectPool<Shader::IR::Block> block{32};
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block{32};
};
struct Context {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 98841ae65..03d4b9d06 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -3,7 +3,9 @@
#include <glad/glad.h>
+#include "video_core/host_shaders/opengl_lmem_warmup_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
@@ -17,6 +19,10 @@ ProgramManager::ProgramManager(const Device& device) {
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
+ if (device.HasLmemPerfBug()) {
+ lmem_warmup_program =
+ CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER);
+ }
}
void ProgramManager::BindComputeProgram(GLuint program) {
@@ -98,6 +104,13 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU
void ProgramManager::RestoreGuestCompute() {}
+void ProgramManager::LocalMemoryWarmup() {
+ if (lmem_warmup_program.handle != 0) {
+ BindComputeProgram(lmem_warmup_program.handle);
+ glDispatchCompute(1, 1, 1);
+ }
+}
+
void ProgramManager::BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 07ffab77f..852d8c88e 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -30,6 +30,8 @@ public:
void RestoreGuestCompute();
+ void LocalMemoryWarmup();
+
private:
void BindPipeline();
@@ -44,6 +46,7 @@ private:
u32 current_stage_mask = 0;
std::array<GLuint, NUM_STAGES> current_programs{};
GLuint current_assembly_compute_program = 0;
+ OGLProgram lmem_warmup_program;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
new file mode 100644
index 000000000..bbb06e51f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
@@ -0,0 +1,150 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/microprofile.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
+
+MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192));
+
+namespace OpenGL {
+
+StagingBufferMap::~StagingBufferMap() {
+ if (sync) {
+ sync->Create();
+ }
+}
+
+StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+ : storage_flags{storage_flags_}, map_flags{map_flags_} {}
+
+StagingBuffers::~StagingBuffers() = default;
+
+StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
+ MICROPROFILE_SCOPE(OpenGL_BufferRequest);
+
+ const size_t index = RequestBuffer(requested_size);
+ OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+ sync_indices[index] = insert_fence ? ++current_sync_index : 0;
+ return StagingBufferMap{
+ .mapped_span = std::span(maps[index], requested_size),
+ .sync = sync,
+ .buffer = buffers[index].handle,
+ };
+}
+
+size_t StagingBuffers::RequestBuffer(size_t requested_size) {
+ if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+ return *index;
+ }
+
+ OGLBuffer& buffer = buffers.emplace_back();
+ buffer.Create();
+ const auto next_pow2_size = Common::NextPow2(requested_size);
+ glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
+ storage_flags | GL_MAP_PERSISTENT_BIT);
+ maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
+ map_flags | GL_MAP_PERSISTENT_BIT)));
+ syncs.emplace_back();
+ sync_indices.emplace_back();
+ sizes.push_back(next_pow2_size);
+
+ ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+ maps.size() == sizes.size());
+
+ return buffers.size() - 1;
+}
+
+std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
+ size_t known_unsignaled_index = current_sync_index + 1;
+ size_t smallest_buffer = std::numeric_limits<size_t>::max();
+ std::optional<size_t> found;
+ const size_t num_buffers = sizes.size();
+ for (size_t index = 0; index < num_buffers; ++index) {
+ const size_t buffer_size = sizes[index];
+ if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+ continue;
+ }
+ if (syncs[index].handle != 0) {
+ if (sync_indices[index] >= known_unsignaled_index) {
+ // This fence is later than a fence that is known to not be signaled
+ continue;
+ }
+ if (!syncs[index].IsSignaled()) {
+ // Since this fence hasn't been signaled, it's safe to assume all later
+ // fences haven't been signaled either
+ known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
+ continue;
+ }
+ syncs[index].Release();
+ }
+ smallest_buffer = buffer_size;
+ found = index;
+ }
+ return found;
+}
+
+StreamBuffer::StreamBuffer() {
+ static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+ buffer.Create();
+ glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+ glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+ mapped_pointer =
+ static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+ for (OGLSync& sync : fences) {
+ sync.Create();
+ }
+}
+
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+ ASSERT(size < REGION_SIZE);
+ for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+ ++region) {
+ fences[region].Create();
+ }
+ used_iterator = iterator;
+
+ for (size_t region = Region(free_iterator) + 1,
+ region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+ region < region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ if (iterator + size >= free_iterator) {
+ free_iterator = iterator + size;
+ }
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+ fences[region].Create();
+ }
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return {std::span(mapped_pointer + offset, size), offset};
+}
+
+StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
+ return upload_buffers.RequestMap(size, true);
+}
+
+StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
+ return download_buffers.RequestMap(size, false);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
new file mode 100644
index 000000000..60f72d3a0
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
@@ -0,0 +1,95 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <optional>
+#include <span>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "common/literals.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+using namespace Common::Literals;
+
+struct StagingBufferMap {
+ ~StagingBufferMap();
+
+ std::span<u8> mapped_span;
+ size_t offset = 0;
+ OGLSync* sync;
+ GLuint buffer;
+};
+
+struct StagingBuffers {
+ explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+ ~StagingBuffers();
+
+ StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
+
+ size_t RequestBuffer(size_t requested_size);
+
+ std::optional<size_t> FindBuffer(size_t requested_size);
+
+ std::vector<OGLSync> syncs;
+ std::vector<OGLBuffer> buffers;
+ std::vector<u8*> maps;
+ std::vector<size_t> sizes;
+ std::vector<size_t> sync_indices;
+ GLenum storage_flags;
+ GLenum map_flags;
+ size_t current_sync_index = 0;
+};
+
+class StreamBuffer {
+ static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
+ static constexpr size_t NUM_SYNCS = 16;
+ static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
+ static constexpr size_t MAX_ALIGNMENT = 256;
+ static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
+ static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
+ static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
+
+public:
+ explicit StreamBuffer();
+
+ [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
+
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
+ }
+
+private:
+ [[nodiscard]] static size_t Region(size_t offset) noexcept {
+ return offset / REGION_SIZE;
+ }
+
+ size_t iterator = 0;
+ size_t used_iterator = 0;
+ size_t free_iterator = 0;
+ u8* mapped_pointer = nullptr;
+ OGLBuffer buffer;
+ std::array<OGLSync, NUM_SYNCS> fences;
+};
+
+class StagingBufferPool {
+public:
+ StagingBufferPool() = default;
+ ~StagingBufferPool() = default;
+
+ StagingBufferMap RequestUploadBuffer(size_t size);
+ StagingBufferMap RequestDownloadBuffer(size_t size);
+
+private:
+ StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
deleted file mode 100644
index 2005c8993..000000000
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <array>
-#include <memory>
-#include <span>
-
-#include <glad/glad.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
-
-namespace OpenGL {
-
-StreamBuffer::StreamBuffer() {
- static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
- buffer.Create();
- glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
- glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
- mapped_pointer =
- static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
- for (OGLSync& sync : fences) {
- sync.Create();
- }
-}
-
-std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
- ASSERT(size < REGION_SIZE);
- for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
- ++region) {
- fences[region].Create();
- }
- used_iterator = iterator;
-
- for (size_t region = Region(free_iterator) + 1,
- region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
- region < region_end; ++region) {
- glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
- fences[region].Release();
- }
- if (iterator + size >= free_iterator) {
- free_iterator = iterator + size;
- }
- if (iterator + size > STREAM_BUFFER_SIZE) {
- for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
- fences[region].Create();
- }
- used_iterator = 0;
- iterator = 0;
- free_iterator = size;
-
- for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
- glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
- fences[region].Release();
- }
- }
- const size_t offset = iterator;
- iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
- return {std::span(mapped_pointer + offset, size), offset};
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
deleted file mode 100644
index 8fe927aaf..000000000
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <span>
-#include <utility>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "common/literals.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-
-namespace OpenGL {
-
-using namespace Common::Literals;
-
-class StreamBuffer {
- static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
- static constexpr size_t NUM_SYNCS = 16;
- static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
- static constexpr size_t MAX_ALIGNMENT = 256;
- static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
- static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
- static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
-
-public:
- explicit StreamBuffer();
-
- [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
-
- [[nodiscard]] GLuint Handle() const noexcept {
- return buffer.handle;
- }
-
-private:
- [[nodiscard]] static size_t Region(size_t offset) noexcept {
- return offset / REGION_SIZE;
- }
-
- size_t iterator = 0;
- size_t used_iterator = 0;
- size_t free_iterator = 0;
- u8* mapped_pointer = nullptr;
- OGLBuffer buffer;
- std::array<OGLSync, NUM_SYNCS> fences;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9f7ce7414..3b446be07 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -112,13 +112,17 @@ GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
return GL_NONE;
}
-GLenum TextureMode(PixelFormat format, bool is_first) {
+GLenum TextureMode(PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
+ bool any_r =
+ std::ranges::any_of(swizzle, [](SwizzleSource s) { return s == SwizzleSource::R; });
switch (format) {
case PixelFormat::D24_UNORM_S8_UINT:
case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+ // R = depth, G = stencil
+ return any_r ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+ // R = stencil, G = depth
+ return any_r ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
ASSERT(false);
return GL_DEPTH_COMPONENT;
@@ -208,8 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
case PixelFormat::D32_FLOAT_S8_UINT:
case PixelFormat::S8_UINT_D24_UNORM:
UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
- glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
- TextureMode(format, swizzle[0] == SwizzleSource::R));
+ glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, TextureMode(format, swizzle));
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
case PixelFormat::A5B5G5R1_UNORM: {
@@ -228,8 +231,11 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
const VideoCommon::ImageInfo& info) {
- if (IsPixelFormatASTC(info.format)) {
- return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue();
+ if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) {
+ return Settings::values.accelerate_astc.GetValue() &&
+ Settings::values.astc_recompression.GetValue() ==
+ Settings::AstcRecompression::Uncompressed &&
+ !Settings::values.async_astc.GetValue();
}
// Disable other accelerated uploads for now as they don't implement swizzled uploads
return false;
@@ -258,6 +264,14 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
return format_info.compatibility_class == store_class;
}
+[[nodiscard]] bool CanBeDecodedAsync(const TextureCacheRuntime& runtime,
+ const VideoCommon::ImageInfo& info) {
+ if (IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC()) {
+ return Settings::values.async_astc.GetValue();
+ }
+ return false;
+}
+
[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
VideoCommon::SubresourceLayers subresource, GLenum target) {
switch (target) {
@@ -425,18 +439,31 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return GL_R32UI;
}
-} // Anonymous namespace
+[[nodiscard]] bool IsAstcRecompressionEnabled() {
+ return Settings::values.astc_recompression.GetValue() !=
+ Settings::AstcRecompression::Uncompressed;
+}
-ImageBufferMap::~ImageBufferMap() {
- if (sync) {
- sync->Create();
+[[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) {
+ switch (Settings::values.astc_recompression.GetValue()) {
+ case Settings::AstcRecompression::Bc1:
+ return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
+ break;
+ case Settings::AstcRecompression::Bc3:
+ return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT : GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
+ break;
+ default:
+ return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
}
}
+} // Anonymous namespace
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
- StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
- format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
+ StateTracker& state_tracker_,
+ StagingBufferPool& staging_buffer_pool_)
+ : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
+ util_shaders(program_manager), format_conversion_pass{util_shaders},
+ resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -526,12 +553,12 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
-ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
- return upload_buffers.RequestMap(size, true);
+StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestUploadBuffer(size);
}
-ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
- return download_buffers.RequestMap(size, false);
+StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestDownloadBuffer(size);
}
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@@ -557,6 +584,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
}
}
+void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies) {
+ LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples,
+ dst_image.info.num_samples);
+ // TODO: Leverage the format conversion pass if possible/accurate.
+ util_shaders.CopyMSAA(dst_image, src_image, copies);
+}
+
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
@@ -608,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
is_linear ? GL_LINEAR : GL_NEAREST);
}
-void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) {
case ImageType::e2D:
@@ -650,78 +685,27 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
-TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
- : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-
-TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
-
-ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
- bool insert_fence) {
- const size_t index = RequestBuffer(requested_size);
- OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
- return ImageBufferMap{
- .mapped_span = std::span(maps[index], requested_size),
- .sync = sync,
- .buffer = buffers[index].handle,
- };
-}
-
-size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
- if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
- return *index;
- }
-
- OGLBuffer& buffer = buffers.emplace_back();
- buffer.Create();
- glNamedBufferStorage(buffer.handle, requested_size, nullptr,
- storage_flags | GL_MAP_PERSISTENT_BIT);
- maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
- map_flags | GL_MAP_PERSISTENT_BIT)));
-
- syncs.emplace_back();
- sizes.push_back(requested_size);
-
- ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
- maps.size() == sizes.size());
-
- return buffers.size() - 1;
-}
-
-std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
- size_t smallest_buffer = std::numeric_limits<size_t>::max();
- std::optional<size_t> found;
- const size_t num_buffers = sizes.size();
- for (size_t index = 0; index < num_buffers; ++index) {
- const size_t buffer_size = sizes[index];
- if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
- continue;
- }
- if (syncs[index].handle != 0) {
- GLint status;
- glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
- if (status != GL_SIGNALED) {
- continue;
- }
- syncs[index].Release();
- }
- smallest_buffer = buffer_size;
- found = index;
- }
- return found;
-}
-
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
- if (CanBeAccelerated(*runtime, info)) {
+ if (CanBeDecodedAsync(*runtime, info)) {
+ flags |= ImageFlagBits::AsynchronousDecode;
+ } else if (CanBeAccelerated(*runtime, info)) {
flags |= ImageFlagBits::AcceleratedUpload;
}
if (IsConverted(runtime->device, info.format, info.type)) {
flags |= ImageFlagBits::Converted;
flags |= ImageFlagBits::CostlyLoad;
- gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+
+ const bool is_srgb = IsPixelFormatSRGB(info.format);
+ gl_internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+ if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) {
+ gl_internal_format = SelectAstcFormat(info.format, is_srgb);
+ gl_format = GL_NONE;
+ }
} else {
const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
@@ -743,14 +727,14 @@ Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBas
Image::~Image() = default;
-void Image::UploadMemory(const ImageBufferMap& map,
+void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown(true);
}
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
- glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer_handle);
+ glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@@ -769,45 +753,65 @@ void Image::UploadMemory(const ImageBufferMap& map,
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
}
- CopyBufferToImage(copy, map.offset);
+ CopyBufferToImage(copy, buffer_offset);
}
if (is_rescaled) {
ScaleUp();
}
}
-void Image::DownloadMemory(ImageBufferMap& map,
+void Image::UploadMemory(const StagingBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ UploadMemory(map.buffer, map.offset, copies);
+}
+
+void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ std::array buffer_handles{buffer_handle};
+ std::array buffer_offsets{buffer_offset};
+ DownloadMemory(buffer_handles, buffer_offsets, copies);
+}
+
+void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
- glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
- glPixelStorei(GL_PACK_ALIGNMENT, 1);
+ for (size_t i = 0; i < buffer_handles.size(); i++) {
+ auto& buffer_handle = buffer_handles[i];
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
+ glPixelStorei(GL_PACK_ALIGNMENT, 1);
- u32 current_row_length = std::numeric_limits<u32>::max();
- u32 current_image_height = std::numeric_limits<u32>::max();
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
- for (const VideoCommon::BufferImageCopy& copy : copies) {
- if (copy.image_subresource.base_level >= gl_num_levels) {
- continue;
- }
- if (current_row_length != copy.buffer_row_length) {
- current_row_length = copy.buffer_row_length;
- glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
- }
- if (current_image_height != copy.buffer_image_height) {
- current_image_height = copy.buffer_image_height;
- glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (copy.image_subresource.base_level >= gl_num_levels) {
+ continue;
+ }
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyImageToBuffer(copy, buffer_offsets[i]);
}
- CopyImageToBuffer(copy, map.offset);
}
if (is_rescaled) {
ScaleUp(true);
}
}
+void Image::DownloadMemory(StagingBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ DownloadMemory(map.buffer, map.offset, copies);
+}
+
GLuint Image::StorageHandle() noexcept {
switch (info.format) {
case PixelFormat::A8B8G8R8_SRGB:
@@ -821,9 +825,12 @@ GLuint Image::StorageHandle() noexcept {
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X5_SRGB:
+ case PixelFormat::ASTC_2D_10X6_SRGB:
case PixelFormat::ASTC_2D_10X8_SRGB:
case PixelFormat::ASTC_2D_6X6_SRGB:
case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X10_SRGB:
case PixelFormat::ASTC_2D_12X12_SRGB:
case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_SRGB:
@@ -1083,10 +1090,16 @@ bool Image::ScaleDown(bool ignore) {
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image, const SlotVector<Image>&)
- : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
+ : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
+ views{runtime.null_image_views} {
const Device& device = runtime.device;
if (True(image.flags & ImageFlagBits::Converted)) {
- internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ const bool is_srgb = IsPixelFormatSRGB(info.format);
+ internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+
+ if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) {
+ internal_format = SelectAstcFormat(info.format, is_srgb);
+ }
} else {
internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
}
@@ -1174,12 +1187,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
- : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
- : VideoCommon::ImageViewBase{info, view_info} {}
+ : VideoCommon::ImageViewBase{info, view_info, 0} {}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
@@ -1239,7 +1252,7 @@ GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
ApplySwizzle(view.handle, format, casted_swizzle);
}
if (set_object_label) {
- const std::string name = VideoCommon::Name(*this);
+ const std::string name = VideoCommon::Name(*this, gpu_addr);
glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
}
return view.handle;
@@ -1255,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
- sampler.Create();
- const GLuint handle = sampler.handle;
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
- glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
- glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
- glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
- glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
- glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
- glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
- glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
- glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
-
- if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
- const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
- glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy);
- } else {
- LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
- }
- if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
- glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
- } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
- LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
- }
- if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
- glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
- } else if (seamless == GL_FALSE) {
- // We default to false because it's more common
- LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
+
+ const auto create_sampler = [&](const f32 anisotropy) {
+ OGLSampler new_sampler;
+ new_sampler.Create();
+ const GLuint handle = new_sampler.handle;
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
+ glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
+ glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
+ glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
+ glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
+ glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+ if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy);
+ } else {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+ }
+ if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
+ glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
+ } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+ }
+ if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
+ } else if (seamless == GL_FALSE) {
+ // We default to false because it's more common
+ LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ }
+ return new_sampler;
+ };
+
+ sampler = create_sampler(max_anisotropy);
+
+ const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy);
+ if (max_anisotropy > max_anisotropy_default) {
+ sampler_default_anisotropy = create_sampler(max_anisotropy_default);
}
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 5d9d370f2..3676eaaa9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -11,6 +11,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -37,15 +38,6 @@ using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
-struct ImageBufferMap {
- ~ImageBufferMap();
-
- std::span<u8> mapped_span;
- size_t offset = 0;
- OGLSync* sync;
- GLuint buffer;
-};
-
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
@@ -74,14 +66,15 @@ class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
- StateTracker& state_tracker);
+ StateTracker& state_tracker,
+ StagingBufferPool& staging_buffer_pool);
~TextureCacheRuntime();
void Finish();
- ImageBufferMap UploadStagingBuffer(size_t size);
+ StagingBufferMap UploadStagingBuffer(size_t size);
- ImageBufferMap DownloadStagingBuffer(size_t size);
+ StagingBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
@@ -93,12 +86,19 @@ public:
return device.CanReportMemoryUsage();
}
- bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
+ bool ShouldReinterpret([[maybe_unused]] Image& dst,
+ [[maybe_unused]] Image& src) const noexcept {
+ return true;
+ }
+
+ bool CanUploadMSAA() const noexcept {
return true;
}
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+ void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
@@ -113,7 +113,7 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
@@ -137,36 +137,21 @@ public:
return state_tracker;
}
-private:
- struct StagingBuffers {
- explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
- ~StagingBuffers();
-
- ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
-
- size_t RequestBuffer(size_t requested_size);
-
- std::optional<size_t> FindBuffer(size_t requested_size);
-
- std::vector<OGLSync> syncs;
- std::vector<OGLBuffer> buffers;
- std::vector<u8*> maps;
- std::vector<size_t> sizes;
- GLenum storage_flags;
- GLenum map_flags;
- };
+ void BarrierFeedbackLoop() const noexcept {
+ // OpenGL does not require a barrier for attachment feedback loops.
+ }
+private:
const Device& device;
StateTracker& state_tracker;
+ StagingBufferPool& staging_buffer_pool;
+
UtilShaders util_shaders;
FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false;
- StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
- StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
-
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
@@ -199,10 +184,20 @@ public:
Image(Image&&) = default;
Image& operator=(Image&&) = default;
- void UploadMemory(const ImageBufferMap& map,
+ void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
- void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
+ void UploadMemory(const StagingBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
+
+ void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
+
+ void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
+ std::span<const VideoCommon::BufferImageCopy> copies);
+
+ void DownloadMemory(StagingBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@@ -298,7 +293,6 @@ private:
std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
GLuint default_handle = 0;
- GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
GLuint original_texture = 0;
int num_samples = 0;
@@ -315,12 +309,21 @@ class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
- GLuint Handle() const noexcept {
+ [[nodiscard]] GLuint Handle() const noexcept {
return sampler.handle;
}
+ [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept {
+ return sampler_default_anisotropy.handle;
+ }
+
+ [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
+ return static_cast<bool>(sampler_default_anisotropy.handle);
+ }
+
private:
OGLSampler sampler;
+ OGLSampler sampler_default_anisotropy;
};
class Framebuffer {
@@ -363,6 +366,7 @@ struct TextureCacheParams {
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
using AsyncBuffer = u32;
+ using BufferType = GLuint;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index ef1190e1f..c7dc7e0a1 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -100,10 +100,13 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR}, // ASTC_2D_10X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x10_KHR}, // ASTC_2D_12X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR}, // ASTC_2D_12X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index de95f2634..2a74c1d05 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -17,8 +17,14 @@
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/ffx_a_h.h"
+#include "video_core/host_shaders/ffx_fsr1_h.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
@@ -31,6 +37,7 @@
#include "video_core/host_shaders/smaa_edge_detection_vert.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -268,12 +275,17 @@ void RendererOpenGL::InitOpenGLObjects() {
fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
- const auto SmaaShader = [](std::string_view specialized_source, GLenum stage) {
- std::string shader_source{specialized_source};
- constexpr std::string_view include_string = "#include \"opengl_smaa.glsl\"";
+ const auto replace_include = [](std::string& shader_source, std::string_view include_name,
+ std::string_view include_content) {
+ const std::string include_string = fmt::format("#include \"{}\"", include_name);
const std::size_t pos = shader_source.find(include_string);
ASSERT(pos != std::string::npos);
- shader_source.replace(pos, include_string.size(), HostShaders::OPENGL_SMAA_GLSL);
+ shader_source.replace(pos, include_string.size(), include_content);
+ };
+
+ const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) {
+ std::string shader_source{specialized_source};
+ replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL);
return CreateProgram(shader_source, stage);
};
@@ -298,14 +310,32 @@ void RendererOpenGL::InitOpenGLObjects() {
CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
GL_FRAGMENT_SHADER);
+ std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG};
+ replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H);
+ replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H);
+
+ std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG};
+ std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG};
+ replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source);
+ replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source);
+
+ fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source,
+ fsr_rcas_frag_source);
+
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
present_sampler_nn.Create();
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -525,6 +555,31 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, aa_texture.handle);
}
+ glDisablei(GL_SCISSOR_TEST, 0);
+
+ if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ if (!fsr->AreBuffersInitialized()) {
+ fsr->InitBuffers();
+ }
+
+ auto crop_rect = framebuffer_crop_rect;
+ if (crop_rect.GetWidth() == 0) {
+ crop_rect.right = framebuffer_width;
+ }
+ if (crop_rect.GetHeight() == 0) {
+ crop_rect.bottom = framebuffer_height;
+ }
+ crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
+ const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width);
+ const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height);
+ glBindSampler(0, present_sampler.handle);
+ fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect);
+ } else {
+ if (fsr->AreBuffersInitialized()) {
+ fsr->ReleaseBuffers();
+ }
+ }
+
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
@@ -540,10 +595,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
case Settings::ScalingFilter::ScaleForce:
return present_scaleforce_fragment.handle;
case Settings::ScalingFilter::Fsr:
- LOG_WARNING(
- Render_OpenGL,
- "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce");
- return present_scaleforce_fragment.handle;
+ return fsr->GetPresentFragmentProgram().handle;
default:
return present_bilinear_fragment.handle;
}
@@ -578,15 +630,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
f32 scale_v =
static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
- // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
- // (e.g. handheld mode) on a 1920x1080 framebuffer.
- if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
- static_cast<f32>(screen_info.texture.width);
- }
- if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
- static_cast<f32>(screen_info.texture.height);
+
+ if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) {
+ // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
+ // (e.g. handheld mode) on a 1920x1080 framebuffer.
+ if (framebuffer_crop_rect.GetWidth() > 0) {
+ scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+ static_cast<f32>(screen_info.texture.width);
+ }
+ if (framebuffer_crop_rect.GetHeight() > 0) {
+ scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+ static_cast<f32>(screen_info.texture.height);
+ }
}
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
!screen_info.was_accelerated) {
@@ -612,7 +667,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
- glDisablei(GL_SCISSOR_TEST, 0);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc97d7b26..f1d5fd954 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -141,6 +142,8 @@ private:
OGLTexture smaa_edges_tex;
OGLTexture smaa_blend_tex;
+ std::unique_ptr<FSR> fsr;
+
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 404def62e..544982d18 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -12,11 +12,14 @@
#include "video_core/host_shaders/astc_decoder_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h"
+#include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h"
#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -51,7 +54,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
- convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
+ convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)),
+ convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)),
+ convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
@@ -59,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
-void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
+void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@@ -107,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -144,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@@ -185,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
+void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -269,6 +274,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copi
program_manager.RestoreGuestCompute();
}
+void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies) {
+ const bool is_ms_to_non_ms = src_image.info.num_samples > 1 && dst_image.info.num_samples == 1;
+ const auto program_handle =
+ is_ms_to_non_ms ? convert_ms_to_nonms_program.handle : convert_nonms_to_ms_program.handle;
+ program_manager.BindComputeProgram(program_handle);
+
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_subresource.base_layer == 0);
+ ASSERT(copy.src_subresource.num_layers == 1);
+ ASSERT(copy.dst_subresource.base_layer == 0);
+ ASSERT(copy.dst_subresource.num_layers == 1);
+
+ glBindImageTexture(0, src_image.StorageHandle(), copy.src_subresource.base_level, GL_TRUE,
+ 0, GL_READ_ONLY, GL_RGBA8);
+ glBindImageTexture(1, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_TRUE,
+ 0, GL_WRITE_ONLY, GL_RGBA8);
+
+ const u32 num_dispatches_x = Common::DivCeil(copy.extent.width, 8U);
+ const u32 num_dispatches_y = Common::DivCeil(copy.extent.height, 8U);
+ const u32 num_dispatches_z = copy.extent.depth;
+
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 44efb6ecf..feecd404c 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -16,23 +16,23 @@ namespace OpenGL {
class Image;
class ProgramManager;
-struct ImageBufferMap;
+struct StagingBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
- void ASTCDecode(Image& image, const ImageBufferMap& map,
+ void ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+ void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+ void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void PitchUpload(Image& image, const ImageBufferMap& map,
+ void PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
@@ -40,6 +40,9 @@ public:
void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
+ void CopyMSAA(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies);
+
private:
ProgramManager& program_manager;
@@ -51,6 +54,8 @@ private:
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
OGLProgram convert_s8d24_program;
+ OGLProgram convert_ms_to_nonms_program;
+ OGLProgram convert_nonms_to_ms_program;
};
GLenum StoreFormat(u32 bytes_per_block);