summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/blit_image.cpp59
-rw-r--r--src/video_core/renderer_opengl/blit_image.h38
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h7
-rw-r--r--src/video_core/renderer_opengl/gl_fsr.cpp101
-rw-r--r--src/video_core/renderer_opengl/gl_fsr.h43
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp232
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h21
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp121
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h129
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp98
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
16 files changed, 676 insertions, 189 deletions
diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp
new file mode 100644
index 000000000..9a560a73b
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.cpp
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+
+#include "video_core/host_shaders/blit_color_float_frag.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
+#include "video_core/renderer_opengl/blit_image.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) // Stores the manager reference and builds the two host programs BlitColor binds.
+ : program_manager(program_manager_),
+ full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), // vertex stage
+ blit_color_to_color_frag(
+ CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} // fragment stage
+
+BlitImageHelper::~BlitImageHelper() = default; // defaulted: no explicit teardown is written here
+
+void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, // Draws src_image_view into dst_framebuffer with one 3-vertex draw.
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size) {
+ glEnable(GL_CULL_FACE); // the state this pass relies on is set explicitly below, not assumed from the caller
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisable(GL_ALPHA_TEST);
+ glDisablei(GL_BLEND, 0);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glCullFace(GL_BACK);
+ glFrontFace(GL_CW);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glDepthRangeIndexed(0, 0.0, 0.0); // near and far of viewport 0 are both set to 0
+
+ program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle);
+ glProgramUniform2f(full_screen_vert.handle, 0, // location 0: source region size as a fraction of src_size
+ static_cast<float>(src_region.end.x - src_region.start.x) /
+ static_cast<float>(src_size.width),
+ static_cast<float>(src_region.end.y - src_region.start.y) /
+ static_cast<float>(src_size.height));
+ glProgramUniform2f(full_screen_vert.handle, 1, // location 1: source region origin as a fraction of src_size
+ static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+ static_cast<float>(src_region.start.y) /
+ static_cast<float>(src_size.height));
+ glViewport(std::min(dst_region.start.x, dst_region.end.x), // min/abs keep the viewport well-formed when start > end on an axis
+ std::min(dst_region.start.y, dst_region.end.y),
+ std::abs(dst_region.end.x - dst_region.start.x),
+ std::abs(dst_region.end.y - dst_region.start.y));
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer);
+ glBindSampler(0, src_sampler); // sampler and texture both go on unit 0
+ glBindTextureUnit(0, src_image_view);
+ glClear(GL_COLOR_BUFFER_BIT); // NOTE(review): glClear is not limited by the viewport; confirm clearing beyond dst_region is intended
+ glDrawArrays(GL_TRIANGLES, 0, 3); // three vertices, i.e. a single triangle
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h
new file mode 100644
index 000000000..5a2b12d16
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.h
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
+
+class ProgramManager;
+class Framebuffer;
+class ImageView;
+
+class BlitImageHelper { // Draws an image view into a framebuffer using a vertex/fragment host program pair.
+public:
+ explicit BlitImageHelper(ProgramManager& program_manager); // keeps a reference to program_manager
+ ~BlitImageHelper();
+
+ void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, // all three are raw GL object names
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size);
+
+private:
+ ProgramManager& program_manager; // reference member: the manager has to outlive this helper
+
+ OGLProgram full_screen_vert; // vertex-stage program
+ OGLProgram blit_color_to_color_frag; // fragment-stage program
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..bb1962073 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -160,6 +160,10 @@ public:
return device.CanReportMemoryUsage();
}
+ u32 GetStorageBufferAlignment() const { // Returns the device's shader storage buffer alignment, cast to u32.
+ return static_cast<u32>(device.GetShaderStorageBufferAlignment());
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index cee5c3247..22ed16ebf 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+ has_draw_texture = GLAD_GL_NV_draw_texture;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2a72d84be..3ff8cad83 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -4,6 +4,8 @@
#pragma once
#include <cstddef>
+#include <string>
+
#include "common/common_types.h"
#include "core/frontend/emu_window.h"
#include "shader_recompiler/stage.h"
@@ -146,6 +148,10 @@ public:
return has_sparse_texture_2;
}
+ bool HasDrawTexture() const { // Reports the has_draw_texture flag, which the Device constructor sets from GLAD_GL_NV_draw_texture.
+ return has_draw_texture;
+ }
+
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
@@ -216,6 +222,7 @@ private:
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
+ bool has_draw_texture{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp
new file mode 100644
index 000000000..77262dcf1
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.cpp
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/settings.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+using namespace FSR;
+
+using FsrConstants = std::array<u32, 4 * 4>;
+
+FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, // Compiles the three programs from the given shader sources.
+ std::string_view fsr_rcas_source)
+ : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)},
+ fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)},
+ fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} {
+ glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f); // vertex uniform at location 0 fixed to (1, 1)
+ glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f); // location 1 fixed to (0, 0); presumably a scale/offset pair, confirm against the vertex source
+}
+
+FSR::~FSR() = default; // defaulted: no explicit teardown is written here
+
+void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+ u32 input_image_width, u32 input_image_height,
+ const Common::Rectangle<int>& crop_rect) {
+ // Draws the EASU pass into fsr_intermediate_tex, then uploads the RCAS constants (no RCAS draw is issued here).
+ const auto output_image_width = screen.GetWidth();
+ const auto output_image_height = screen.GetHeight();
+
+ if (fsr_intermediate_tex.handle) { // release the intermediate texture when the output size no longer matches
+ GLint fsr_tex_width, fsr_tex_height;
+ glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH,
+ &fsr_tex_width);
+ glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT,
+ &fsr_tex_height);
+ if (static_cast<u32>(fsr_tex_width) != output_image_width ||
+ static_cast<u32>(fsr_tex_height) != output_image_height) {
+ fsr_intermediate_tex.Release();
+ }
+ }
+ if (!fsr_intermediate_tex.handle) { // (re)create it at the output size and attach it to fsr_framebuffer
+ fsr_intermediate_tex.Create(GL_TEXTURE_2D);
+ glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width,
+ output_image_height);
+ glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0,
+ fsr_intermediate_tex.handle, 0);
+ }
+
+ GLint old_draw_fb; // caller's draw framebuffer, rebound after the EASU draw
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+
+ glFrontFace(GL_CW);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width),
+ static_cast<GLfloat>(output_image_height));
+
+ FsrConstants constants; // 4 * 4 u32 values, uploaded as four uvec4
+ FsrEasuConOffset(
+ constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12,
+
+ static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
+ static_cast<f32>(input_image_width), static_cast<f32>(input_image_height),
+ static_cast<f32>(output_image_width), static_cast<f32>(output_image_height),
+ static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+ const auto num_vec4 = static_cast<GLsizei>(constants.size() / 4); // count is in uvec4 elements, not bytes
+ glProgramUniform4uiv(fsr_easu_frag.handle, 0, num_vec4, std::data(constants));
+
+ program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle);
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+ glBindTextureUnit(0, fsr_intermediate_tex.handle); // the EASU result becomes the texture on unit 0
+
+ const float sharpening =
+ static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
+
+ FsrRcasCon(constants.data(), sharpening); // reuses the same constants buffer for the RCAS values
+ glProgramUniform4uiv(fsr_rcas_frag.handle, 0, num_vec4, std::data(constants));
+}
+
+void FSR::InitBuffers() { // Creates the framebuffer object; the intermediate texture is created later, in Draw.
+ fsr_framebuffer.Create();
+}
+
+void FSR::ReleaseBuffers() { // Releases both the framebuffer and the intermediate texture.
+ fsr_framebuffer.Release();
+ fsr_intermediate_tex.Release();
+}
+
+const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept { // Returns the RCAS fragment program, whose constants Draw uploads last.
+ return fsr_rcas_frag;
+}
+
+bool FSR::AreBuffersInitialized() const noexcept { // True when the framebuffer handle is non-zero.
+ return fsr_framebuffer.handle; // handle is converted to bool
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h
new file mode 100644
index 000000000..1f6ae3115
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.h
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <string_view>
+
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+class ProgramManager;
+
+class FSR { // Owns the GL programs and buffers for the two FSR passes (EASU and RCAS).
+public:
+ explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, // shader sources, compiled in the constructor
+ std::string_view fsr_rcas_source);
+ ~FSR();
+
+ void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, // draws the EASU pass and uploads the RCAS constants
+ u32 input_image_width, u32 input_image_height,
+ const Common::Rectangle<int>& crop_rect);
+
+ void InitBuffers(); // creates fsr_framebuffer
+
+ void ReleaseBuffers(); // releases fsr_framebuffer and fsr_intermediate_tex
+
+ [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept; // returns fsr_rcas_frag
+
+ [[nodiscard]] bool AreBuffersInitialized() const noexcept; // true when fsr_framebuffer has a non-zero handle
+
+private:
+ OGLFramebuffer fsr_framebuffer; // render target for the EASU pass
+ OGLProgram fsr_vertex; // vertex program bound for the EASU draw
+ OGLProgram fsr_easu_frag;
+ OGLProgram fsr_rcas_frag;
+ OGLTexture fsr_intermediate_tex; // EASU output, sized to the screen rectangle passed to Draw
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index ea53ddb46..1c06b3655 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -40,6 +40,7 @@ struct GraphicsPipelineKey {
BitField<6, 2, Maxwell::Tessellation::DomainType> tessellation_primitive;
BitField<8, 2, Maxwell::Tessellation::Spacing> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
+ BitField<11, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
};
std::array<u32, 3> padding;
VideoCommon::TransformFeedbackState xfb_state;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a44b8c454..7bced675c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ blit_image(program_manager_) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -139,6 +140,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerOpenGL::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(OpenGL_Clears);
+ gpu_memory->FlushCaching();
const auto& regs = maxwell3d->regs;
bool use_color{};
bool use_depth{};
@@ -202,10 +204,12 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
++num_queued_commands;
}
-void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
+template <typename Func>
+void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
+ gpu_memory->FlushCaching();
query_cache.UpdateCounters();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
@@ -226,49 +230,140 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
BeginTransformFeedback(pipeline, primitive_mode);
- const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
- const GLsizei num_instances = static_cast<GLsizei>(instance_count);
- if (is_indexed) {
- const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
- const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
- const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
- const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
- if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
- glDrawElements(primitive_mode, num_vertices, format, offset);
- } else if (num_instances == 1 && base_instance == 0) {
- glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
- } else if (base_vertex == 0 && base_instance == 0) {
- glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
- } else if (base_vertex == 0) {
- glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
- num_instances, base_instance);
- } else if (base_instance == 0) {
- glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
- num_instances, base_vertex);
+ draw_func(primitive_mode);
+
+ EndTransformFeedback();
+
+ ++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
+}
+
+void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
+ PrepareDraw(is_indexed, [this, is_indexed, instance_count](GLenum primitive_mode) {
+ const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
+ const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
+ const GLsizei num_instances = static_cast<GLsizei>(instance_count);
+ if (is_indexed) {
+ const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
+ const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
+ const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
+ const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
+ if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
+ glDrawElements(primitive_mode, num_vertices, format, offset);
+ } else if (num_instances == 1 && base_instance == 0) {
+ glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
+ } else if (base_vertex == 0 && base_instance == 0) {
+ glDrawElementsInstanced(primitive_mode, num_vertices, format, offset,
+ num_instances);
+ } else if (base_vertex == 0) {
+ glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
+ num_instances, base_instance);
+ } else if (base_instance == 0) {
+ glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
+ num_instances, base_vertex);
+ } else {
+ glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
+ offset, num_instances, base_vertex,
+ base_instance);
+ }
} else {
- glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
- offset, num_instances, base_vertex,
- base_instance);
+ const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
+ if (num_instances == 1 && base_instance == 0) {
+ glDrawArrays(primitive_mode, base_vertex, num_vertices);
+ } else if (base_instance == 0) {
+ glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
+ } else {
+ glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
+ num_instances, base_instance);
+ }
}
- } else {
- const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
- const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
- if (num_instances == 1 && base_instance == 0) {
- glDrawArrays(primitive_mode, base_vertex, num_vertices);
- } else if (base_instance == 0) {
- glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
+ });
+}
+
+void RasterizerOpenGL::DrawIndirect() { // Issues a multi-draw-indirect call through PrepareDraw using the draw manager's indirect parameters.
+ const auto& params = maxwell3d->draw_manager->GetIndirectParams();
+ buffer_cache.SetDrawIndirect(&params); // set for the duration of the draw, reset to nullptr at the end
+ PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) {
+ const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
+ const GLvoid* const gl_offset = // the byte offset is passed to GL as a pointer value
+ reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset));
+ glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer->Handle());
+ if (params.include_count) { // the draw count comes from a second buffer bound as GL_PARAMETER_BUFFER
+ const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
+ glBindBuffer(GL_PARAMETER_BUFFER, draw_buffer->Handle());
+
+ if (params.is_indexed) {
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
+ glMultiDrawElementsIndirectCount(primitive_mode, format, gl_offset,
+ static_cast<GLintptr>(offset_base),
+ static_cast<GLsizei>(params.max_draw_counts),
+ static_cast<GLsizei>(params.stride));
+ } else {
+ glMultiDrawArraysIndirectCount(primitive_mode, gl_offset,
+ static_cast<GLintptr>(offset_base),
+ static_cast<GLsizei>(params.max_draw_counts),
+ static_cast<GLsizei>(params.stride));
+ }
+ return; // the count variants are complete here; skip the fixed-count calls below
+ }
+ if (params.is_indexed) { // no count buffer: max_draw_counts is passed as the draw count
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
+ glMultiDrawElementsIndirect(primitive_mode, format, gl_offset,
+ static_cast<GLsizei>(params.max_draw_counts),
+ static_cast<GLsizei>(params.stride));
} else {
- glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
- num_instances, base_instance);
+ glMultiDrawArraysIndirect(primitive_mode, gl_offset,
+ static_cast<GLsizei>(params.max_draw_counts),
+ static_cast<GLsizei>(params.stride));
}
+ });
+ buffer_cache.SetDrawIndirect(nullptr);
+}
+
+void RasterizerOpenGL::DrawTexture() { // Draws the draw manager's texture request: glDrawTextureNV when the device reports it, otherwise a shader blit.
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+
+ SCOPE_EXIT({ gpu.TickWork(); });
+ query_cache.UpdateCounters();
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+ texture_cache.UpdateRenderTargets(false);
+
+ SyncState();
+
+ const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+ const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+ const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+
+ if (device.HasDrawTexture()) {
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+
+ glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0, // source coordinates are divided by the texture size before being passed
+ draw_texture_state.dst_y0, draw_texture_state.dst_x1,
+ draw_texture_state.dst_y1, 0,
+ draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
+ draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
+ draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
+ draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
+ } else { // fallback: coordinates are cast to s32 regions and drawn with BlitImageHelper
+ Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
+ .y = static_cast<s32>(draw_texture_state.dst_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+ .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+ Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+ .y = static_cast<s32>(draw_texture_state.src_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+ .y = static_cast<s32>(draw_texture_state.src_y1)}};
+ blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), // NOTE(review): this path binds the framebuffer with glBindFramebuffer, not state_tracker; confirm tracked state stays valid
+ sampler->Handle(), dst_region, src_region, texture.size);
}
- EndTransformFeedback();
++num_queued_commands;
- has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::DispatchCompute() {
+ gpu_memory->FlushCaching();
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
if (!pipeline) {
return;
@@ -302,46 +397,60 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
void RasterizerOpenGL::FlushAll() {}
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
- {
+ if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
- {
+ if ((True(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
- query_cache.FlushRegion(addr, size);
+ if ((True(which & VideoCommon::CacheType::QueryCache))) {
+ query_cache.FlushRegion(addr, size);
+ }
}
-bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+ if ((True(which & VideoCommon::CacheType::BufferCache))) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ if (buffer_cache.IsRegionGpuModified(addr, size)) {
+ return true;
+ }
+ }
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.IsRegionGpuModified(addr, size);
+ return false;
+ }
+ if (True(which & VideoCommon::CacheType::TextureCache)) {
+ std::scoped_lock lock{texture_cache.mutex};
+ return texture_cache.IsRegionGpuModified(addr, size);
}
- return texture_cache.IsRegionGpuModified(addr, size) ||
- buffer_cache.IsRegionGpuModified(addr, size);
+ return false;
}
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
- {
+ if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
- {
+ if (True(which & VideoCommon::CacheType::BufferCache)) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
- shader_cache.InvalidateRegion(addr, size);
- query_cache.InvalidateRegion(addr, size);
+ if (True(which & VideoCommon::CacheType::ShaderCache)) {
+ shader_cache.InvalidateRegion(addr, size);
+ }
+ if (True(which & VideoCommon::CacheType::QueryCache)) {
+ query_cache.InvalidateRegion(addr, size);
+ }
}
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
@@ -408,11 +517,12 @@ void RasterizerOpenGL::ReleaseFences() {
fence_manager.WaitPendingFences();
}
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which) {
if (Settings::IsGPULevelExtreme()) {
- FlushRegion(addr, size);
+ FlushRegion(addr, size, which);
}
- InvalidateRegion(addr, size);
+ InvalidateRegion(addr, size, which);
}
void RasterizerOpenGL::WaitForIdle() {
@@ -460,6 +570,22 @@ void RasterizerOpenGL::TickFrame() {
}
}
+bool RasterizerOpenGL::AccelerateConditionalRendering() { // Returns true only below High GPU level when the condition memory is dirty in the buffer cache.
+ gpu_memory->FlushCaching();
+ if (Settings::IsGPULevelHigh()) {
+ // Reimplement Host conditional rendering.
+ return false;
+ }
+ // Medium / Low Hack: stub any checks on queries written into the buffer cache.
+ const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
+ Maxwell::ReportSemaphore::Compare cmp; // only used for its size in the check below
+ if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
+ VideoCommon::CacheType::BufferCache)) {
+ return true;
+ }
+ return false;
+}
+
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -481,7 +607,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
}
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
- std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+ std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fc183c3ca..0c45832ae 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -16,6 +16,7 @@
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/blit_image.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
@@ -69,6 +70,8 @@ public:
~RasterizerOpenGL() override;
void Draw(bool is_indexed, u32 instance_count) override;
+ void DrawIndirect() override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -76,9 +79,12 @@ public:
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
- void FlushRegion(VAddr addr, u64 size) override;
- bool MustFlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
+ void FlushRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ bool MustFlushRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ void InvalidateRegion(VAddr addr, u64 size,
+ VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
@@ -88,12 +94,14 @@ public:
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(
+ VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
+ bool AccelerateConditionalRendering() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -121,6 +129,9 @@ private:
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
+ template <typename Func>
+ void PrepareDraw(bool is_indexed, Func&&);
+
/// Syncs state to match guest's
void SyncState();
@@ -215,6 +226,8 @@ private:
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
+ BlitImageHelper blit_image;
+
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f8868a012..7dd854e0f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context;
-constexpr u32 CACHE_VERSION = 7;
+constexpr u32 CACHE_VERSION = 9;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
+ .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
+ .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();
@@ -350,6 +352,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
regs.tessellation.params.output_primitives.Value() ==
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
graphics_key.xfb_enabled.Assign(regs.transform_feedback_enabled != 0 ? 1 : 0);
+ graphics_key.app_stage.Assign(maxwell3d->engine_state);
if (graphics_key.xfb_enabled) {
SetXfbState(graphics_key.xfb_state, regs);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index d9c29d8b7..98841ae65 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,2 +1,123 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace OpenGL {
+
+static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ // NV assembly program targets; indexed by stage in BindAssemblyPrograms.
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, // Five entries, one per graphics stage slot.
+};
+
+ProgramManager::ProgramManager(const Device& device) { // Creates the pipeline object; enables NV compute programs if needed.
+ glCreateProgramPipelines(1, &pipeline.handle); // Single pipeline object; stages are attached to it by the Bind* methods.
+ if (device.UseAssemblyShaders()) {
+ glEnable(GL_COMPUTE_PROGRAM_NV); // Enabled once here; nothing in this file disables it again.
+ }
+}
+
+void ProgramManager::BindComputeProgram(GLuint program) { // Makes `program` current through glUseProgram.
+ glUseProgram(program); // Issued on every call; there is no "already bound" check on this path.
+ is_compute_bound = true; // Tells UnbindCompute() that a glUseProgram(0) is required later.
+}
+
+void ProgramManager::BindComputeAssemblyProgram(GLuint program) { // Binds `program` to GL_COMPUTE_PROGRAM_NV.
+ if (current_assembly_compute_program != program) { // Skip the GL call when this handle is already bound.
+ current_assembly_compute_program = program;
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ }
+ UnbindPipeline(); // Unbinds the pipeline object and, via UnbindCompute(), any glUseProgram program.
+}
+
+void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { // Attaches each stage program to the pipeline, then binds it.
+ static constexpr std::array<GLenum, 5> stage_enums{ // Stage bit matching each index of `programs`.
+ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+ GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+ };
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) { // Only touch stages whose cached handle differs.
+ current_programs[stage] = programs[stage].handle;
+ glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+ }
+ }
+ BindPipeline(); // Binds the pipeline object if it is not bound and clears any glUseProgram binding.
+}
+
+void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { // Binds a vertex/fragment pair and clears all other stages.
+ if (current_programs[0] != vertex) { // Slot 0 caches the vertex stage handle.
+ current_programs[0] = vertex;
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+ }
+ if (current_programs[4] != fragment) { // Slot 4 caches the fragment stage handle.
+ current_programs[4] = fragment;
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+ }
+ glUseProgramStages(
+ pipeline.handle,
+ GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); // Done on every call, without consulting the cache.
+ current_programs[1] = 0; // Keep the cache in sync with the detach above.
+ current_programs[2] = 0;
+ current_programs[3] = 0;
+
+ if (current_stage_mask != 0) { // Non-zero only after BindAssemblyPrograms enabled some assembly stages.
+ current_stage_mask = 0;
+ for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+ glDisable(program_type); // Disables all five targets, not just the ones that were enabled.
+ }
+ }
+ BindPipeline(); // Binds the pipeline object if it is not bound and clears any glUseProgram binding.
+}
+
+void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+ u32 stage_mask) { // Enables/disables assembly stages per `stage_mask` (bit N = stage N) and binds changed programs.
+ const u32 changed_mask = current_stage_mask ^ stage_mask; // Bits that differ from the previously applied mask.
+ current_stage_mask = stage_mask;
+
+ if (changed_mask != 0) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (((changed_mask >> stage) & 1) != 0) { // Only stages whose enable state flipped.
+ if (((stage_mask >> stage) & 1) != 0) {
+ glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ } else {
+ glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ }
+ }
+ }
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) { // Runs for every stage, whether or not its mask bit is set.
+ if (current_programs[stage] != programs[stage].handle) { // NOTE(review): same cache the GLSL paths write; confirm a GLSL handle can never equal an assembly handle here.
+ current_programs[stage] = programs[stage].handle;
+ glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+ }
+ }
+ UnbindPipeline(); // Unbinds the pipeline object and, via UnbindCompute(), any glUseProgram program.
+}
+
+void ProgramManager::RestoreGuestCompute() {} // Empty body: calling this currently does nothing.
+
+void ProgramManager::BindPipeline() { // Ensures the pipeline object is bound and no glUseProgram program is set.
+ if (!is_pipeline_bound) { // Cached flag avoids a redundant glBindProgramPipeline call.
+ is_pipeline_bound = true;
+ glBindProgramPipeline(pipeline.handle);
+ }
+ UnbindCompute(); // Runs on every call, independent of the pipeline flag.
+}
+
+void ProgramManager::UnbindPipeline() { // Ensures no pipeline object and no glUseProgram program is bound.
+ if (is_pipeline_bound) { // Cached flag avoids a redundant glBindProgramPipeline(0) call.
+ is_pipeline_bound = false;
+ glBindProgramPipeline(0);
+ }
+ UnbindCompute(); // Runs on every call, independent of the pipeline flag.
+}
+
+void ProgramManager::UnbindCompute() { // Clears the program set by BindComputeProgram, if one is recorded.
+ if (is_compute_bound) { // Flag is set only in BindComputeProgram.
+ is_compute_bound = false;
+ glUseProgram(0);
+ }
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index a84f5aeb3..07ffab77f 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -6,8 +6,6 @@
#include <array>
#include <span>
-#include <glad/glad.h>
-
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -16,121 +14,28 @@ namespace OpenGL {
class ProgramManager {
static constexpr size_t NUM_STAGES = 5;
- static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
- GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
- GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
- };
-
public:
- explicit ProgramManager(const Device& device) {
- glCreateProgramPipelines(1, &pipeline.handle);
- if (device.UseAssemblyShaders()) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- }
- }
-
- void BindComputeProgram(GLuint program) {
- glUseProgram(program);
- is_compute_bound = true;
- }
-
- void BindComputeAssemblyProgram(GLuint program) {
- if (current_assembly_compute_program != program) {
- current_assembly_compute_program = program;
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- }
- UnbindPipeline();
- }
-
- void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
- static constexpr std::array<GLenum, 5> stage_enums{
- GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
- GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
- };
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (current_programs[stage] != programs[stage].handle) {
- current_programs[stage] = programs[stage].handle;
- glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
- }
- }
- BindPipeline();
- }
-
- void BindPresentPrograms(GLuint vertex, GLuint fragment) {
- if (current_programs[0] != vertex) {
- current_programs[0] = vertex;
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
- }
- if (current_programs[4] != fragment) {
- current_programs[4] = fragment;
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
- }
- glUseProgramStages(
- pipeline.handle,
- GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
- current_programs[1] = 0;
- current_programs[2] = 0;
- current_programs[3] = 0;
-
- if (current_stage_mask != 0) {
- current_stage_mask = 0;
- for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
- glDisable(program_type);
- }
- }
- BindPipeline();
- }
+ explicit ProgramManager(const Device& device);
+
+ void BindComputeProgram(GLuint program);
+
+ void BindComputeAssemblyProgram(GLuint program);
+
+ void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs);
+
+ void BindPresentPrograms(GLuint vertex, GLuint fragment);
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
- u32 stage_mask) {
- const u32 changed_mask = current_stage_mask ^ stage_mask;
- current_stage_mask = stage_mask;
-
- if (changed_mask != 0) {
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (((changed_mask >> stage) & 1) != 0) {
- if (((stage_mask >> stage) & 1) != 0) {
- glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
- } else {
- glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
- }
- }
- }
- }
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (current_programs[stage] != programs[stage].handle) {
- current_programs[stage] = programs[stage].handle;
- glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
- }
- }
- UnbindPipeline();
- }
-
- void RestoreGuestCompute() {}
+ u32 stage_mask);
+
+ void RestoreGuestCompute();
private:
- void BindPipeline() {
- if (!is_pipeline_bound) {
- is_pipeline_bound = true;
- glBindProgramPipeline(pipeline.handle);
- }
- UnbindCompute();
- }
-
- void UnbindPipeline() {
- if (is_pipeline_bound) {
- is_pipeline_bound = false;
- glBindProgramPipeline(0);
- }
- UnbindCompute();
- }
-
- void UnbindCompute() {
- if (is_compute_bound) {
- is_compute_bound = false;
- glUseProgram(0);
- }
- }
+ void BindPipeline();
+
+ void UnbindPipeline();
+
+ void UnbindCompute();
OGLPipeline pipeline;
bool is_pipeline_bound{};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 113528e9b..5d9d370f2 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -354,6 +354,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
+ static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
@@ -361,6 +362,7 @@ struct TextureCacheParams {
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
+ using AsyncBuffer = u32;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bc75680f0..2a74c1d05 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -17,8 +17,14 @@
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/ffx_a_h.h"
+#include "video_core/host_shaders/ffx_fsr1_h.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
@@ -31,6 +37,7 @@
#include "video_core/host_shaders/smaa_edge_detection_vert.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -268,12 +275,17 @@ void RendererOpenGL::InitOpenGLObjects() {
fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
- const auto SmaaShader = [](std::string_view specialized_source, GLenum stage) {
- std::string shader_source{specialized_source};
- constexpr std::string_view include_string = "#include \"opengl_smaa.glsl\"";
+ const auto replace_include = [](std::string& shader_source, std::string_view include_name,
+ std::string_view include_content) {
+ const std::string include_string = fmt::format("#include \"{}\"", include_name);
const std::size_t pos = shader_source.find(include_string);
ASSERT(pos != std::string::npos);
- shader_source.replace(pos, include_string.size(), HostShaders::OPENGL_SMAA_GLSL);
+ shader_source.replace(pos, include_string.size(), include_content);
+ };
+
+ const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) {
+ std::string shader_source{specialized_source};
+ replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL);
return CreateProgram(shader_source, stage);
};
@@ -298,14 +310,32 @@ void RendererOpenGL::InitOpenGLObjects() {
CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
GL_FRAGMENT_SHADER);
+ std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG};
+ replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H);
+ replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H);
+
+ std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG};
+ std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG};
+ replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source);
+ replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source);
+
+ fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source,
+ fsr_rcas_frag_source);
+
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
present_sampler_nn.Create();
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -442,7 +472,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, screen_info.display_texture);
- const auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
+ auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
+ if (anti_aliasing > Settings::AntiAliasing::LastAA) {
+ LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing);
+ anti_aliasing = Settings::AntiAliasing::None;
+ Settings::values.anti_aliasing.SetValue(anti_aliasing);
+ }
+
if (anti_aliasing != Settings::AntiAliasing::None) {
glEnablei(GL_SCISSOR_TEST, 0);
auto viewport_width = screen_info.texture.width;
@@ -519,6 +555,31 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, aa_texture.handle);
}
+ glDisablei(GL_SCISSOR_TEST, 0);
+
+ if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ if (!fsr->AreBuffersInitialized()) {
+ fsr->InitBuffers();
+ }
+
+ auto crop_rect = framebuffer_crop_rect;
+ if (crop_rect.GetWidth() == 0) {
+ crop_rect.right = framebuffer_width;
+ }
+ if (crop_rect.GetHeight() == 0) {
+ crop_rect.bottom = framebuffer_height;
+ }
+ crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
+ const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width);
+ const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height);
+ glBindSampler(0, present_sampler.handle);
+ fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect);
+ } else {
+ if (fsr->AreBuffersInitialized()) {
+ fsr->ReleaseBuffers();
+ }
+ }
+
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
@@ -534,10 +595,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
case Settings::ScalingFilter::ScaleForce:
return present_scaleforce_fragment.handle;
case Settings::ScalingFilter::Fsr:
- LOG_WARNING(
- Render_OpenGL,
- "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce");
- return present_scaleforce_fragment.handle;
+ return fsr->GetPresentFragmentProgram().handle;
default:
return present_bilinear_fragment.handle;
}
@@ -572,15 +630,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
f32 scale_v =
static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
- // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
- // (e.g. handheld mode) on a 1920x1080 framebuffer.
- if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
- static_cast<f32>(screen_info.texture.width);
- }
- if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
- static_cast<f32>(screen_info.texture.height);
+
+ if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) {
+ // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
+ // (e.g. handheld mode) on a 1920x1080 framebuffer.
+ if (framebuffer_crop_rect.GetWidth() > 0) {
+ scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+ static_cast<f32>(screen_info.texture.width);
+ }
+ if (framebuffer_crop_rect.GetHeight() > 0) {
+ scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+ static_cast<f32>(screen_info.texture.height);
+ }
}
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
!screen_info.was_accelerated) {
@@ -606,7 +667,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
- glDisablei(GL_SCISSOR_TEST, 0);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc97d7b26..f1d5fd954 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -141,6 +142,8 @@ private:
OGLTexture smaa_edges_tex;
OGLTexture smaa_blend_tex;
+ std::unique_ptr<FSR> fsr;
+
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;