summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h5
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h3
10 files changed, 43 insertions, 1 deletions
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 3151c0db8..f9ca55c36 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -63,6 +63,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
+ uses_local_memory = info.uses_local_memory;
if (force_context_flush) {
std::scoped_lock lock{built_mutex};
built_fence.Create();
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 9bcc72b59..c26b4fa5e 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -59,6 +59,10 @@ public:
return writes_global_memory;
}
+ [[nodiscard]] bool UsesLocalMemory() const noexcept { // Returns the uses_local_memory flag that the constructor copies from info.uses_local_memory.
+ return uses_local_memory;
+ }
+
void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_,
Tegra::MemoryManager* gpu_memory_) {
kepler_compute = kepler_compute_;
@@ -84,6 +88,7 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
+ bool uses_local_memory{};
std::mutex built_mutex;
std::condition_variable built_condvar;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 03d234f2f..33e63c17d 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -194,6 +194,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_bool_ref_bug = true;
}
}
+ has_lmem_perf_bug = is_nvidia;
strict_context_required = emu_window.StrictContextRequired();
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ad27264e5..a5a6bbbba 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -192,6 +192,10 @@ public:
return supports_conditional_barriers;
}
+ bool HasLmemPerfBug() const { // Returns has_lmem_perf_bug, which the Device constructor sets to is_nvidia.
+ return has_lmem_perf_bug;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -238,6 +242,7 @@ private:
bool can_report_memory{};
bool strict_context_required{};
bool supports_conditional_barriers{};
+ bool has_lmem_perf_bug{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index c58f760b8..23a48c6fe 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
+ uses_local_memory |= info.uses_local_memory;
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 7bab3be0a..7b3d7eae8 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -98,6 +98,10 @@ public:
return writes_global_memory;
}
+ [[nodiscard]] bool UsesLocalMemory() const noexcept { // Returns uses_local_memory, which the constructor accumulates with |= from info.uses_local_memory.
+ return uses_local_memory;
+ }
+
[[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
@@ -146,6 +150,7 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
+ bool uses_local_memory{};
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index fc711c44a..edf527f2d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -222,6 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
gpu.TickWork();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ if (pipeline->UsesLocalMemory()) {
+ program_manager.LocalMemoryWarmup();
+ }
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
@@ -371,6 +374,9 @@ void RasterizerOpenGL::DispatchCompute() {
if (!pipeline) {
return;
}
+ if (pipeline->UsesLocalMemory()) {
+ program_manager.LocalMemoryWarmup();
+ }
pipeline->SetEngine(kepler_compute, gpu_memory);
pipeline->Configure();
const auto& qmd{kepler_compute->launch_description};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Shader::Stage::VertexB:
case Shader::Stage::Geometry:
if (!use_assembly_shaders && key.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
break;
case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 98841ae65..03d4b9d06 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -3,7 +3,9 @@
#include <glad/glad.h>
+#include "video_core/host_shaders/opengl_lmem_warmup_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
@@ -17,6 +19,10 @@ ProgramManager::ProgramManager(const Device& device) {
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
+ if (device.HasLmemPerfBug()) {
+ lmem_warmup_program =
+ CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER);
+ }
}
void ProgramManager::BindComputeProgram(GLuint program) {
@@ -98,6 +104,13 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU
void ProgramManager::RestoreGuestCompute() {}
+void ProgramManager::LocalMemoryWarmup() { // Binds and dispatches the lmem warmup compute program when one exists.
+ if (lmem_warmup_program.handle != 0) { // The program is only created in the constructor when device.HasLmemPerfBug() is true; presumably the handle is 0 otherwise - TODO confirm.
+ BindComputeProgram(lmem_warmup_program.handle);
+ glDispatchCompute(1, 1, 1); // Launches one work group in each of x, y and z.
+ }
+}
+
void ProgramManager::BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 07ffab77f..852d8c88e 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -30,6 +30,8 @@ public:
void RestoreGuestCompute();
+ void LocalMemoryWarmup();
+
private:
void BindPipeline();
@@ -44,6 +46,7 @@ private:
u32 current_stage_mask = 0;
std::array<GLuint, NUM_STAGES> current_programs{};
GLuint current_assembly_compute_program = 0;
+ OGLProgram lmem_warmup_program;
};
} // namespace OpenGL