summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp51
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp2
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp1
9 files changed, 90 insertions, 44 deletions
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index f9ca55c36..d70501860 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -34,13 +34,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
- case Settings::ShaderBackend::GLSL:
+ case Settings::ShaderBackend::Glsl:
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
break;
- case Settings::ShaderBackend::GLASM:
+ case Settings::ShaderBackend::Glasm:
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
break;
- case Settings::ShaderBackend::SPIRV:
+ case Settings::ShaderBackend::SpirV:
source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
break;
}
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 33e63c17d..94258ccd0 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -106,6 +106,43 @@ bool IsASTCSupported() {
return true;
}
+static bool HasSlowSoftwareAstc(std::string_view vendor_name, std::string_view renderer) {
+// ifdef for Unix reduces string comparisons for non-Windows drivers, and Intel
+#ifdef YUZU_UNIX
+ // Sorted vaguely by how likely a vendor is to appear
+ if (vendor_name == "AMD") {
+ // RadeonSI
+ return true;
+ }
+ if (vendor_name == "Intel") {
+ // Must be inside YUZU_UNIX ifdef as the Windows driver uses the same vendor string
+ // iris, crocus
+ const bool is_intel_dg = (renderer.find("DG") != std::string_view::npos);
+ return is_intel_dg;
+ }
+ if (vendor_name == "nouveau") {
+ return true;
+ }
+ if (vendor_name == "X.Org") {
+ // R600
+ return true;
+ }
+#endif
+ if (vendor_name == "Collabora Ltd") {
+ // Zink
+ return true;
+ }
+ if (vendor_name == "Microsoft Corporation") {
+ // d3d12
+ return true;
+ }
+ if (vendor_name == "Mesa/X.org") {
+ // llvmpipe, softpipe, virgl
+ return true;
+ }
+ return false;
+}
+
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool") ||
@@ -120,12 +157,16 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
}
vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
+ const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor_name == "NVIDIA Corporation";
const bool is_amd = vendor_name == "ATI Technologies Inc.";
const bool is_intel = vendor_name == "Intel";
+ const bool has_slow_software_astc =
+ !is_nvidia && !is_amd && HasSlowSoftwareAstc(vendor_name, renderer);
+
#ifdef __unix__
constexpr bool is_linux = true;
#else
@@ -152,7 +193,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
- has_astc = IsASTCSupported();
+ has_astc = !has_slow_software_astc && IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
@@ -177,15 +218,15 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
shader_backend = Settings::values.shader_backend.GetValue();
- use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
+ use_assembly_shaders = shader_backend == Settings::ShaderBackend::Glasm &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
- if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
+ if (shader_backend == Settings::ShaderBackend::Glasm && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
- shader_backend = Settings::ShaderBackend::GLSL;
+ shader_backend = Settings::ShaderBackend::Glsl;
}
- if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
+ if (shader_backend == Settings::ShaderBackend::Glsl && is_nvidia) {
const std::string_view driver_version = version.substr(13);
const int version_major =
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 71f720c63..44a771d65 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -220,7 +220,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
- const bool assembly_shaders{assembly_programs[0].handle != 0};
+ const auto backend = device.GetShaderBackend();
+ const bool assembly_shaders{backend == Settings::ShaderBackend::Glasm};
use_storage_buffers =
!assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory &= !use_storage_buffers;
@@ -230,24 +231,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
GenerateTransformFeedbackState();
}
const bool in_parallel = thread_worker != nullptr;
- const auto backend = device.GetShaderBackend();
auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv),
shader_notify, backend, in_parallel,
force_context_flush](ShaderContext::Context*) mutable {
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
- case Settings::ShaderBackend::GLSL:
+ case Settings::ShaderBackend::Glsl:
if (!sources_[stage].empty()) {
source_programs[stage] = CreateProgram(sources_[stage], Stage(stage));
}
break;
- case Settings::ShaderBackend::GLASM:
+ case Settings::ShaderBackend::Glasm:
if (!sources_[stage].empty()) {
assembly_programs[stage] =
CompileProgram(sources_[stage], AssemblyStage(stage));
}
break;
- case Settings::ShaderBackend::SPIRV:
+ case Settings::ShaderBackend::SpirV:
if (!sources_spirv_[stage].empty()) {
source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage));
}
@@ -559,15 +559,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
- glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
- xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
+ glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS);
}
void GraphicsPipeline::GenerateTransformFeedbackState() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
GLint* cursor{xfb_attribs.data()};
- GLint* current_stream{xfb_streams.data()};
for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
const auto& layout = key.xfb_state.layouts[feedback];
@@ -575,15 +573,6 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
if (layout.varying_count == 0) {
continue;
}
- *current_stream = static_cast<GLint>(feedback);
- if (current_stream != xfb_streams.data()) {
- // When stepping one stream, push the expected token
- cursor[0] = GL_NEXT_BUFFER_NV;
- cursor[1] = 0;
- cursor[2] = 0;
- cursor += XFB_ENTRY_STRIDE;
- }
- ++current_stream;
const auto& locations = key.xfb_state.varyings[feedback];
std::optional<u32> current_index;
@@ -619,7 +608,6 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
}
}
num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
- num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
}
void GraphicsPipeline::WaitForBuild() {
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 7b3d7eae8..74fc9cc3d 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -154,9 +154,7 @@ private:
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
- GLsizei num_xfb_strides{};
std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
- std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
std::mutex built_mutex;
std::condition_variable built_condvar;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index aadd6967c..dd03efecd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() {
pipeline->SetEngine(kepler_compute, gpu_memory);
pipeline->Configure();
const auto& qmd{kepler_compute->launch_description};
+ auto indirect_address = kepler_compute->GetIndirectComputeAddress();
+ if (indirect_address) {
+ // DispatchIndirect
+ static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
+ const auto [buffer, offset] =
+ buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
+ glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle());
+ glDispatchComputeIndirect(static_cast<GLintptr>(offset));
+ return;
+ }
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
@@ -1335,7 +1346,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
- const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
+ const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
+ : VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -1344,8 +1356,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
const std::span copy_span{&copy, 1};
if constexpr (IS_IMAGE_UPLOAD) {
+ texture_cache.PrepareImage(image_id, true, false);
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
+ if (offset % BytesPerBlock(image->info.format)) {
+ return false;
+ }
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7e1d7f92e..2888e0238 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -445,7 +445,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs, bool use_shader_workers,
bool force_context_flush) try {
- LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+ auto hash = key.Hash();
+ LOG_INFO(Render_OpenGL, "0x{:016x}", hash);
size_t env_index{};
u32 total_storage_buffers{};
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
@@ -474,7 +475,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
if (Settings::values.dump_shaders) {
- env.Dump(key.unique_hashes[index]);
+ env.Dump(hash, key.unique_hashes[index]);
}
if (!uses_vertex_a || index != 1) {
@@ -522,14 +523,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const auto runtime_info{
MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
switch (device.GetShaderBackend()) {
- case Settings::ShaderBackend::GLSL:
+ case Settings::ShaderBackend::Glsl:
ConvertLegacyToGeneric(program, runtime_info);
sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
break;
- case Settings::ShaderBackend::GLASM:
+ case Settings::ShaderBackend::Glasm:
sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
break;
- case Settings::ShaderBackend::SPIRV:
+ case Settings::ShaderBackend::SpirV:
ConvertLegacyToGeneric(program, runtime_info);
sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
break;
@@ -566,12 +567,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env,
bool force_context_flush) try {
- LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+ auto hash = key.Hash();
+ LOG_INFO(Render_OpenGL, "0x{:016x}", hash);
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
if (Settings::values.dump_shaders) {
- env.Dump(key.Hash());
+ env.Dump(hash, key.unique_hash);
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
@@ -582,13 +584,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
std::string code{};
std::vector<u32> code_spirv;
switch (device.GetShaderBackend()) {
- case Settings::ShaderBackend::GLSL:
+ case Settings::ShaderBackend::Glsl:
code = EmitGLSL(profile, program);
break;
- case Settings::ShaderBackend::GLASM:
+ case Settings::ShaderBackend::Glasm:
code = EmitGLASM(profile, info, program);
break;
- case Settings::ShaderBackend::SPIRV:
+ case Settings::ShaderBackend::SpirV:
code_spirv = EmitSPIRV(profile, program);
break;
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 3b446be07..9cafd2983 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -232,10 +232,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
const VideoCommon::ImageInfo& info) {
if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) {
- return Settings::values.accelerate_astc.GetValue() &&
+ return Settings::values.accelerate_astc.GetValue() == Settings::AstcDecodeMode::Gpu &&
Settings::values.astc_recompression.GetValue() ==
- Settings::AstcRecompression::Uncompressed &&
- !Settings::values.async_astc.GetValue();
+ Settings::AstcRecompression::Uncompressed;
}
// Disable other accelerated uploads for now as they don't implement swizzled uploads
return false;
@@ -267,7 +266,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
[[nodiscard]] bool CanBeDecodedAsync(const TextureCacheRuntime& runtime,
const VideoCommon::ImageInfo& info) {
if (IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC()) {
- return Settings::values.async_astc.GetValue();
+ return Settings::values.accelerate_astc.GetValue() ==
+ Settings::AstcDecodeMode::CpuAsynchronous;
}
return false;
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2a74c1d05..6b8d4e554 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -473,7 +473,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, screen_info.display_texture);
auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
- if (anti_aliasing > Settings::AntiAliasing::LastAA) {
+ if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) {
LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing);
anti_aliasing = Settings::AntiAliasing::None;
Settings::values.anti_aliasing.SetValue(anti_aliasing);
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 544982d18..c437013e6 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -68,6 +68,7 @@ void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+ program_manager.LocalMemoryWarmup();
const Extent2D tile_size{
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),