diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
9 files changed, 90 insertions, 44 deletions
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index f9ca55c36..d70501860 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -34,13 +34,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: + case Settings::ShaderBackend::Glsl: source_program = CreateProgram(code, GL_COMPUTE_SHADER); break; - case Settings::ShaderBackend::GLASM: + case Settings::ShaderBackend::Glasm: assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); break; - case Settings::ShaderBackend::SPIRV: + case Settings::ShaderBackend::SpirV: source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); break; } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 33e63c17d..94258ccd0 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -106,6 +106,43 @@ bool IsASTCSupported() { return true; } +static bool HasSlowSoftwareAstc(std::string_view vendor_name, std::string_view renderer) { +// ifdef for Unix reduces string comparisons for non-Windows drivers, and Intel +#ifdef YUZU_UNIX + // Sorted vaguely by how likely a vendor is to appear + if (vendor_name == "AMD") { + // RadeonSI + return true; + } + if (vendor_name == "Intel") { + // Must be inside YUZU_UNIX ifdef as the Windows driver uses the same vendor string + // iris, crocus + const bool is_intel_dg = (renderer.find("DG") != std::string_view::npos); + return is_intel_dg; + } + if (vendor_name == "nouveau") { + return true; + } + if (vendor_name == "X.Org") { + // R600 + return true; + } +#endif + if (vendor_name == "Collabora Ltd") { + // Zink + return true; + } + if (vendor_name == "Microsoft Corporation") { + // d3d12 + return true; + } + if (vendor_name == "Mesa/X.org") { + // llvmpipe, softpipe, virgl + return true; + } + return false; +} + [[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); return nsight || HasExtension(extensions, "GL_EXT_debug_tool") || @@ -120,12 +157,16 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { } vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); + const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor_name == "NVIDIA Corporation"; const bool is_amd = vendor_name == "ATI Technologies Inc."; const bool is_intel = vendor_name == "Intel"; + const bool has_slow_software_astc = + !is_nvidia && !is_amd && HasSlowSoftwareAstc(vendor_name, renderer); + #ifdef __unix__ constexpr bool is_linux = true; #else @@ -152,7 +193,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod"); - has_astc = IsASTCSupported(); + has_astc = !has_slow_software_astc && IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); @@ -177,15 +218,15 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; shader_backend = Settings::values.shader_backend.GetValue(); - use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && + use_assembly_shaders = shader_backend == Settings::ShaderBackend::Glasm && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; - if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + if (shader_backend == Settings::ShaderBackend::Glasm && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - shader_backend = Settings::ShaderBackend::GLSL; + shader_backend = Settings::ShaderBackend::Glsl; } - if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) { + if (shader_backend == Settings::ShaderBackend::Glsl && is_nvidia) { const std::string_view driver_version = version.substr(13); const int version_major = std::atoi(driver_version.substr(0, driver_version.find(".")).data()); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 71f720c63..44a771d65 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -220,7 +220,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); - const bool assembly_shaders{assembly_programs[0].handle != 0}; + const auto backend = device.GetShaderBackend(); + const bool assembly_shaders{backend == Settings::ShaderBackend::Glasm}; use_storage_buffers = !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); writes_global_memory &= !use_storage_buffers; @@ -230,24 +231,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c GenerateTransformFeedbackState(); } const bool in_parallel = thread_worker != nullptr; - const auto backend = device.GetShaderBackend(); auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv), shader_notify, backend, in_parallel, force_context_flush](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { - case Settings::ShaderBackend::GLSL: + case Settings::ShaderBackend::Glsl: if (!sources_[stage].empty()) { source_programs[stage] = CreateProgram(sources_[stage], Stage(stage)); } break; - case Settings::ShaderBackend::GLASM: + case Settings::ShaderBackend::Glasm: if (!sources_[stage].empty()) { assembly_programs[stage] = CompileProgram(sources_[stage], AssemblyStage(stage)); } break; - case Settings::ShaderBackend::SPIRV: + case Settings::ShaderBackend::SpirV: if (!sources_spirv_[stage].empty()) { source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage)); } @@ -559,15 +559,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); + glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS); } void GraphicsPipeline::GenerateTransformFeedbackState() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. GLint* cursor{xfb_attribs.data()}; - GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { const auto& layout = key.xfb_state.layouts[feedback]; @@ -575,15 +573,6 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { if (layout.varying_count == 0) { continue; } - *current_stream = static_cast<GLint>(feedback); - if (current_stream != xfb_streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += XFB_ENTRY_STRIDE; - } - ++current_stream; const auto& locations = key.xfb_state.varyings[feedback]; std::optional<u32> current_index; @@ -619,7 +608,6 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { } } num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); - num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data()); } void GraphicsPipeline::WaitForBuild() { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 7b3d7eae8..74fc9cc3d 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -154,9 +154,7 @@ private: static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; - GLsizei num_xfb_strides{}; std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; - std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{}; std::mutex built_mutex; std::condition_variable built_condvar; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index aadd6967c..dd03efecd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() { pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle()); + glDispatchComputeIndirect(static_cast<GLintptr>(offset)); + return; + } glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; has_written_global_memory |= pipeline->WritesGlobalMemory(); @@ -1335,7 +1346,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, } const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; - const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; + const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing + : VideoCommon::ObtainBufferOperation::MarkAsWritten; const auto [buffer, offset] = buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); @@ -1344,8 +1356,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, const std::span copy_span{©, 1}; if constexpr (IS_IMAGE_UPLOAD) { + texture_cache.PrepareImage(image_id, true, false); image->UploadMemory(buffer->Handle(), offset, copy_span); } else { + if (offset % BytesPerBlock(image->info.format)) { + return false; + } texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, buffer_operand.address, buffer_size); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7e1d7f92e..2888e0238 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -445,7 +445,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs, bool use_shader_workers, bool force_context_flush) try { - LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + auto hash = key.Hash(); + LOG_INFO(Render_OpenGL, "0x{:016x}", hash); size_t env_index{}; u32 total_storage_buffers{}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; @@ -474,7 +475,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (Settings::values.dump_shaders) { - env.Dump(key.unique_hashes[index]); + env.Dump(hash, key.unique_hashes[index]); } if (!uses_vertex_a || index != 1) { @@ -522,14 +523,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: + case Settings::ShaderBackend::Glsl: ConvertLegacyToGeneric(program, runtime_info); sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); break; - case Settings::ShaderBackend::GLASM: + case Settings::ShaderBackend::Glasm: sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); break; - case Settings::ShaderBackend::SPIRV: + case Settings::ShaderBackend::SpirV: ConvertLegacyToGeneric(program, runtime_info); sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); break; @@ -566,12 +567,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env, bool force_context_flush) try { - LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + auto hash = key.Hash(); + LOG_INFO(Render_OpenGL, "0x{:016x}", hash); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; if (Settings::values.dump_shaders) { - env.Dump(key.Hash()); + env.Dump(hash, key.unique_hash); } auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; @@ -582,13 +584,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( std::string code{}; std::vector<u32> code_spirv; switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: + case Settings::ShaderBackend::Glsl: code = EmitGLSL(profile, program); break; - case Settings::ShaderBackend::GLASM: + case Settings::ShaderBackend::Glasm: code = EmitGLASM(profile, info, program); break; - case Settings::ShaderBackend::SPIRV: + case Settings::ShaderBackend::SpirV: code_spirv = EmitSPIRV(profile, program); break; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3b446be07..9cafd2983 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -232,10 +232,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info) { if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) { - return Settings::values.accelerate_astc.GetValue() && + return Settings::values.accelerate_astc.GetValue() == Settings::AstcDecodeMode::Gpu && Settings::values.astc_recompression.GetValue() == - Settings::AstcRecompression::Uncompressed && - !Settings::values.async_astc.GetValue(); + Settings::AstcRecompression::Uncompressed; } // Disable other accelerated uploads for now as they don't implement swizzled uploads return false; @@ -267,7 +266,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] bool CanBeDecodedAsync(const TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info) { if (IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC()) { - return Settings::values.async_astc.GetValue(); + return Settings::values.accelerate_astc.GetValue() == + Settings::AstcDecodeMode::CpuAsynchronous; } return false; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2a74c1d05..6b8d4e554 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -473,7 +473,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, screen_info.display_texture); auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); - if (anti_aliasing > Settings::AntiAliasing::LastAA) { + if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); anti_aliasing = Settings::AntiAliasing::None; Settings::values.anti_aliasing.SetValue(anti_aliasing); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 544982d18..c437013e6 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -68,6 +68,7 @@ void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + program_manager.LocalMemoryWarmup(); const Extent2D tile_size{ .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), |