diff options
Diffstat (limited to '')
22 files changed, 188 insertions, 83 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f0f450edb..8be7bd594 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -289,8 +289,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad MarkWrittenBuffer(buffer_id, *cpu_addr, size); break; case ObtainBufferOperation::DiscardWrite: { - IntervalType interval{*cpu_addr, size}; + VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); + VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); + IntervalType interval{cpu_addr_start, cpu_addr_end}; ClearDownload(interval); + common_ranges.subtract(interval); break; } default: @@ -1159,6 +1162,11 @@ void BufferCache<P>::UpdateDrawIndirect() { .size = static_cast<u32>(size), .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), }; + VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); + VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); + IntervalType interval{cpu_addr_start, cpu_addr_end}; + ClearDownload(interval); + common_ranges.subtract(interval); }; if (current_draw_indirect->include_count) { update(current_draw_indirect->count_start_address, sizeof(u32), diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 9f1b340a9..58ce0d8c2 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -14,6 +14,7 @@ namespace Tegra { constexpr u32 MacroRegistersStart = 0xE00; +constexpr u32 ComputeInline = 0x6D; DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) @@ -83,12 +84,35 @@ bool DmaPusher::Step() { dma_state.dma_get, command_list_header.size * sizeof(u32)); } } - Core::Memory::GpuGuestMemory<Tegra::CommandHeader, - Core::Memory::GuestMemoryFlags::UnsafeRead> - headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); - ProcessCommands(headers); + const auto safe_process = [&] { + Core::Memory::GpuGuestMemory<Tegra::CommandHeader, + Core::Memory::GuestMemoryFlags::SafeRead> + headers(memory_manager, dma_state.dma_get, command_list_header.size, + &command_headers); + ProcessCommands(headers); + }; + const auto unsafe_process = [&] { + Core::Memory::GpuGuestMemory<Tegra::CommandHeader, + Core::Memory::GuestMemoryFlags::UnsafeRead> + headers(memory_manager, dma_state.dma_get, command_list_header.size, + &command_headers); + ProcessCommands(headers); + }; + if (Settings::IsGPULevelHigh()) { + if (dma_state.method >= MacroRegistersStart) { + unsafe_process(); + return true; + } + if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && + dma_state.method == ComputeInline) { + unsafe_process(); + return true; + } + safe_process(); + return true; + } + unsafe_process(); } - return true; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 8a2784cdc..c9fab2d90 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -130,8 +130,10 @@ public: void DispatchCalls(); - void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, + Engines::EngineTypes engine_type) { subchannels[subchannel_id] = engine; + subchannel_type[subchannel_id] = engine_type; } void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); @@ -170,6 +172,7 @@ private: const bool ib_enable{true}; ///< IB mode enabled std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; + std::array<Engines::EngineTypes, max_subchannels> subchannel_type; GPU& gpu; Core::System& system; diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 392322358..54631ee6c 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -11,6 +11,14 @@ namespace Tegra::Engines { +enum class EngineTypes : u32 { + KeplerCompute, + Maxwell3D, + Fermi2D, + MaxwellDMA, + KeplerMemory, +}; + class EngineInterface { public: virtual ~EngineInterface() = default; diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 7242d2529..21bf8aeb4 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -69,6 +69,14 @@ public: /// Binds a rasterizer to this engine. void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + GPUVAddr ExecTargetAddress() const { + return regs.dest.Address(); + } + + u32 GetUploadSize() const { + return copy_size; + } + private: void ProcessData(std::span<const u8> read_buffer); diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a38d9528a..cd61ab222 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal switch (method) { case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + UploadInfo info{.upload_address = upload_address, + .exec_address = upload_state.ExecTargetAddress(), + .copy_size = upload_state.GetUploadSize()}; + uploads.push_back(info); upload_state.ProcessExec(regs.exec_upload.linear != 0); break; } case KEPLER_COMPUTE_REG_INDEX(data_upload): { + upload_address = current_dma_segment; upload_state.ProcessData(method_argument, is_last_call); break; } - case KEPLER_COMPUTE_REG_INDEX(launch): + case KEPLER_COMPUTE_REG_INDEX(launch): { + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + + for (auto& data : uploads) { + const GPUVAddr offset = data.exec_address - launch_desc_loc; + if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) && + memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) { + indirect_compute = {data.upload_address}; + } + } + uploads.clear(); ProcessLaunch(); + indirect_compute = std::nullopt; break; + } default: break; } @@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun u32 methods_pending) { switch (method) { case KEPLER_COMPUTE_REG_INDEX(data_upload): + upload_address = current_dma_segment; upload_state.ProcessData(base_start, amount); return; default: diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 2092e685f..735e05fb4 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -5,6 +5,7 @@ #include <array> #include <cstddef> +#include <optional> #include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" @@ -36,6 +37,9 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) +#define LAUNCH_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32)) + class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); @@ -201,6 +205,10 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; + std::optional<GPUVAddr> GetIndirectComputeAddress() const { + return indirect_compute; + } + private: void ProcessLaunch(); @@ -216,6 +224,15 @@ private: MemoryManager& memory_manager; VideoCore::RasterizerInterface* rasterizer = nullptr; Upload::State upload_state; + GPUVAddr upload_address; + + struct UploadInfo { + GPUVAddr upload_address; + GPUVAddr exec_address; + u32 copy_size; + }; + std::vector<UploadInfo> uploads; + std::optional<GPUVAddr> indirect_compute{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c3696096d..06e349e43 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -257,6 +257,7 @@ u32 Maxwell3D::GetMaxCurrentVertices() { const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); num_vertices = std::max( num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + break; } return num_vertices; } @@ -269,10 +270,13 @@ size_t Maxwell3D::EstimateIndexBufferSize() { std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); const size_t log2_byte_size = Common::Log2Ceil64(byte_size); + const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; + const size_t lower_cap = + std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); return std::min<size_t>( memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) / byte_size, - static_cast<size_t>(end_address - start_address)); + lower_cap); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 7718a09b3..6de2543b7 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, + EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, + EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, + EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, + EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, + EngineTypes::KeplerMemory); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 220cce28a..8d7da50fc 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -319,6 +319,7 @@ void Codec::Decode() { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); return; } + bool is_interlaced = initial_frame->interlaced_frame != 0; if (av_codec_ctx->hw_device_ctx) { final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); @@ -334,7 +335,7 @@ void Codec::Decode() { UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - if (!final_frame->interlaced_frame) { + if (!is_interlaced) { av_frames.push(std::move(final_frame)); } else { if (!filters_initialized) { diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 905505ca1..5d0bb9cc4 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -27,14 +27,24 @@ MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro HLE", MP_RGB(128, 192, 192)) namespace Tegra { -static void Dump(u64 hash, std::span<const u32> code) { +static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) { const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; const auto macro_dir{base_dir / "macros"}; if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories"); return; } - const auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; + auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; + + if (decompiled) { + auto new_name{macro_dir / fmt::format("decompiled_{:016x}.macro", hash)}; + if (Common::FS::Exists(name)) { + (void)Common::FS::RenameFile(name, new_name); + return; + } + name = new_name; + } + std::fstream macro_file(name, std::ios::out | std::ios::binary); if (!macro_file) { LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}", @@ -90,9 +100,6 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { if (!mid_method.has_value()) { cache_info.lle_program = Compile(macro_code->second); cache_info.hash = Common::HashValue(macro_code->second); - if (Settings::values.dump_macros) { - Dump(cache_info.hash, macro_code->second); - } } else { const auto& macro_cached = uploaded_macro_code[mid_method.value()]; const auto rebased_method = method - mid_method.value(); @@ -102,9 +109,6 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { code.size() * sizeof(u32)); cache_info.hash = Common::HashValue(code); cache_info.lle_program = Compile(code); - if (Settings::values.dump_macros) { - Dump(cache_info.hash, code); - } } auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); @@ -117,6 +121,10 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { MICROPROFILE_SCOPE(MacroHLE); cache_info.hle_program->Execute(parameters, method); } + + if (Settings::values.dump_macros) { + Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program); + } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1ba31be88..dd03efecd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() { pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle()); + glDispatchComputeIndirect(static_cast<GLintptr>(offset)); + return; + } glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; has_written_global_memory |= pipeline->WritesGlobalMemory(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 454bb66a4..c4c30d807 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -66,21 +66,6 @@ std::string BuildCommaSeparatedExtensions( return fmt::format("{}", fmt::join(available_extensions, ",")); } -DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) { - if (!Settings::values.renderer_debug) { - return DebugCallback{}; - } - const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); - const auto it = std::ranges::find_if(*properties, [](const auto& prop) { - return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0; - }); - if (it != properties->end()) { - return CreateDebugUtilsCallback(instance); - } else { - return CreateDebugReportCallback(instance); - } -} - } // Anonymous namespace Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, @@ -103,7 +88,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, Settings::values.renderer_debug.GetValue())), - debug_callback(MakeDebugCallback(instance, dld)), + debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) + : vk::DebugUtilsMessenger{}), surface(CreateSurface(instance, render_window.GetWindowInfo())), device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), scheduler(device, state_tracker), diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 89e98425e..590bc1c64 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -35,8 +35,6 @@ class GPU; namespace Vulkan { -using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>; - Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, VkSurfaceKHR surface); @@ -75,7 +73,7 @@ private: vk::InstanceDispatch dld; vk::Instance instance; - DebugCallback debug_callback; + vk::DebugUtilsMessenger debug_messenger; vk::SurfaceKHR surface; ScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 60a6ac651..e15865d16 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -529,17 +529,20 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi buffer_handles.push_back(handle); } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings_ = std::move(bindings), + scheduler.Record([this, bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, - bindings_.max_index - bindings_.min_index, + std::min(bindings_.max_index - bindings_.min_index, + device.GetMaxVertexInputBindings()), buffer_handles_.data(), bindings_.offsets.data(), bindings_.sizes.data(), bindings_.strides.data()); }); } else { - scheduler.Record([bindings_ = std::move(bindings), + scheduler.Record([this, bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings_.min_index, bindings_.max_index - bindings_.min_index, + cmdbuf.BindVertexBuffers(bindings_.min_index, + std::min(bindings_.max_index - bindings_.min_index, + device.GetMaxVertexInputBindings()), buffer_handles_.data(), bindings_.offsets.data()); }); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c1314ca99..4f83a88e1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -611,9 +611,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); - if (Settings::values.dump_shaders) { - env.Dump(hash, key.unique_hashes[index]); - } if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -624,6 +621,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } + if (Settings::values.dump_shaders) { + env.Dump(hash, key.unique_hashes[index]); + } + if (programs[index].info.requires_layer_emulation) { layer_source_program = &programs[index]; } @@ -664,6 +665,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( std::move(modules), infos); } catch (const Shader::Exception& exception) { + auto hash = key.Hash(); + size_t env_index{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + env.Dump(hash, key.unique_hashes[index]); + } LOG_ERROR(Render_Vulkan, "{}", exception.what()); return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 032f694bc..01e76a82c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -463,6 +463,20 @@ void RasterizerVulkan::DispatchCompute() { pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([indirect_buffer = buffer->Handle(), + indirect_offset = offset](vk::CommandBuffer cmdbuf) { + cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset); + }); + return; + } const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index 67e8065a4..448df2d3a 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -63,22 +63,6 @@ VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, return VK_FALSE; } -VkBool32 DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, - uint64_t object, size_t location, int32_t messageCode, - const char* pLayerPrefix, const char* pMessage, void* pUserData) { - const VkDebugReportFlagBitsEXT severity = static_cast<VkDebugReportFlagBitsEXT>(flags); - const std::string_view message{pMessage}; - if (severity & VK_DEBUG_REPORT_ERROR_BIT_EXT) { - LOG_CRITICAL(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_REPORT_WARNING_BIT_EXT) { - LOG_WARNING(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) { - LOG_INFO(Render_Vulkan, "{}", message); - } else if (severity & VK_DEBUG_REPORT_DEBUG_BIT_EXT) { - LOG_DEBUG(Render_Vulkan, "{}", message); - } - return VK_FALSE; -} } // Anonymous namespace vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) { @@ -98,15 +82,4 @@ vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) { }); } -vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance) { - return instance.CreateDebugReportCallback({ - .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, - .pNext = nullptr, - .flags = VK_DEBUG_REPORT_DEBUG_BIT_EXT | VK_DEBUG_REPORT_INFORMATION_BIT_EXT | - VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT, - .pfnCallback = DebugReportCallback, - .pUserData = nullptr, - }); -} - } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h index a8af7b406..5e940782f 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.h +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h @@ -9,6 +9,4 @@ namespace Vulkan { vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance); -vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance); - } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index bc16145be..180657a75 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -76,11 +76,9 @@ namespace { extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); } #endif - if (enable_validation) { - const bool debug_utils = - AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME}); - extensions.push_back(debug_utils ? VK_EXT_DEBUG_UTILS_EXTENSION_NAME - : VK_EXT_DEBUG_REPORT_EXTENSION_NAME); + if (enable_validation && + AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME})) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 78e5a248f..c3f388d89 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -92,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdCopyImage); X(vkCmdCopyImageToBuffer); X(vkCmdDispatch); + X(vkCmdDispatchIndirect); X(vkCmdDraw); X(vkCmdDrawIndexed); X(vkCmdDrawIndirect); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index c226a2a29..049fa8038 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -203,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdCopyImage vkCmdCopyImage{}; PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; PFN_vkCmdDispatch vkCmdDispatch{}; + PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; @@ -1209,6 +1210,10 @@ public: dld->vkCmdDispatch(handle, x, y, z); } + void DispatchIndirect(VkBuffer indirect_buffer, VkDeviceSize offset) const noexcept { + dld->vkCmdDispatchIndirect(handle, indirect_buffer, offset); + } + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, Span<VkBufferMemoryBarrier> buffer_barriers, |