From a9ca39f8591532ba6d37f7a3e068d5eefe416464 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 7 Feb 2022 07:52:04 +0100 Subject: NVDRV: Further improvements. --- src/core/hle/service/nvdrv/core/container.cpp | 8 +- src/core/hle/service/nvdrv/core/container.h | 8 +- src/core/hle/service/nvdrv/core/nvmap.cpp | 7 +- src/core/hle/service/nvdrv/core/nvmap.h | 7 +- .../hle/service/nvdrv/core/syncpoint_manager.cpp | 112 ++++++++++++++++--- .../hle/service/nvdrv/core/syncpoint_manager.h | 120 ++++++++++++++------- src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 18 ++-- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 59 +++++----- src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 19 ++-- .../hle/service/nvdrv/devices/nvhost_nvdec.cpp | 2 +- .../service/nvdrv/devices/nvhost_nvdec_common.cpp | 15 ++- .../service/nvdrv/devices/nvhost_nvdec_common.h | 6 +- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 2 +- src/video_core/engines/maxwell_3d.cpp | 18 +--- src/video_core/engines/maxwell_dma.cpp | 18 +++- src/video_core/engines/puller.cpp | 18 +--- 16 files changed, 278 insertions(+), 159 deletions(-) diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index fbd66f001..4175d3d9c 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp @@ -1,7 +1,7 @@ -// Copyright 2021 yuzu emulator team -// Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. 
#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/nvmap.h" diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index da75d74ff..e069ade4e 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h @@ -1,7 +1,7 @@ -// Copyright 2021 yuzu emulator team -// Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #pragma once diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 9acec7ba6..86d825af9 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -1,6 +1,7 @@ -// Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #include "common/alignment.h" #include "common/assert.h" diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index 5acdc961e..4f37dcf43 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h @@ -1,6 +1,7 @@ -// Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
+// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #pragma once diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp index 61e00448c..b34481b48 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp @@ -1,5 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #include "common/assert.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" @@ -7,32 +9,108 @@ namespace Service::Nvidia::NvCore { -SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {} +SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} { + constexpr u32 VBlank0SyncpointId{26}; + constexpr u32 VBlank1SyncpointId{27}; + + // Reserve both vblank syncpoints as client managed as they use Continuous Mode + // Refer to section 14.3.5.3 of the TRM for more information on Continuous Mode + // https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/drm/dc.c#L660 + ReserveSyncpoint(VBlank0SyncpointId, true); + ReserveSyncpoint(VBlank1SyncpointId, true); + + for (u32 syncpointId : channel_syncpoints) { + if (syncpointId) { + ReserveSyncpoint(syncpointId, false); + } + } +} SyncpointManager::~SyncpointManager() = default; -u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { - 
syncpoints[syncpoint_id].min = host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); - return GetSyncpointMin(syncpoint_id); +u32 SyncpointManager::ReserveSyncpoint(u32 id, bool clientManaged) { + if (syncpoints.at(id).reserved) { + UNREACHABLE_MSG("Requested syncpoint is in use"); + return 0; + } + + syncpoints.at(id).reserved = true; + syncpoints.at(id).interfaceManaged = clientManaged; + + return id; } -u32 SyncpointManager::AllocateSyncpoint() { - for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { - if (!syncpoints[syncpoint_id].is_allocated) { - syncpoints[syncpoint_id].is_allocated = true; - return syncpoint_id; +u32 SyncpointManager::FindFreeSyncpoint() { + for (u32 i{1}; i < syncpoints.size(); i++) { + if (!syncpoints[i].reserved) { + return i; } } - ASSERT_MSG(false, "No more available syncpoints!"); - return {}; + UNREACHABLE_MSG("Failed to find a free syncpoint!"); + return 0; +} + +u32 SyncpointManager::AllocateSyncpoint(bool clientManaged) { + std::lock_guard lock(reservation_lock); + return ReserveSyncpoint(FindFreeSyncpoint(), clientManaged); +} + +bool SyncpointManager::IsSyncpointAllocated(u32 id) { + return (id <= SyncpointCount) && syncpoints[id].reserved; +} + +bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) { + const SyncpointInfo& syncpoint{syncpoints.at(id)}; + + if (!syncpoint.reserved) { + UNREACHABLE(); + return 0; + } + + // If the interface manages counters then we don't keep track of the maximum value as it handles + // sanity checking the values then + if (syncpoint.interfaceManaged) { + return static_cast(syncpoint.counterMin - threshold) >= 0; + } else { + return (syncpoint.counterMax - threshold) >= (syncpoint.counterMin - threshold); + } +} + +u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) { + if (!syncpoints.at(id).reserved) { + UNREACHABLE(); + return 0; + } + + return syncpoints.at(id).counterMax += amount; +} + +u32 
SyncpointManager::ReadSyncpointMinValue(u32 id) { + if (!syncpoints.at(id).reserved) { + UNREACHABLE(); + return 0; + } + + return syncpoints.at(id).counterMin; +} + +u32 SyncpointManager::UpdateMin(u32 id) { + if (!syncpoints.at(id).reserved) { + UNREACHABLE(); + return 0; + } + + syncpoints.at(id).counterMin = host1x.GetSyncpointManager().GetHostSyncpointValue(id); + return syncpoints.at(id).counterMin; } -u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { - for (u32 index = 0; index < value; ++index) { - syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); +NvFence SyncpointManager::GetSyncpointFence(u32 id) { + if (!syncpoints.at(id).reserved) { + UNREACHABLE(); + return NvFence{}; } - return GetSyncpointMax(syncpoint_id); + return {.id = static_cast(id), .value = syncpoints.at(id).counterMax}; } } // namespace Service::Nvidia::NvCore diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.h b/src/core/hle/service/nvdrv/core/syncpoint_manager.h index f332edc6e..bfc8ba84b 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h @@ -1,10 +1,13 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. 
#pragma once #include #include +#include #include "common/common_types.h" #include "core/hle/service/nvdrv/nvdata.h" @@ -19,68 +22,111 @@ class Host1x; namespace Service::Nvidia::NvCore { +enum class ChannelType : u32 { + MsEnc = 0, + VIC = 1, + GPU = 2, + NvDec = 3, + Display = 4, + NvJpg = 5, + TSec = 6, + Max = 7 +}; + +/** + * @brief SyncpointManager handles allocating and accessing host1x syncpoints, these are cached + * versions of the HW syncpoints which are intermittently synced + * @note Refer to Chapter 14 of the Tegra X1 TRM for an exhaustive overview of them + * @url https://http.download.nvidia.com/tegra-public-appnotes/host1x.html + * @url + * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/jetson-tx1/drivers/video/tegra/host/nvhost_syncpt.c + */ class SyncpointManager final { public: explicit SyncpointManager(Tegra::Host1x::Host1x& host1x); ~SyncpointManager(); /** - * Returns true if the specified syncpoint is expired for the given value. - * @param syncpoint_id Syncpoint ID to check. - * @param value Value to check against the specified syncpoint. - * @returns True if the specified syncpoint is expired for the given value, otherwise False. + * @brief Checks if the given syncpoint is both allocated and below the number of HW syncpoints */ - bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const { - return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value); - } + bool IsSyncpointAllocated(u32 id); /** - * Gets the lower bound for the specified syncpoint. - * @param syncpoint_id Syncpoint ID to get the lower bound for. - * @returns The lower bound for the specified syncpoint. 
+ * @brief Finds a free syncpoint and reserves it + * @return The ID of the reserved syncpoint */ - u32 GetSyncpointMin(u32 syncpoint_id) const { - return syncpoints.at(syncpoint_id).min.load(std::memory_order_relaxed); - } + u32 AllocateSyncpoint(bool clientManaged); /** - * Gets the uper bound for the specified syncpoint. - * @param syncpoint_id Syncpoint ID to get the upper bound for. - * @returns The upper bound for the specified syncpoint. + * @url + * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259 */ - u32 GetSyncpointMax(u32 syncpoint_id) const { - return syncpoints.at(syncpoint_id).max.load(std::memory_order_relaxed); + bool HasSyncpointExpired(u32 id, u32 threshold); + + bool IsFenceSignalled(NvFence fence) { + return HasSyncpointExpired(fence.id, fence.value); } /** - * Refreshes the minimum value for the specified syncpoint. - * @param syncpoint_id Syncpoint ID to be refreshed. - * @returns The new syncpoint minimum value. + * @brief Atomically increments the maximum value of a syncpoint by the given amount + * @return The new max value of the syncpoint */ - u32 RefreshSyncpoint(u32 syncpoint_id); + u32 IncrementSyncpointMaxExt(u32 id, u32 amount); /** - * Allocates a new syncoint. - * @returns The syncpoint ID for the newly allocated syncpoint. + * @return The minimum value of the syncpoint */ - u32 AllocateSyncpoint(); + u32 ReadSyncpointMinValue(u32 id); /** - * Increases the maximum value for the specified syncpoint. - * @param syncpoint_id Syncpoint ID to be increased. - * @param value Value to increase the specified syncpoint by. - * @returns The new syncpoint maximum value. 
+ * @brief Synchronises the minimum value of the syncpoint to with the GPU + * @return The new minimum value of the syncpoint */ - u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value); + u32 UpdateMin(u32 id); + + /** + * @return A fence that will be signalled once this syncpoint hits its maximum value + */ + NvFence GetSyncpointFence(u32 id); + + static constexpr std::array(ChannelType::Max)> channel_syncpoints{ + 0x0, // `MsEnc` is unimplemented + 0xC, // `VIC` + 0x0, // `GPU` syncpoints are allocated per-channel instead + 0x36, // `NvDec` + 0x0, // `Display` is unimplemented + 0x37, // `NvJpg` + 0x0, // `TSec` is unimplemented + }; //!< Maps each channel ID to a constant syncpoint private: - struct Syncpoint { - std::atomic min; - std::atomic max; - std::atomic is_allocated; + /** + * @note reservation_lock should be locked when calling this + */ + u32 ReserveSyncpoint(u32 id, bool clientManaged); + + /** + * @return The ID of the first free syncpoint + */ + u32 FindFreeSyncpoint(); + + struct SyncpointInfo { + std::atomic counterMin; //!< The least value the syncpoint can be (The value it was + //!< when it was last synchronized with host1x) + std::atomic counterMax; //!< The maximum value the syncpoint can reach according to the + //!< current usage + bool interfaceManaged; //!< If the syncpoint is managed by a host1x client interface, a + //!< client interface is a HW block that can handle host1x + //!< transactions on behalf of a host1x client (Which would otherwise + //!< need to be manually synced using PIO which is synchronous and + //!< requires direct cooperation of the CPU) + bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved + //!< value }; - std::array syncpoints{}; + constexpr static std::size_t SyncpointCount{192}; + std::array syncpoints{}; + std::mutex reservation_lock; Tegra::Host1x::Host1x& host1x; }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp 
b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 076edb02f..a84e4d425 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -112,17 +112,23 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector } if (params.fence.value == 0) { - params.value.raw = syncpoint_manager.GetSyncpointMin(fence_id); + if (!syncpoint_manager.IsSyncpointAllocated(params.fence.id)) { + LOG_WARNING(Service_NVDRV, + "Unallocated syncpt_id={}, threshold={}, timeout={}, is_allocation={}", + params.fence.id, params.fence.value, params.timeout, is_allocation); + } else { + params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id); + } return NvResult::Success; } - if (syncpoint_manager.IsSyncpointExpired(fence_id, params.fence.value)) { - params.value.raw = syncpoint_manager.GetSyncpointMin(fence_id); + if (syncpoint_manager.IsFenceSignalled(params.fence)) { + params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id); return NvResult::Success; } - if (const auto new_value = syncpoint_manager.RefreshSyncpoint(fence_id); - syncpoint_manager.IsSyncpointExpired(fence_id, params.fence.value)) { + if (const auto new_value = syncpoint_manager.UpdateMin(fence_id); + syncpoint_manager.IsFenceSignalled(params.fence)) { params.value.raw = new_value; return NvResult::Success; } @@ -296,7 +302,7 @@ NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector& input, std::v EventState::Waiting) { auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager(); host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle); - syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt); + syncpoint_manager.UpdateMin(event.assigned_syncpt); event.wait_handle = {}; } event.fails++; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 3f981af5a..c2cc09993 100644 --- 
a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -31,9 +31,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_, : nvdevice{system_}, events_interface{events_interface_}, core{core_}, syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()}, channel_state{system.GPU().AllocateChannel()} { - channel_fence.id = syncpoint_manager.AllocateSyncpoint(); - channel_fence.value = - system_.Host1x().GetSyncpointManager().GetGuestSyncpointValue(channel_fence.id); + channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false); sm_exception_breakpoint_int_report_event = events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt"); sm_exception_breakpoint_pause_report_event = @@ -191,10 +189,8 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector& input, std::vector BuildWaitCommandList(NvFence fence) { }; } -static std::vector BuildIncrementCommandList(NvFence fence, - u32 add_increment) { +static std::vector BuildIncrementCommandList(NvFence fence) { std::vector result{ Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::SubmissionMode::Increasing), {}}; - for (u32 count = 0; count < add_increment; ++count) { + for (u32 count = 0; count < 2; ++count) { result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, Tegra::SubmissionMode::Increasing)); result.emplace_back( @@ -239,14 +234,12 @@ static std::vector BuildIncrementCommandList(NvFence fence return result; } -static std::vector BuildIncrementWithWfiCommandList(NvFence fence, - u32 add_increment) { +static std::vector BuildIncrementWithWfiCommandList(NvFence fence) { std::vector result{ Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, Tegra::SubmissionMode::Increasing), {}}; - const std::vector increment{ - BuildIncrementCommandList(fence, add_increment)}; + const std::vector increment{BuildIncrementCommandList(fence)}; 
result.insert(result.end(), increment.begin(), increment.end()); @@ -260,35 +253,41 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector auto& gpu = system.GPU(); + std::scoped_lock lock(channel_mutex); + const auto bind_id = channel_state->bind_id; - params.fence_out.id = channel_fence.id; + auto& flags = params.flags; - if (params.flags.add_wait.Value() && - !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { - gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); - } + if (flags.fence_wait.Value()) { + if (flags.increment_value.Value()) { + return NvResult::BadParameter; + } - if (params.flags.add_increment.Value() || params.flags.increment.Value()) { - const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; - params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( - params.fence_out.id, params.AddIncrementValue() + increment_value); - } else { - params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); + if (!syncpoint_manager.IsFenceSignalled(params.fence)) { + gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence)}); + } } gpu.PushGPUEntries(bind_id, std::move(entries)); + params.fence.id = channel_syncpoint; + + u32 increment{(flags.fence_increment.Value() != 0 ? 2 : 0) + + (flags.increment_value.Value() != 0 ? 
params.fence.value : 0)}; + params.fence.value = syncpoint_manager.IncrementSyncpointMaxExt(channel_syncpoint, increment); - if (params.flags.add_increment.Value()) { - if (params.flags.suppress_wfi) { - gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList( - params.fence_out, params.AddIncrementValue())}); + if (flags.fence_increment.Value()) { + if (flags.suppress_wfi.Value()) { + gpu.PushGPUEntries(bind_id, + Tegra::CommandList{BuildIncrementCommandList(params.fence)}); } else { - gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList( - params.fence_out, params.AddIncrementValue())}); + gpu.PushGPUEntries(bind_id, + Tegra::CommandList{BuildIncrementWithWfiCommandList(params.fence)}); } } + flags.raw = 0; + std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 3a65ed06d..1e4ecd55b 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -163,17 +163,13 @@ private: u32_le num_entries{}; // number of fence objects being submitted union { u32_le raw; - BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list - BitField<1, 1, u32_le> add_increment; // append an increment to the list - BitField<2, 1, u32_le> new_hw_format; // mostly ignored - BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt - BitField<8, 1, u32_le> increment; // increment the returned fence + BitField<0, 1, u32_le> fence_wait; // append a wait sync_point to the list + BitField<1, 1, u32_le> fence_increment; // append an increment to the list + BitField<2, 1, u32_le> new_hw_format; // mostly ignored + BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt + BitField<8, 1, u32_le> increment_value; // increment the returned fence } flags; - NvFence fence_out{}; // returned new fence object for others to
wait on - - u32 AddIncrementValue() const { - return flags.add_increment.Value() << 1; - } + NvFence fence{}; // returned new fence object for others to wait on }; static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(NvFence), "IoctlSubmitGpfifo is incorrect size"); @@ -213,7 +209,8 @@ private: NvCore::SyncpointManager& syncpoint_manager; NvCore::NvMap& nvmap; std::shared_ptr channel_state; - NvFence channel_fence; + u32 channel_syncpoint; + std::mutex channel_mutex; // Events Kernel::KEvent* sm_exception_breakpoint_int_report_event; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 00947ea19..5e3820085 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -13,7 +13,7 @@ namespace Service::Nvidia::Devices { u32 nvhost_nvdec::next_id{}; nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core) - : nvhost_nvdec_common{system_, core} {} + : nvhost_nvdec_common{system_, core, NvCore::ChannelType::NvDec} {} nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& input, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b17589aa3..008092dbb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -48,9 +48,10 @@ std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::s std::unordered_map nvhost_nvdec_common::fd_to_id{}; -nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_) - : nvdevice{system_}, core{core_}, - syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {} +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_, + NvCore::ChannelType channel_type_) + : 
nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()}, + nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {} nvhost_nvdec_common::~nvhost_nvdec_common() = default; NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { @@ -88,7 +89,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector& input, for (std::size_t i = 0; i < syncpt_increments.size(); i++) { const SyncptIncr& syncpt_incr = syncpt_increments[i]; fence_thresholds[i] = - syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments); + syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments); } } for (const auto& cmd_buffer : command_buffers) { @@ -116,10 +117,8 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector& input, std::ve std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); - if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) { - device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); - } - params.value = device_syncpoints[params.param]; + const u32 id{NvCore::SyncpointManager::channel_syncpoints[static_cast(channel_type)]}; + params.value = id; std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 53029af6a..51bb7c2cb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -6,6 +6,7 @@ #include #include "common/common_types.h" #include "common/swap.h" +#include "core/hle/service/nvdrv/core/syncpoint_manager.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" namespace Service::Nvidia { @@ -13,14 +14,14 @@ namespace Service::Nvidia { namespace NvCore { class Container; class NvMap; -class SyncpointManager; } // namespace NvCore
namespace Devices { class nvhost_nvdec_common : public nvdevice { public: - explicit nvhost_nvdec_common(Core::System& system_, NvCore::Container& core); + explicit nvhost_nvdec_common(Core::System& system_, NvCore::Container& core, + NvCore::ChannelType channel_type); ~nvhost_nvdec_common() override; protected: @@ -121,6 +122,7 @@ protected: NvCore::Container& core; NvCore::SyncpointManager& syncpoint_manager; NvCore::NvMap& nvmap; + NvCore::ChannelType channel_type; std::array device_syncpoints{}; }; }; // namespace Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index c89ff6b27..490e399f4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -12,7 +12,7 @@ namespace Service::Nvidia::Devices { u32 nvhost_vic::next_id{}; nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core) - : nvhost_nvdec_common{system_, core} {} + : nvhost_nvdec_common{system_, core, NvCore::ChannelType::VIC} {} nvhost_vic::~nvhost_vic() = default; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 632052c53..3c6e44a25 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -453,18 +453,10 @@ void Maxwell3D::ProcessFirmwareCall4() { } void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; if (long_query) { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. 
- LongQueryResult query_result{payload, system.GPU().GetTicks()}; - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + memory_manager.Write(sequence_address + sizeof(u64), system.GPU().GetTicks()); + memory_manager.Write(sequence_address, payload); } else { memory_manager.Write(sequence_address, static_cast(payload)); } @@ -493,10 +485,10 @@ void Maxwell3D::ProcessQueryGet() { const GPUVAddr sequence_address{regs.query.QueryAddress()}; const u32 payload = regs.query.query_sequence; std::function operation([this, sequence_address, payload] { - LongQueryResult query_result{payload, system.GPU().GetTicks()}; - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + memory_manager.Write(sequence_address + sizeof(u64), system.GPU().GetTicks()); + memory_manager.Write(sequence_address, payload); }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->SyncOperation(std::move(operation)); } break; case Regs::QueryOperation::Acquire: diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a12a95ce2..bcffd1862 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -274,16 +274,24 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { void MaxwellDMA::ReleaseSemaphore() { const auto type = regs.launch_dma.semaphore_type; const GPUVAddr address = regs.semaphore.address; + const u32 payload = regs.semaphore.payload; switch (type) { case LaunchDMA::SemaphoreType::NONE: break; - case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: - memory_manager.Write(address, regs.semaphore.payload); + case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { + std::function operation( + [this, address, payload] { memory_manager.Write(address, payload); }); + rasterizer->SignalFence(std::move(operation)); break; - case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: - memory_manager.Write(address, 
static_cast(regs.semaphore.payload)); - memory_manager.Write(address + 8, system.GPU().GetTicks()); + } + case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { + std::function operation([this, address, payload] { + memory_manager.Write(address + sizeof(u64), system.GPU().GetTicks()); + memory_manager.Write(address, payload); + }); + rasterizer->SignalFence(std::move(operation)); break; + } default: ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast(type.Value())); } diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index dd9494efa..c3ed11c13 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -59,6 +59,7 @@ void Puller::ProcessFenceActionMethod() { case Puller::FenceOperation::Acquire: // UNIMPLEMENTED_MSG("Channel Scheduling pending."); // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); + rasterizer->ReleaseFences(); break; case Puller::FenceOperation::Increment: rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); @@ -73,19 +74,11 @@ void Puller::ProcessSemaphoreTriggerMethod() { const auto op = static_cast(regs.semaphore_trigger & semaphoreOperationMask); if (op == GpuSemaphoreOperation::WriteLong) { - struct Block { - u32 sequence; - u32 zeros = 0; - u64 timestamp; - }; - const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; std::function operation([this, sequence_address, payload] { - Block block{}; - block.sequence = payload; - block.timestamp = gpu.GetTicks(); - memory_manager.WriteBlock(sequence_address, &block, sizeof(block)); + memory_manager.Write(sequence_address + sizeof(u64), gpu.GetTicks()); + memory_manager.Write(sequence_address, payload); }); rasterizer->SignalFence(std::move(operation)); } else { @@ -98,7 +91,6 @@ void Puller::ProcessSemaphoreTriggerMethod() { regs.acquire_mode = false; if (word != regs.acquire_value) { rasterizer->ReleaseFences(); - 
std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else if (op == GpuSemaphoreOperation::AcquireGequal) { @@ -106,13 +98,11 @@ void Puller::ProcessSemaphoreTriggerMethod() { regs.acquire_mode = true; if (word < regs.acquire_value) { rasterizer->ReleaseFences(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else if (op == GpuSemaphoreOperation::AcquireMask) { if (word && regs.semaphore_sequence == 0) { rasterizer->ReleaseFences(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else { @@ -128,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() { std::function operation([this, sequence_address, payload] { memory_manager.Write(sequence_address, payload); }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->SyncOperation(std::move(operation)); } void Puller::ProcessSemaphoreAcquire() { -- cgit v1.2.3