summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChloe <25727384+ogniK5377@users.noreply.github.com>2020-11-04 08:36:55 +0100
committerGitHub <noreply@github.com>2020-11-04 08:36:55 +0100
commit6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1 (patch)
treea97a13d00eaae7e81f028a57fef7dadf9d96a27c
parentMerge pull request #4874 from lioncash/nodiscard2 (diff)
parentfixup! hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements. (diff)
downloadyuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.tar
yuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.tar.gz
yuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.tar.bz2
yuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.tar.lz
yuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.tar.xz
yuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.tar.zst
yuzu-6bbbbe8f85369dfc7a67441e5f7f6ab7a6484ae1.zip
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/core.cpp10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp33
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h4
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp135
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h20
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp17
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h14
-rw-r--r--src/core/hle/service/nvdrv/syncpoint_manager.cpp39
-rw-r--r--src/core/hle/service/nvdrv/syncpoint_manager.h85
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp4
-rw-r--r--src/video_core/dma_pusher.cpp80
-rw-r--r--src/video_core/dma_pusher.h49
-rw-r--r--src/video_core/gpu.cpp48
-rw-r--r--src/video_core/gpu.h25
15 files changed, 448 insertions, 117 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e0f207f3e..9a983e81d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -454,6 +454,8 @@ add_library(core STATIC
hle/service/nvdrv/nvdrv.h
hle/service/nvdrv/nvmemp.cpp
hle/service/nvdrv/nvmemp.h
+ hle/service/nvdrv/syncpoint_manager.cpp
+ hle/service/nvdrv/syncpoint_manager.h
hle/service/nvflinger/buffer_queue.cpp
hle/service/nvflinger/buffer_queue.h
hle/service/nvflinger/nvflinger.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fde2ccc09..242796008 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -179,16 +179,18 @@ struct System::Impl {
arp_manager.ResetAll();
telemetry_session = std::make_unique<Core::TelemetrySession>();
+
+ gpu_core = VideoCore::CreateGPU(emu_window, system);
+ if (!gpu_core) {
+ return ResultStatus::ErrorVideoCore;
+ }
+
service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
Service::Init(service_manager, system);
GDBStub::DeferStart();
interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
- gpu_core = VideoCore::CreateGPU(emu_window, system);
- if (!gpu_core) {
- return ResultStatus::ErrorVideoCore;
- }
// Initialize time manager, which must happen after kernel is created
time_manager.Initialize();
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 75d9191ff..8356a8139 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -15,8 +15,9 @@
namespace Service::Nvidia::Devices {
-nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
- : nvdevice(system), events_interface{events_interface} {}
+nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+ SyncpointManager& syncpoint_manager)
+ : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
nvhost_ctrl::~nvhost_ctrl() = default;
u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return NvResult::BadParameter;
}
+ if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+ params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
+ std::memcpy(output.data(), &params, sizeof(params));
+ return NvResult::Success;
+ }
+
+ if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
+ syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+ params.value = new_value;
+ std::memcpy(output.data(), &params, sizeof(params));
+ return NvResult::Success;
+ }
+
auto event = events_interface.events[event_id];
auto& gpu = system.GPU();
+
// This is mostly to take into account unimplemented features. As synced
// gpu is always synced.
if (!gpu.IsAsync()) {
- event.writable->Signal();
+ event.event.writable->Signal();
return NvResult::Success;
}
auto lock = gpu.LockSync();
- const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
+ const u32 current_syncpoint_value = event.fence.value;
const s32 diff = current_syncpoint_value - params.threshold;
if (diff >= 0) {
- event.writable->Signal();
+ event.event.writable->Signal();
params.value = current_syncpoint_value;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
@@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
}
params.value |= event_id;
- event.writable->Clear();
+ event.event.writable->Clear();
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
if (!is_async && ctrl.fresh_call) {
ctrl.must_delay = true;
@@ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventSignalParams params{};
std::memcpy(&params, input.data(), sizeof(params));
+
u32 event_id = params.event_id & 0x00FF;
LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
+
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (events_interface.status[event_id] == EventState::Waiting) {
events_interface.LiberateEvent(event_id);
- events_interface.events[event_id].writable->Signal();
}
+
+ syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
+
return NvResult::Success;
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index f7b04d9f1..24ad96cb9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {
class nvhost_ctrl final : public nvdevice {
public:
- explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
+ explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+ SyncpointManager& syncpoint_manager);
~nvhost_ctrl() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -145,6 +146,7 @@ private:
u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
EventInterface& events_interface;
+ SyncpointManager& syncpoint_manager;
};
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index f1966ac0e..152019548 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -7,14 +7,20 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices {
-nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
- : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+ SyncpointManager& syncpoint_manager)
+ : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
+ channel_fence.id = syncpoint_manager.AllocateSyncpoint();
+ channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+}
+
nvhost_gpu::~nvhost_gpu() = default;
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
params.unk3);
- auto& gpu = system.GPU();
- params.fence_out.id = assigned_syncpoints;
- params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
- assigned_syncpoints++;
+ channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+
+ params.fence_out = channel_fence;
+
std::memcpy(output.data(), &params, output.size());
return 0;
}
@@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
return 0;
}
-u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
- if (input.size() < sizeof(IoctlSubmitGpfifo)) {
- UNIMPLEMENTED();
+static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
+ return {
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+ Tegra::SubmissionMode::Increasing),
+ {fence.value},
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+ Tegra::SubmissionMode::Increasing),
+ Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
+ };
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
+ std::vector<Tegra::CommandHeader> result{
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+ Tegra::SubmissionMode::Increasing),
+ {}};
+
+ for (u32 count = 0; count < add_increment; ++count) {
+ result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+ Tegra::SubmissionMode::Increasing));
+ result.emplace_back(
+ Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
}
- IoctlSubmitGpfifo params{};
- std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+
+ return result;
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
+ u32 add_increment) {
+ std::vector<Tegra::CommandHeader> result{
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+ Tegra::SubmissionMode::Increasing),
+ {}};
+ const std::vector<Tegra::CommandHeader> increment{
+ BuildIncrementCommandList(fence, add_increment)};
+
+ result.insert(result.end(), increment.begin(), increment.end());
+
+ return result;
+}
+
+u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+ Tegra::CommandList&& entries) {
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
params.num_entries, params.flags.raw);
- ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
- params.num_entries * sizeof(Tegra::CommandListHeader),
- "Incorrect input size");
+ auto& gpu = system.GPU();
- Tegra::CommandList entries(params.num_entries);
- std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
- params.num_entries * sizeof(Tegra::CommandListHeader));
+ params.fence_out.id = channel_fence.id;
- UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
- UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
+ if (params.flags.add_wait.Value() &&
+ !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
+ gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+ }
- auto& gpu = system.GPU();
- u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
- if (params.flags.increment.Value()) {
- params.fence_out.value += current_syncpoint_value;
+ if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
+ const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
+ params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
+ params.fence_out.id, params.AddIncrementValue() + increment_value);
} else {
- params.fence_out.value = current_syncpoint_value;
+ params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
}
+
+ entries.RefreshIntegrityChecks(gpu);
gpu.PushGPUEntries(std::move(entries));
+ if (params.flags.add_increment.Value()) {
+ if (params.flags.suppress_wfi) {
+ gpu.PushGPUEntries(Tegra::CommandList{
+ BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+ } else {
+ gpu.PushGPUEntries(Tegra::CommandList{
+ BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+ }
+ }
+
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
return 0;
}
+u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
+ if (input.size() < sizeof(IoctlSubmitGpfifo)) {
+ UNIMPLEMENTED();
+ }
+ IoctlSubmitGpfifo params{};
+ std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+
+ Tegra::CommandList entries(params.num_entries);
+ std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
+ params.num_entries * sizeof(Tegra::CommandListHeader));
+
+ return SubmitGPFIFOImpl(params, output, std::move(entries));
+}
+
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
const std::vector<u8>& input2, IoctlVersion version) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
@@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
- LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
- params.num_entries, params.flags.raw);
Tegra::CommandList entries(params.num_entries);
if (version == IoctlVersion::Version2) {
- std::memcpy(entries.data(), input2.data(),
+ std::memcpy(entries.command_lists.data(), input2.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
} else {
- system.Memory().ReadBlock(params.address, entries.data(),
+ system.Memory().ReadBlock(params.address, entries.command_lists.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
}
- UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
- UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
-
- auto& gpu = system.GPU();
- u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
- if (params.flags.increment.Value()) {
- params.fence_out.value += current_syncpoint_value;
- } else {
- params.fence_out.value = current_syncpoint_value;
- }
- gpu.PushGPUEntries(std::move(entries));
- std::memcpy(output.data(), &params, output.size());
- return 0;
+ return SubmitGPFIFOImpl(params, output, std::move(entries));
}
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 2ac74743f..a252fc06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -11,6 +11,11 @@
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvdrv/nvdata.h"
+#include "video_core/dma_pusher.h"
+
+namespace Service::Nvidia {
+class SyncpointManager;
+}
namespace Service::Nvidia::Devices {
@@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
class nvhost_gpu final : public nvdevice {
public:
- explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+ explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+ SyncpointManager& syncpoint_manager);
~nvhost_gpu() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -162,10 +168,15 @@ private:
u32_le raw;
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
BitField<1, 1, u32_le> add_increment; // append an increment to the list
- BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
+ BitField<2, 1, u32_le> new_hw_format; // mostly ignored
+ BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
BitField<8, 1, u32_le> increment; // increment the returned fence
} flags;
Fence fence_out; // returned new fence object for others to wait on
+
+ u32 AddIncrementValue() const {
+ return flags.add_increment.Value() << 1;
+ }
};
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
"IoctlSubmitGpfifo is incorrect size");
@@ -190,6 +201,8 @@ private:
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+ Tegra::CommandList&& entries);
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
const std::vector<u8>& input2, IoctlVersion version);
@@ -198,7 +211,8 @@ private:
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
- u32 assigned_syncpoints{};
+ SyncpointManager& syncpoint_manager;
+ Fence channel_fence;
};
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 803c1a984..a46755cdc 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -21,6 +21,7 @@
#include "core/hle/service/nvdrv/interface.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvdrv/nvmemp.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvflinger/nvflinger.h"
namespace Service::Nvidia {
@@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
nvflinger.SetNVDrvInstance(module_);
}
-Module::Module(Core::System& system) {
+Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
auto& kernel = system.Kernel();
for (u32 i = 0; i < MaxNvEvents; i++) {
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
- events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
+ events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
- devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
+ devices["/dev/nvhost-gpu"] =
+ std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
devices["/dev/nvmap"] = nvmap_dev;
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
- devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
+ devices["/dev/nvhost-ctrl"] =
+ std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
@@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
if (events_interface.assigned_syncpt[i] == syncpoint_id &&
events_interface.assigned_value[i] == value) {
events_interface.LiberateEvent(i);
- events_interface.events[i].writable->Signal();
+ events_interface.events[i].event.writable->Signal();
}
}
}
std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
- return events_interface.events[event_id].readable;
+ return events_interface.events[event_id].event.readable;
}
std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
- return events_interface.events[event_id].writable;
+ return events_interface.events[event_id].event.writable;
}
} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 7706a5590..f3d863dac 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/service.h"
namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;
namespace Service::Nvidia {
+class SyncpointManager;
+
namespace Devices {
class nvdevice;
}
+/// Represents an Nvidia event
+struct NvEvent {
+ Kernel::EventPair event;
+ Fence fence{};
+};
+
struct EventInterface {
// Mask representing currently busy events
u64 events_mask{};
// Each kernel event associated to an NV event
- std::array<Kernel::EventPair, MaxNvEvents> events;
+ std::array<NvEvent, MaxNvEvents> events;
// The status of the current NVEvent
std::array<EventState, MaxNvEvents> status{};
// Tells if an NVEvent is registered or not
@@ -119,6 +128,9 @@ public:
std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
private:
+ /// Manages syncpoints on the host
+ SyncpointManager syncpoint_manager;
+
/// Id to use for the next open file descriptor.
u32 next_fd = 1;
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
new file mode 100644
index 000000000..0151a03b7
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
+#include "video_core/gpu.h"
+
+namespace Service::Nvidia {
+
+SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
+
+SyncpointManager::~SyncpointManager() = default;
+
+u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
+ syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
+ return GetSyncpointMin(syncpoint_id);
+}
+
+u32 SyncpointManager::AllocateSyncpoint() {
+ for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
+ if (!syncpoints[syncpoint_id].is_allocated) {
+ syncpoints[syncpoint_id].is_allocated = true;
+ return syncpoint_id;
+ }
+ }
+ UNREACHABLE_MSG("No more available syncpoints!");
+ return {};
+}
+
+u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
+ for (u32 index = 0; index < value; ++index) {
+ syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ return GetSyncpointMax(syncpoint_id);
+}
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h
new file mode 100644
index 000000000..4168b6c7e
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.h
@@ -0,0 +1,85 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Service::Nvidia {
+
+class SyncpointManager final {
+public:
+ explicit SyncpointManager(Tegra::GPU& gpu);
+ ~SyncpointManager();
+
+ /**
+ * Returns true if the specified syncpoint is expired for the given value.
+ * @param syncpoint_id Syncpoint ID to check.
+ * @param value Value to check against the specified syncpoint.
+ * @returns True if the specified syncpoint is expired for the given value, otherwise False.
+ */
+ bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
+ return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
+ }
+
+ /**
+ * Gets the lower bound for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to get the lower bound for.
+ * @returns The lower bound for the specified syncpoint.
+ */
+ u32 GetSyncpointMin(u32 syncpoint_id) const {
+ return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
+ }
+
+ /**
+ * Gets the upper bound for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to get the upper bound for.
+ * @returns The upper bound for the specified syncpoint.
+ */
+ u32 GetSyncpointMax(u32 syncpoint_id) const {
+ return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
+ }
+
+ /**
+ * Refreshes the minimum value for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to be refreshed.
+ * @returns The new syncpoint minimum value.
+ */
+ u32 RefreshSyncpoint(u32 syncpoint_id);
+
+ /**
+ * Allocates a new syncpoint.
+ * @returns The syncpoint ID for the newly allocated syncpoint.
+ */
+ u32 AllocateSyncpoint();
+
+ /**
+ * Increases the maximum value for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to be increased.
+ * @param value Value to increase the specified syncpoint by.
+ * @returns The new syncpoint maximum value.
+ */
+ u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
+
+private:
+ struct Syncpoint {
+ std::atomic<u32> min;
+ std::atomic<u32> max;
+ std::atomic<bool> is_allocated;
+ };
+
+ std::array<Syncpoint, MaxSyncPoints> syncpoints{};
+
+ Tegra::GPU& gpu;
+};
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index c64673dba..44aa2bdae 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -242,6 +242,10 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
+ if (!system.IsPoweredOn()) {
+ return; // We are likely shutting down
+ }
+
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
guard->unlock();
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@@ -12,6 +13,20 @@
namespace Tegra {
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+ command_list_hashes.resize(command_lists.size());
+
+ for (std::size_t index = 0; index < command_lists.size(); ++index) {
+ const CommandListHeader command_list_header = command_lists[index];
+ std::vector<CommandHeader> command_headers(command_list_header.size);
+ gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ command_list_hashes[index] =
+ Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+ command_list_header.size * sizeof(u32));
+ }
+}
+
DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,51 @@ bool DmaPusher::Step() {
return false;
}
- const CommandList& command_list{dma_pushbuffer.front()};
- ASSERT_OR_EXECUTE(!command_list.empty(), {
- // Somehow the command_list is empty, in order to avoid a crash
- // We ignore it and assume its size is 0.
- dma_pushbuffer.pop();
- dma_pushbuffer_subindex = 0;
- return true;
- });
- const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
- const GPUVAddr dma_get = command_list_header.addr;
-
- if (dma_pushbuffer_subindex >= command_list.size()) {
- // We've gone through the current list, remove it from the queue
- dma_pushbuffer.pop();
- dma_pushbuffer_subindex = 0;
- }
+ CommandList& command_list{dma_pushbuffer.front()};
- if (command_list_header.size == 0) {
- return true;
- }
+ ASSERT_OR_EXECUTE(
+ command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+ // Somehow the command_list is empty, in order to avoid a crash
+ // We ignore it and assume its size is 0.
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ return true;
+ });
- // Push buffer non-empty, read a word
- command_headers.resize(command_list_header.size);
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ if (command_list.prefetch_command_list.size()) {
+ // Prefetched command list from nvdrv, used for things like synchronization
+ command_headers = std::move(command_list.prefetch_command_list);
+ dma_pushbuffer.pop();
+ } else {
+ const CommandListHeader command_list_header{
+ command_list.command_lists[dma_pushbuffer_subindex]};
+ const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+ const GPUVAddr dma_get = command_list_header.addr;
+
+ if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+ // We've gone through the current list, remove it from the queue
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ }
+ if (command_list_header.size == 0) {
+ return true;
+ }
+
+ // Push buffer non-empty, read a word
+ command_headers.resize(command_list_header.size);
+ gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+
+ // Integrity check
+ const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+ command_list_header.size * sizeof(u32));
+ if (new_hash != next_hash) {
+ LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+ dma_pushbuffer.pop();
+ return true;
+ }
+ }
for (std::size_t index = 0; index < command_headers.size();) {
const CommandHeader& command_header = command_headers[index];
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5
};
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
+// So the values you see in docs might be multiplied by 4.
+enum class BufferMethods : u32 {
+ BindObject = 0x0,
+ Nop = 0x2,
+ SemaphoreAddressHigh = 0x4,
+ SemaphoreAddressLow = 0x5,
+ SemaphoreSequence = 0x6,
+ SemaphoreTrigger = 0x7,
+ NotifyIntr = 0x8,
+ WrcacheFlush = 0x9,
+ Unk28 = 0xA,
+ UnkCacheFlush = 0xB,
+ RefCnt = 0x14,
+ SemaphoreAcquire = 0x1A,
+ SemaphoreRelease = 0x1B,
+ FenceValue = 0x1C,
+ FenceAction = 0x1D,
+ WaitForInterrupt = 0x1E,
+ Unk7c = 0x1F,
+ Yield = 0x20,
+ NonPullerMethods = 0x40,
+};
+
struct CommandListHeader {
union {
u64 raw;
@@ -49,9 +74,29 @@ union CommandHeader {
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+ SubmissionMode mode) {
+ CommandHeader result{};
+ result.method.Assign(static_cast<u32>(method));
+ result.arg_count.Assign(arg_count);
+ result.mode.Assign(mode);
+ return result;
+}
+
class GPU;
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+ CommandList() = default;
+ explicit CommandList(std::size_t size) : command_lists(size) {}
+ explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
+ : prefetch_command_list{std::move(prefetch_command_list)} {}
+
+ void RefreshIntegrityChecks(GPU& gpu);
+
+ std::vector<Tegra::CommandListHeader> command_lists;
+ std::vector<u64> command_list_hashes;
+ std::vector<Tegra::CommandHeader> prefetch_command_list;
+};
/**
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
* details on this implementation.
*/
-class DmaPusher {
+class DmaPusher final {
public:
explicit DmaPusher(Core::System& system, GPU& gpu);
~DmaPusher();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
- BindObject = 0x0,
- Nop = 0x2,
- SemaphoreAddressHigh = 0x4,
- SemaphoreAddressLow = 0x5,
- SemaphoreSequence = 0x6,
- SemaphoreTrigger = 0x7,
- NotifyIntr = 0x8,
- WrcacheFlush = 0x9,
- Unk28 = 0xA,
- UnkCacheFlush = 0xB,
- RefCnt = 0x14,
- SemaphoreAcquire = 0x1A,
- SemaphoreRelease = 0x1B,
- FenceValue = 0x1C,
- FenceAction = 0x1D,
- Unk78 = 0x1E,
- Unk7c = 0x1F,
- Yield = 0x20,
- NonPullerMethods = 0x40,
-};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
+ break;
case BufferMethods::FenceAction:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForInterrupt:
+ ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
}
}
+void GPU::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case FenceOperation::Acquire:
+ WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ break;
+ case FenceOperation::Increment:
+ IncrementSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}",
+ static_cast<u32>(regs.fence_action.op.Value()));
+ }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+ // TODO(bunnei) ImplementMe
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
return use_nvdec;
}
+ enum class FenceOperation : u32 {
+ Acquire = 0,
+ Increment = 1,
+ };
+
+ union FenceAction {
+ u32 raw;
+ BitField<0, 1, FenceOperation> op;
+ BitField<8, 24, u32> syncpoint_id;
+
+ static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+ FenceAction result{};
+ result.op.Assign(op);
+ result.syncpoint_id.Assign(syncpoint_id);
+ return {result.raw};
+ }
+ };
+
struct Regs {
static constexpr size_t NUM_REGS = 0x40;
@@ -291,10 +309,7 @@ public:
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
- union {
- BitField<4, 4, u32> operation;
- BitField<8, 8, u32> id;
- } fence_action;
+ FenceAction fence_action;
INSERT_UNION_PADDING_WORDS(0xE2);
// Puller state
@@ -342,6 +357,8 @@ protected:
private:
void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessFenceActionMethod();
+ void ProcessWaitForInterruptMethod();
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();