From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 26 Oct 2020 23:07:36 -0400 Subject: video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com> --- src/core/CMakeLists.txt | 2 + .../hle/service/nvdrv/devices/nvhost_nvdec.cpp | 100 +++------ src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 71 +------ .../service/nvdrv/devices/nvhost_nvdec_common.cpp | 234 +++++++++++++++++++++ .../service/nvdrv/devices/nvhost_nvdec_common.h | 168 +++++++++++++++ src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 90 ++------ src/core/hle/service/nvdrv/devices/nvhost_vic.h | 88 ++------ src/core/hle/service/nvdrv/devices/nvmap.h | 1 + src/core/hle/service/nvdrv/nvdrv.cpp | 4 +- src/core/settings.cpp | 2 + src/core/settings.h | 1 + src/core/telemetry_session.cpp | 2 + 12 files changed, 475 insertions(+), 288 deletions(-) create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h (limited to 'src/core') diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 
db1c9fdef..e0f207f3e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -439,6 +439,8 @@ add_library(core STATIC hle/service/nvdrv/devices/nvhost_gpu.h hle/service/nvdrv/devices/nvhost_nvdec.cpp hle/service/nvdrv/devices/nvhost_nvdec.h + hle/service/nvdrv/devices/nvhost_nvdec_common.cpp + hle/service/nvdrv/devices/nvhost_nvdec_common.h hle/service/nvdrv/devices/nvhost_nvjpg.cpp hle/service/nvdrv/devices/nvhost_nvjpg.h hle/service/nvdrv/devices/nvhost_vic.cpp diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index fcb612864..b6df48360 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -2,15 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include - #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev) + : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} nvhost_nvdec::~nvhost_nvdec() = default; u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector& input, const std::vector& input2, @@ -21,7 +23,7 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector& input, const std:: switch (static_cast(command.raw)) { case IoctlCommand::IocSetNVMAPfdCommand: - return SetNVMAPfd(input, output); + return SetNVMAPfd(input); case IoctlCommand::IocSubmit: return Submit(input, output); case IoctlCommand::IocGetSyncpoint: @@ -29,79 +31,29 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector& input, const std:: case IoctlCommand::IocGetWaitbase: return GetWaitbase(input, output); case IoctlCommand::IocMapBuffer: - 
return MapBuffer(input, output); + case IoctlCommand::IocMapBuffer2: + case IoctlCommand::IocMapBuffer3: case IoctlCommand::IocMapBufferEx: - return MapBufferEx(input, output); - case IoctlCommand::IocUnmapBufferEx: - return UnmapBufferEx(input, output); + return MapBuffer(input, output); + case IoctlCommand::IocUnmapBufferEx: { + // This command is sent when the video stream has ended, flush all video contexts + // This is usually sent in the following order: vic, nvdec, vic. + // Inform the GPU to clear any remaining nvdec buffers when this is detected. + LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); + Tegra::ChCommandHeaderList cmdlist(1); + cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F}; + system.GPU().PushCommandBuffer(cmdlist); + [[fallthrough]]; // fallthrough to unmap buffers + }; + case IoctlCommand::IocUnmapBuffer: + case IoctlCommand::IocUnmapBuffer2: + case IoctlCommand::IocUnmapBuffer3: + return UnmapBuffer(input, output); + case IoctlCommand::IocSetSubmitTimeout: + return SetSubmitTimeout(input, output); } - UNIMPLEMENTED_MSG("Unimplemented ioctl"); - return 0; -} - -u32 nvhost_nvdec::SetNVMAPfd(const std::vector& input, std::vector& output) { - IoctlSetNvmapFD params{}; - std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD)); - LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); - - nvmap_fd = params.nvmap_fd; - return 0; -} - -u32 nvhost_nvdec::Submit(const std::vector& input, std::vector& output) { - IoctlSubmit params{}; - std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); - return 0; -} - -u32 nvhost_nvdec::GetSyncpoint(const std::vector& input, std::vector& output) { - IoctlGetSyncpoint params{}; - std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), &params,
sizeof(IoctlGetSyncpoint)); - return 0; -} - -u32 nvhost_nvdec::GetWaitbase(const std::vector& input, std::vector& output) { - IoctlGetWaitbase params{}; - std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase)); - return 0; -} - -u32 nvhost_nvdec::MapBuffer(const std::vector& input, std::vector& output) { - IoctlMapBuffer params{}; - std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer)); - return 0; -} - -u32 nvhost_nvdec::MapBufferEx(const std::vector& input, std::vector& output) { - IoctlMapBufferEx params{}; - std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx)); - return 0; -} - -u32 nvhost_nvdec::UnmapBufferEx(const std::vector& input, std::vector& output) { - IoctlUnmapBufferEx params{}; - std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx)); + UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 4332db118..102777ddd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -4,16 +4,14 @@ #pragma once -#include -#include "common/common_types.h" -#include "common/swap.h" -#include
"core/hle/service/nvdrv/devices/nvdevice.h" +#include +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { -class nvhost_nvdec final : public nvdevice { +class nvhost_nvdec final : public nvhost_nvdec_common { public: - explicit nvhost_nvdec(Core::System& system); + explicit nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev); ~nvhost_nvdec() override; u32 ioctl(Ioctl command, const std::vector& input, const std::vector& input2, @@ -27,62 +25,15 @@ private: IocGetSyncpoint = 0xC0080002, IocGetWaitbase = 0xC0080003, IocMapBuffer = 0xC01C0009, + IocMapBuffer2 = 0xC16C0009, + IocMapBuffer3 = 0xC15C0009, IocMapBufferEx = 0xC0A40009, - IocUnmapBufferEx = 0xC0A4000A, + IocUnmapBuffer = 0xC0A4000A, + IocUnmapBuffer2 = 0xC16C000A, + IocUnmapBufferEx = 0xC01C000A, + IocUnmapBuffer3 = 0xC15C000A, + IocSetSubmitTimeout = 0x40040007, }; - - struct IoctlSetNvmapFD { - u32_le nvmap_fd; - }; - static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size"); - - struct IoctlSubmit { - INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size"); - - struct IoctlGetSyncpoint { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size"); - - struct IoctlGetWaitbase { - u32 unknown; // seems to be ignored? 
Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size"); - - struct IoctlMapBuffer { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size"); - - struct IoctlMapBufferEx { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size"); - - struct IoctlUnmapBufferEx { - INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size"); - - u32_le nvmap_fd{}; - - u32 SetNVMAPfd(const std::vector& input, std::vector& output); - u32 Submit(const std::vector& input, std::vector& output); - u32 GetSyncpoint(const std::vector& input, std::vector& output); - u32 GetWaitbase(const std::vector& input, std::vector& output); - u32 MapBuffer(const std::vector& input, std::vector& output); - u32 MapBufferEx(const std::vector& input, std::vector& output); - u32 UnmapBufferEx(const std::vector& input, std::vector& output); }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp new file mode 100644 index 000000000..85792495f --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -0,0 +1,234 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" +#include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/memory.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" + +namespace Service::Nvidia::Devices { + +namespace { +// Splice vectors will copy count amount of type T from the input vector into the dst vector. +template +std::size_t SpliceVectors(const std::vector& input, std::vector& dst, std::size_t count, + std::size_t offset) { + std::memcpy(dst.data(), input.data() + offset, count * sizeof(T)); + offset += count * sizeof(T); + return offset; +} + +// Write vectors will write data to the output buffer +template +std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::size_t offset) { + std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T)); + offset += src.size() * sizeof(T); + return offset; +} +} // Anonymous namespace + +namespace NvErrCodes { +constexpr u32 Success{}; +constexpr u32 OutOfMemory{static_cast(-12)}; +constexpr u32 InvalidInput{static_cast(-22)}; +} // namespace NvErrCodes + +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::~nvhost_nvdec_common() = default; + +u32 nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { + IoctlSetNvmapFD params{}; + std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD)); + LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + + nvmap_fd = params.nvmap_fd; + return 0; +} + +u32 nvhost_nvdec_common::Submit(const std::vector& input, std::vector& output) { + IoctlSubmit params{}; + std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); + LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); + + // Instantiate param
buffers + std::size_t offset = sizeof(IoctlSubmit); + std::vector command_buffers(params.cmd_buffer_count); + std::vector relocs(params.relocation_count); + std::vector reloc_shifts(params.relocation_count); + std::vector syncpt_increments(params.syncpoint_count); + std::vector wait_checks(params.syncpoint_count); + std::vector fences(params.fence_count); + + // Splice input into their respective buffers + offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); + offset = SpliceVectors(input, relocs, params.relocation_count, offset); + offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset); + offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); + offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); + offset = SpliceVectors(input, fences, params.fence_count, offset); + + // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment + + auto& gpu = system.GPU(); + + for (const auto& cmd_buffer : command_buffers) { + auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); + ASSERT_OR_EXECUTE(object, return NvErrCodes::InvalidInput;); + const auto map = FindBufferMap(object->dma_map_addr); + if (!map) { + LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}", + object->addr, object->dma_map_addr); + return 0; + } + Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); + gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(), + cmdlist.size() * sizeof(u32)); + gpu.PushCommandBuffer(cmdlist); + } + + std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); + // Some games expect command_buffers to be written back + offset = sizeof(IoctlSubmit); + offset = WriteVectors(output, command_buffers, offset); + offset = WriteVectors(output, relocs, offset); + offset = WriteVectors(output, reloc_shifts, offset); + offset = WriteVectors(output, syncpt_increments, offset); + offset = WriteVectors(output,
wait_checks, offset); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::GetSyncpoint(const std::vector& input, std::vector& output) { + IoctlGetSyncpoint params{}; + std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); + LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); + + // We found that implementing this causes deadlocks with async gpu, along with degraded + // performance. TODO: RE the nvdec async implementation + params.value = 0; + std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::GetWaitbase(const std::vector& input, std::vector& output) { + IoctlGetWaitbase params{}; + std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); + params.value = 0; // Seems to be hard coded at 0 + std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase)); + return 0; +} + +u32 nvhost_nvdec_common::MapBuffer(const std::vector& input, std::vector& output) { + IoctlMapBuffer params{}; + std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); + std::vector cmd_buffer_handles(params.num_entries); + + SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); + + auto& gpu = system.GPU(); + + for (auto& cmf_buff : cmd_buffer_handles) { + auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; + if (!object) { + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); + std::memcpy(output.data(), &params, output.size()); + return NvErrCodes::InvalidInput; + } + if (object->dma_map_addr == 0) { + // NVDEC and VIC memory is in the 32-bit address space + // MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space + const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size); + object->dma_map_addr = static_cast(low_addr); + // Ensure that the dma_map_addr is indeed in the lower 32-bit address space.
+ ASSERT(object->dma_map_addr == low_addr); + } + if (!object->dma_map_addr) { + LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); + } else { + cmf_buff.map_address = object->dma_map_addr; + AddBufferMap(object->dma_map_addr, object->size, object->addr, + object->status == nvmap::Object::Status::Allocated); + } + } + std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer)); + std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(), + cmd_buffer_handles.size() * sizeof(MapBufferEntry)); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::UnmapBuffer(const std::vector& input, std::vector& output) { + IoctlMapBuffer params{}; + std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); + std::vector cmd_buffer_handles(params.num_entries); + SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); + + auto& gpu = system.GPU(); + + for (auto& cmf_buff : cmd_buffer_handles) { + const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; + if (!object) { + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); + std::memcpy(output.data(), &params, output.size()); + return NvErrCodes::InvalidInput; + } + if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) { + gpu.MemoryManager().Unmap(object->dma_map_addr, *size); + } else { + // This occurs quite frequently, however does not seem to impact functionality + LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr, + object->dma_map_addr); + } + object->dma_map_addr = 0; + } + std::memset(output.data(), 0, output.size()); + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::SetSubmitTimeout(const std::vector& input, std::vector& output) { + std::memcpy(&submit_timeout, input.data(), input.size()); + LOG_WARNING(Service_NVDRV, "(STUBBED) called"); + return NvErrCodes::Success; +} + +std::optional nvhost_nvdec_common::FindBufferMap( + GPUVAddr gpu_addr) const { + const auto it =
std::find_if( + buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) { + return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr()); + }); + + ASSERT(it != buffer_mappings.end()); + return it->second; +} + +void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, + bool is_allocated) { + buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated}); +} + +std::optional nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) { + const auto iter{buffer_mappings.find(gpu_addr)}; + if (iter == buffer_mappings.end()) { + return std::nullopt; + } + std::size_t size = 0; + if (iter->second.IsAllocated()) { + size = iter->second.Size(); + } + buffer_mappings.erase(iter); + return size; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h new file mode 100644 index 000000000..c249c5349 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -0,0 +1,168 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { +class nvmap; + +class nvhost_nvdec_common : public nvdevice { +public: + explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev); + ~nvhost_nvdec_common() override; + + virtual u32 ioctl(Ioctl command, const std::vector& input, const std::vector& input2, + std::vector& output, std::vector& output2, IoctlCtrl& ctrl, + IoctlVersion version) = 0; + +protected: + class BufferMap final { + public: + constexpr BufferMap() = default; + + constexpr BufferMap(GPUVAddr start_addr, std::size_t size) + : start_addr{start_addr}, end_addr{start_addr + size} {} + + constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr, + bool is_allocated) + : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr}, + is_allocated{is_allocated} {} + + constexpr VAddr StartAddr() const { + return start_addr; + } + + constexpr VAddr EndAddr() const { + return end_addr; + } + + constexpr std::size_t Size() const { + return end_addr - start_addr; + } + + constexpr VAddr CpuAddr() const { + return cpu_addr; + } + + constexpr bool IsAllocated() const { + return is_allocated; + } + + private: + GPUVAddr start_addr{}; + GPUVAddr end_addr{}; + VAddr cpu_addr{}; + bool is_allocated{}; + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + struct IoctlSubmitCommandBuffer { + u32_le id; + u32_le offset; + u32_le count; + }; + static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC, + "IoctlSubmitCommandBuffer is incorrect size"); + struct IoctlSubmit { + u32_le cmd_buffer_count; + u32_le relocation_count; + u32_le syncpoint_count; + u32_le fence_count; + }; + static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size"); + + struct CommandBuffer { + s32 
memory_id; + u32 offset; + s32 word_count; + }; + static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size"); + + struct Reloc { + s32 cmdbuffer_memory; + s32 cmdbuffer_offset; + s32 target; + s32 target_offset; + }; + static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size"); + + struct SyncptIncr { + u32 id; + u32 increments; + }; + static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size"); + + struct Fence { + u32 id; + u32 value; + }; + static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size"); + + struct IoctlGetSyncpoint { + // Input + u32_le param; + // Output + u32_le value; + }; + static_assert(sizeof(IoctlGetSyncpoint) == 8, "IocGetIdParams has wrong size"); + + struct IoctlGetWaitbase { + u32_le unknown; // seems to be ignored? Nintendo added this + u32_le value; + }; + static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size"); + + struct IoctlMapBuffer { + u32_le num_entries; + u32_le data_address; // Ignored by the driver. 
+ u32_le attach_host_ch_das; + }; + static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size"); + + struct IocGetIdParams { + // Input + u32_le param; + // Output + u32_le value; + }; + static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); + + // Used for mapping and unmapping command buffers + struct MapBufferEntry { + u32_le map_handle; + u32_le map_address; + }; + static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size"); + + /// Ioctl command implementations + u32 SetNVMAPfd(const std::vector& input); + u32 Submit(const std::vector& input, std::vector& output); + u32 GetSyncpoint(const std::vector& input, std::vector& output); + u32 GetWaitbase(const std::vector& input, std::vector& output); + u32 MapBuffer(const std::vector& input, std::vector& output); + u32 UnmapBuffer(const std::vector& input, std::vector& output); + u32 SetSubmitTimeout(const std::vector& input, std::vector& output); + + std::optional FindBufferMap(GPUVAddr gpu_addr) const; + void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); + std::optional RemoveBufferMap(GPUVAddr gpu_addr); + + u32_le nvmap_fd{}; + u32_le submit_timeout{}; + std::shared_ptr nvmap_dev; + + // This is expected to be ordered, therefore we must use a map, not unordered_map + std::map buffer_mappings; +}; +}; // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 9da19ad56..60db54d00 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -2,15 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include - #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_vic.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev) + : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} -nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {} nvhost_vic::~nvhost_vic() = default; u32 nvhost_vic::ioctl(Ioctl command, const std::vector& input, const std::vector& input2, @@ -21,7 +23,7 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector& input, const std::ve switch (static_cast(command.raw)) { case IoctlCommand::IocSetNVMAPfdCommand: - return SetNVMAPfd(input, output); + return SetNVMAPfd(input); case IoctlCommand::IocSubmit: return Submit(input, output); case IoctlCommand::IocGetSyncpoint: @@ -29,83 +31,19 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector& input, const std::ve case IoctlCommand::IocGetWaitbase: return GetWaitbase(input, output); case IoctlCommand::IocMapBuffer: - return MapBuffer(input, output); + case IoctlCommand::IocMapBuffer2: + case IoctlCommand::IocMapBuffer3: + case IoctlCommand::IocMapBuffer4: case IoctlCommand::IocMapBufferEx: return MapBuffer(input, output); + case IoctlCommand::IocUnmapBuffer: + case IoctlCommand::IocUnmapBuffer2: + case IoctlCommand::IocUnmapBuffer3: case IoctlCommand::IocUnmapBufferEx: - return UnmapBufferEx(input, output); + return UnmapBuffer(input, output); } - UNIMPLEMENTED_MSG("Unimplemented ioctl"); - return 0; -} - -u32 nvhost_vic::SetNVMAPfd(const std::vector& input, std::vector& output) { - IoctlSetNvmapFD params{}; - std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD)); - LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); - - nvmap_fd = params.nvmap_fd; - return 0; -} - -u32 nvhost_vic::Submit(const std::vector& input, std::vector& output) { - IoctlSubmit params{}; -
std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - - // Workaround for Luigi's Mansion 3, as nvhost_vic is not implemented for asynch GPU - params.command_buffer = {}; - - std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); - return 0; -} - -u32 nvhost_vic::GetSyncpoint(const std::vector& input, std::vector& output) { - IoctlGetSyncpoint params{}; - std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); - return 0; -} - -u32 nvhost_vic::GetWaitbase(const std::vector& input, std::vector& output) { - IoctlGetWaitbase params{}; - std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase)); - return 0; -} - -u32 nvhost_vic::MapBuffer(const std::vector& input, std::vector& output) { - IoctlMapBuffer params{}; - std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer)); - return 0; -} - -u32 nvhost_vic::MapBufferEx(const std::vector& input, std::vector& output) { - IoctlMapBufferEx params{}; - std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx)); - return 0; -} - -u32 nvhost_vic::UnmapBufferEx(const std::vector& input, std::vector& output) { - IoctlUnmapBufferEx params{}; - std::memcpy(&params, input.data(),
sizeof(IoctlUnmapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx)); + UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index a7bb7bbd5..f975b190c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -4,19 +4,15 @@ #pragma once -#include -#include -#include "common/common_types.h" -#include "common/swap.h" -#include "core/hle/service/nvdrv/devices/nvdevice.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { +class nvmap; -class nvhost_vic final : public nvdevice { +class nvhost_vic final : public nvhost_nvdec_common { public: - explicit nvhost_vic(Core::System& system); - ~nvhost_vic() override; - + explicit nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev); + ~nvhost_vic(); u32 ioctl(Ioctl command, const std::vector& input, const std::vector& input2, std::vector& output, std::vector& output2, IoctlCtrl& ctrl, IoctlVersion version) override; @@ -28,74 +24,14 @@ private: IocGetSyncpoint = 0xC0080002, IocGetWaitbase = 0xC0080003, IocMapBuffer = 0xC01C0009, + IocMapBuffer2 = 0xC0340009, + IocMapBuffer3 = 0xC0140009, + IocMapBuffer4 = 0xC00C0009, IocMapBufferEx = 0xC03C0009, - IocUnmapBufferEx = 0xC03C000A, - }; - - struct IoctlSetNvmapFD { - u32_le nvmap_fd; - }; - static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); - - struct IoctlSubmitCommandBuffer { - u32 id; - u32 offset; - u32 count; - }; - static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC, - "IoctlSubmitCommandBuffer is incorrect size"); - - struct IoctlSubmit { - u32 command_buffer_count; - u32 relocations_count; - u32 syncpt_count; - u32 wait_count; - std::array command_buffer; - }; - static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is
incorrect size"); - - struct IoctlGetSyncpoint { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size"); - - struct IoctlGetWaitbase { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size"); - - struct IoctlMapBuffer { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size"); - - struct IoctlMapBufferEx { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure + IocUnmapBuffer = 0xC03C000A, + IocUnmapBuffer2 = 0xC034000A, + IocUnmapBuffer3 = 0xC00C000A, + IocUnmapBufferEx = 0xC01C000A, }; - static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size"); - - struct IoctlUnmapBufferEx { - INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size"); - - u32_le nvmap_fd{}; - - u32 SetNVMAPfd(const std::vector& input, std::vector& output); - u32 Submit(const std::vector& input, std::vector& output); - u32 GetSyncpoint(const std::vector& input, std::vector& output); - u32 GetWaitbase(const std::vector& input, std::vector& output); - u32 MapBuffer(const std::vector& input, std::vector& output); - u32 MapBufferEx(const std::vector& input, std::vector& output); - u32 UnmapBufferEx(const std::vector& input, std::vector& output); }; - } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 84624be00..04b9ef540 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -37,6 +37,7 @@ 
public: VAddr addr; Status status; u32 refcount; + u32 dma_map_addr; }; std::shared_ptr GetObject(u32 handle) const { diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 197c77db0..803c1a984 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -51,9 +51,9 @@ Module::Module(Core::System& system) { devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared(system, nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared(system, events_interface); - devices["/dev/nvhost-nvdec"] = std::make_shared(system); + devices["/dev/nvhost-nvdec"] = std::make_shared(system, nvmap_dev); devices["/dev/nvhost-nvjpg"] = std::make_shared(system); - devices["/dev/nvhost-vic"] = std::make_shared(system); + devices["/dev/nvhost-vic"] = std::make_shared(system, nvmap_dev); } Module::~Module() = default; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 28d3f9099..e14c02045 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -63,6 +63,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); + log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); @@ -119,6 +120,7 @@ void RestoreGlobalState() { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); + values.use_nvdec_emulation.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); diff --git a/src/core/settings.h 
b/src/core/settings.h index 9834f44bb..604805615 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -111,6 +111,7 @@ struct Values { Setting use_disk_shader_cache; Setting gpu_accuracy; Setting use_asynchronous_gpu_emulation; + Setting use_nvdec_emulation; Setting use_vsync; Setting use_assembly_shaders; Setting use_asynchronous_shaders; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index da09c0dbc..ebc19e18a 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -206,6 +206,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); + AddField(field_type, "Renderer_UseNvdecEmulation", + Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); -- cgit v1.2.3