From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Mon, 26 Oct 2020 23:07:36 -0400
Subject: video_core: NVDEC Implementation

This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library.

The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data.

To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the FFmpeg library.

Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header.

Async GPU is not properly implemented at the moment.

Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com>
---
 .../service/nvdrv/devices/nvhost_nvdec_common.h    | 168 +++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h

(limited to 'src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h')
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
new file mode 100644
index 000000000..c249c5349
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -0,0 +1,189 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service::Nvidia::Devices {
+class nvmap;
+
+/// Common base for the nvhost devices that share this ioctl set (presumably NVDEC and VIC; the
+/// derived classes are not visible from this header). Declares the ioctl parameter layouts, the
+/// shared ioctl handlers and the GPU buffer-mapping bookkeeping; ioctl() stays pure virtual.
+class nvhost_nvdec_common : public nvdevice {
+public:
+    explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    ~nvhost_nvdec_common() override;
+
+    /// Ioctl entry point. Pure virtual here, so every concrete device must implement it.
+    virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
+                      std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
+                      IoctlVersion version) = 0;
+
+protected:
+    /// Value type describing one mapped buffer: its GPU address range [start, end), the CPU
+    /// address stored with it, and an "allocated" flag whose meaning is defined by the callers.
+    class BufferMap final {
+    public:
+        constexpr BufferMap() = default;
+
+        constexpr BufferMap(GPUVAddr start_addr, std::size_t size)
+            : start_addr{start_addr}, end_addr{start_addr + size} {}
+
+        constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr,
+                            bool is_allocated)
+            : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr},
+              is_allocated{is_allocated} {}
+
+        /// First GPU address covered by the mapping.
+        constexpr GPUVAddr StartAddr() const {
+            return start_addr;
+        }
+
+        /// GPU address one past the end of the mapping (start + size).
+        constexpr GPUVAddr EndAddr() const {
+            return end_addr;
+        }
+
+        /// Length of the mapping (end - start).
+        constexpr std::size_t Size() const {
+            return end_addr - start_addr;
+        }
+
+        /// CPU address stored with the mapping; zero if none was supplied.
+        constexpr VAddr CpuAddr() const {
+            return cpu_addr;
+        }
+
+        /// Allocation flag stored with the mapping; false if none was supplied.
+        constexpr bool IsAllocated() const {
+            return is_allocated;
+        }
+
+    private:
+        GPUVAddr start_addr{};
+        GPUVAddr end_addr{};
+        VAddr cpu_addr{};
+        bool is_allocated{};
+    };
+
+    // Ioctl parameter layouts. Each static_assert pins the size of the struct declared above it.
+
+    struct IoctlSetNvmapFD {
+        u32_le nvmap_fd;
+    };
+    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
+
+    struct IoctlSubmitCommandBuffer {
+        u32_le id;
+        u32_le offset;
+        u32_le count;
+    };
+    static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC,
+                  "IoctlSubmitCommandBuffer is incorrect size");
+
+    /// Entry counts of a submit request (presumably for the CommandBuffer, Reloc, SyncptIncr and
+    /// Fence records below -- confirm against Submit()).
+    struct IoctlSubmit {
+        u32_le cmd_buffer_count;
+        u32_le relocation_count;
+        u32_le syncpoint_count;
+        u32_le fence_count;
+    };
+    static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size");
+
+    struct CommandBuffer {
+        s32 memory_id;
+        u32 offset;
+        s32 word_count;
+    };
+    static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size");
+
+    struct Reloc {
+        s32 cmdbuffer_memory;
+        s32 cmdbuffer_offset;
+        s32 target;
+        s32 target_offset;
+    };
+    static_assert(sizeof(Reloc) == 0x10, "Reloc has incorrect size");
+
+    struct SyncptIncr {
+        u32 id;
+        u32 increments;
+    };
+    static_assert(sizeof(SyncptIncr) == 0x8, "SyncptIncr has incorrect size");
+
+    struct Fence {
+        u32 id;
+        u32 value;
+    };
+    static_assert(sizeof(Fence) == 0x8, "Fence has incorrect size");
+
+    struct IoctlGetSyncpoint {
+        // Input
+        u32_le param;
+        // Output
+        u32_le value;
+    };
+    static_assert(sizeof(IoctlGetSyncpoint) == 8, "IoctlGetSyncpoint has wrong size");
+
+    struct IoctlGetWaitbase {
+        u32_le unknown; // seems to be ignored? Nintendo added this
+        u32_le value;
+    };
+    static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
+
+    struct IoctlMapBuffer {
+        u32_le num_entries;
+        u32_le data_address; // Ignored by the driver.
+        u32_le attach_host_ch_das;
+    };
+    static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size");
+
+    struct IocGetIdParams {
+        // Input
+        u32_le param;
+        // Output
+        u32_le value;
+    };
+    static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
+
+    // Used for mapping and unmapping command buffers
+    struct MapBufferEntry {
+        u32_le map_handle;
+        u32_le map_address;
+    };
+    static_assert(sizeof(MapBufferEntry) == 0x8, "MapBufferEntry is incorrect size");
+
+    /// Ioctl command implementations
+    u32 SetNVMAPfd(const std::vector<u8>& input);
+    u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
+
+    // Helpers for the buffer_mappings table (their definitions are not in this header). The
+    // optional return types presumably signal a missing mapping -- confirm in the .cpp.
+    std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
+    void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
+    std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
+
+    u32_le nvmap_fd{};
+    u32_le submit_timeout{};
+    std::shared_ptr<nvmap> nvmap_dev;
+
+    // This is expected to be ordered, therefore we must use a map, not unordered_map
+    std::map<GPUVAddr, BufferMap> buffer_mappings;
+};
+} // namespace Service::Nvidia::Devices
-- 
cgit v1.2.3