From 6053b955525be69eb73a928a7bdd43ba8f5e69a7 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 26 Oct 2020 22:11:41 -0700
Subject: video_core: gpu: Implement WaitFence and IncrementSyncPoint.

---
 src/video_core/dma_pusher.h | 25 +++++++++++++++++++++++
 src/video_core/gpu.cpp      | 48 ++++++++++++++++++++++-----------------------
 src/video_core/gpu.h        | 25 +++++++++++++++++++----
 3 files changed, 70 insertions(+), 28 deletions(-)

(limited to 'src/video_core')
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..2026b7857 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
+// So the values you see in docs might be multiplied by 4.
+enum class BufferMethods : u32 {
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    UnkCacheFlush = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
+    WaitForInterrupt = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
 struct CommandListHeader {
     union {
         u64 raw;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
 void GPU::OnCommandListEnd() {
     renderer->Rasterizer().ReleaseFences();
 }
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};
 
 enum class GpuSemaphoreOperation {
     AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::UnkCacheFlush:
     case BufferMethods::WrcacheFlush:
     case BufferMethods::FenceValue:
+        break;
     case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
         break;
     case BufferMethods::SemaphoreTrigger: {
         ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
     }
 }
 
+void GPU::ProcessFenceActionMethod() {
+    switch (regs.fence_action.op) {
+    case FenceOperation::Acquire:
+        WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        break;
+    case FenceOperation::Increment:
+        IncrementSyncPoint(regs.fence_action.syncpoint_id);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}",
+                          static_cast<u32>(regs.fence_action.op.Value()));
+    }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei) ImplementMe
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
 void GPU::ProcessSemaphoreTriggerMethod() {
     const auto semaphoreOperationMask = 0xF;
     const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
         return use_nvdec;
     }
 
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+
+        static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw};
+        }
+    };
+
     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
 
@@ -291,10 +309,7 @@ public:
                 u32 semaphore_acquire;
                 u32 semaphore_release;
                 u32 fence_value;
-                union {
-                    BitField<4, 4, u32> operation;
-                    BitField<8, 8, u32> id;
-                } fence_action;
+                FenceAction fence_action;
                 INSERT_UNION_PADDING_WORDS(0xE2);
 
                 // Puller state
@@ -342,6 +357,8 @@ protected:
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();
-- 
cgit v1.2.3


From c64545d07ae57816bc658ca7c45559d0b0d49f89 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 29 Oct 2020 21:13:04 -0700
Subject: video_core: dma_pusher: Add support for prefetched command lists.

---
 src/video_core/dma_pusher.cpp | 56 +++++++++++++++++++++++++------------------
 src/video_core/dma_pusher.h   | 21 ++++++++++++++--
 2 files changed, 52 insertions(+), 25 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..9c49c6153 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -45,32 +45,42 @@ bool DmaPusher::Step() {
         return false;
     }
 
-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
+    CommandList& command_list{dma_pushbuffer.front()};
+
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // Somehow the command_list is empty, in order to avoid a crash
+            // We ignore it and assume its size is 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
+
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
         dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    const GPUVAddr dma_get = command_list_header.addr;
-
-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
-
-    if (command_list_header.size == 0) {
-        return true;
-    }
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex]};
+        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+        const GPUVAddr dma_get = command_list_header.addr;
+
+        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
 
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
+        if (command_list_header.size == 0) {
+            return true;
+        }
 
+        // Push buffer non-empty, read a word
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+    }
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
 
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 2026b7857..99b30ca0d 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -74,9 +74,26 @@ union CommandHeader {
 static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
 static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
 
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+                                                  SubmissionMode mode) {
+    CommandHeader result{};
+    result.method.Assign(static_cast<u32>(method));
+    result.arg_count.Assign(arg_count);
+    result.mode.Assign(mode);
+    return result;
+}
+
 class GPU;
 
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+    CommandList() = default;
+    explicit CommandList(std::size_t size) : command_lists(size) {}
+    explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
+        : prefetch_command_list{std::move(prefetch_command_list)} {}
+
+    std::vector<Tegra::CommandListHeader> command_lists;
+    std::vector<Tegra::CommandHeader> prefetch_command_list;
+};
 
 /**
  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -85,7 +102,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
  * details on this implementation.
  */
-class DmaPusher {
+class DmaPusher final {
 public:
     explicit DmaPusher(Core::System& system, GPU& gpu);
     ~DmaPusher();
-- 
cgit v1.2.3


From c6e1c46ac70bf31b54f756f9611b1cf086b63fb0 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 29 Oct 2020 21:13:48 -0700
Subject: video_core: dma_pusher: Add support for integrity checks.

- Log corrupted command lists, rather than crash.
---
 src/video_core/dma_pusher.cpp | 24 ++++++++++++++++++++++++
 src/video_core/dma_pusher.h   |  3 +++
 2 files changed, 27 insertions(+)

(limited to 'src/video_core')

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 9c49c6153..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,6 +13,20 @@
 
 namespace Tegra {
 
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+    command_list_hashes.resize(command_lists.size());
+
+    for (std::size_t index = 0; index < command_lists.size(); ++index) {
+        const CommandListHeader command_list_header = command_lists[index];
+        std::vector<CommandHeader> command_headers(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+        command_list_hashes[index] =
+            Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                               command_list_header.size * sizeof(u32));
+    }
+}
+
 DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
 
 DmaPusher::~DmaPusher() = default;
@@ -80,6 +95,15 @@ bool DmaPusher::Step() {
         command_headers.resize(command_list_header.size);
         gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                             command_list_header.size * sizeof(u32));
+
+        // Integrity check
+        const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                                                command_list_header.size * sizeof(u32));
+        if (new_hash != next_hash) {
+            LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+            dma_pushbuffer.pop();
+            return true;
+        }
     }
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 99b30ca0d..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -91,7 +91,10 @@ struct CommandList final {
     explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
         : prefetch_command_list{std::move(prefetch_command_list)} {}
 
+    void RefreshIntegrityChecks(GPU& gpu);
+
     std::vector<Tegra::CommandListHeader> command_lists;
+    std::vector<u64> command_list_hashes;
     std::vector<Tegra::CommandHeader> prefetch_command_list;
 };
 
-- 
cgit v1.2.3