14 files changed, 175 insertions, 71 deletions
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index cb67094f6..5bbe6a332 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -42,11 +42,11 @@ public:
     void Decode();
 
     /// Returns most recently decoded frame
-    AVFrame* GetCurrentFrame();
-    const AVFrame* GetCurrentFrame() const;
+    [[nodiscard]] AVFrame* GetCurrentFrame();
+    [[nodiscard]] const AVFrame* GetCurrentFrame() const;
 
     /// Returns the value of current_codec
-    NvdecCommon::VideoCodec GetCurrentCodec() const;
+    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
 
 private:
     bool initialized{};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 549a40f52..33e063e20 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -43,7 +43,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
 
 H264::~H264() = default;
 
-std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
+const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                bool is_first_frame) {
     H264DecoderContext context{};
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
 
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index f2292fd2f..273449495 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -51,14 +51,14 @@ public:
     void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
 
     /// Return the bitstream as a vector.
-    std::vector<u8>& GetByteArray();
-    const std::vector<u8>& GetByteArray() const;
+    [[nodiscard]] std::vector<u8>& GetByteArray();
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
 
 private:
     void WriteBits(s32 value, s32 bit_count);
     void WriteExpGolombCodedInt(s32 value);
     void WriteExpGolombCodedUInt(u32 value);
-    s32 GetFreeBufferBits();
+    [[nodiscard]] s32 GetFreeBufferBits();
     void Flush();
 
     s32 buffer_size{8};
@@ -74,8 +74,8 @@ public:
     ~H264();
 
     /// Compose the H264 header of the frame for FFmpeg decoding
-    std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
-                                        bool is_first_frame = false);
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                            bool is_first_frame = false);
 
 private:
     struct H264ParameterSet {
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 42520f856..ab44fdc9e 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -854,7 +854,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
     return uncomp_writer;
 }
 
-std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
+const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
     std::vector<u8> bitstream;
     {
         Vp9FrameContainer curr_frame = GetCurrentFrame(state);
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 05c9682fa..e2504512c 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -119,7 +119,7 @@ public:
 
     /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
     /// documentation
-    std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
 
     /// Returns true if the most recent frame was a hidden frame.
     [[nodiscard]] bool WasFrameHidden() const {
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index a50acf6e8..4f0b05d22 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -231,9 +231,8 @@ struct PictureInfo {
     u32 surface_params{};
     INSERT_PADDING_WORDS(3);
 
-    Vp9PictureInfo Convert() const {
-
-        return Vp9PictureInfo{
+    [[nodiscard]] Vp9PictureInfo Convert() const {
+        return {
             .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
             .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
             .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index af14f9857..eec4443f9 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -26,8 +26,8 @@ public:
     void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
     /// Return most recently decoded frame
-    AVFrame* GetFrame();
-    const AVFrame* GetFrame() const;
+    [[nodiscard]] AVFrame* GetFrame();
+    [[nodiscard]] const AVFrame* GetFrame() const;
 
 private:
     /// Invoke codec to decode a frame
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,6 +13,20 @@
 
 namespace Tegra {
 
+// Rebuilds command_list_hashes so it holds one CityHash64 per entry of command_lists, computed
+// over the words that entry's header points at, read through the GPU memory manager.
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+    command_list_hashes.resize(command_lists.size());
+    for (std::size_t i = 0; i < command_lists.size(); ++i) {
+        const CommandListHeader header = command_lists[i];
+        const std::size_t size_in_bytes = header.size * sizeof(u32);
+        std::vector<CommandHeader> words(header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(header.addr, words.data(), size_in_bytes);
+        const auto* bytes = reinterpret_cast<char*>(words.data());
+        command_list_hashes[i] = Common::CityHash64(bytes, size_in_bytes);
+    }
+}
+
 DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
 
 DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,51 @@ bool DmaPusher::Step() {
         return false;
     }
 
-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    const GPUVAddr dma_get = command_list_header.addr;
-
-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
+    CommandList& command_list{dma_pushbuffer.front()};
 
-    if (command_list_header.size == 0) {
-        return true;
-    }
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // Somehow the command_list is empty, in order to avoid a crash
+            // We ignore it and assume its size is 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
 
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
+        dma_pushbuffer.pop();
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex]};
+        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+        const GPUVAddr dma_get = command_list_header.addr;
+
+        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
 
+        if (command_list_header.size == 0) {
+            return true;
+        }
+
+        // Push buffer non-empty, read a word
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+
+        // Integrity check. NOTE(review): the list may already be popped above; verify pop below.
+        const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                                                command_list_header.size * sizeof(u32));
+        if (new_hash != next_hash) {
+            LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+            dma_pushbuffer.pop();
+            return true;
+        }
+    }
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
 
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in the docs. The values here are NOT multiplied
+// by 4, so the values you see in the docs might be these values multiplied by 4.
+enum class BufferMethods : u32 {
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    UnkCacheFlush = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
+    WaitForInterrupt = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
 struct CommandListHeader {
     union {
         u64 raw;
@@ -49,9 +74,29 @@ union CommandHeader {
 static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
 static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
 
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+                                                  SubmissionMode mode) {
+    CommandHeader header{}; // Value-initialized; method, arg_count and mode are set below
+    header.method.Assign(static_cast<u32>(method));
+    header.arg_count.Assign(arg_count);
+    header.mode.Assign(mode);
+    return header;
+}
+
 class GPU;
 
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+    CommandList() = default;
+    explicit CommandList(std::size_t size) : command_lists(size) {}
+    explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list_)
+        : prefetch_command_list{std::move(prefetch_command_list_)} {}
+    /// Fills command_list_hashes with one hash per entry of command_lists.
+    void RefreshIntegrityChecks(GPU& gpu);
+    /// The sized constructor fills command_lists; the vector one fills prefetch_command_list.
+    std::vector<Tegra::CommandListHeader> command_lists;
+    std::vector<u64> command_list_hashes;
+    std::vector<Tegra::CommandHeader> prefetch_command_list;
+};
 
 /**
  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
  * details on this implementation.
  */
-class DmaPusher {
+class DmaPusher final {
 public:
     explicit DmaPusher(Core::System& system, GPU& gpu);
     ~DmaPusher();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
 void GPU::OnCommandListEnd() {
     renderer->Rasterizer().ReleaseFences();
 }
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};
 
 enum class GpuSemaphoreOperation {
     AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::UnkCacheFlush:
     case BufferMethods::WrcacheFlush:
     case BufferMethods::FenceValue:
+        break;
     case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
         break;
     case BufferMethods::SemaphoreTrigger: {
         ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
     }
 }
 
+void GPU::ProcessFenceActionMethod() {
+    const auto& action = regs.fence_action; // Holds the requested operation and syncpoint id
+    switch (action.op.Value()) {
+    case FenceOperation::Acquire:
+        WaitFence(action.syncpoint_id, regs.fence_value);
+        break;
+    case FenceOperation::Increment:
+        IncrementSyncPoint(action.syncpoint_id);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}", static_cast<u32>(action.op.Value()));
+    }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei): Implement me. Until then the call is only logged as a stubbed method.
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
 void GPU::ProcessSemaphoreTriggerMethod() {
     const auto semaphoreOperationMask = 0xF;
     const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
         return use_nvdec;
     }
 
+    enum class FenceOperation : u32 {
+        Acquire = 0,   // ProcessFenceActionMethod calls WaitFence for this value
+        Increment = 1, // ProcessFenceActionMethod calls IncrementSyncPoint for this value
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+        /// Packs an operation and a syncpoint id into a single command word.
+        static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction action{};
+            action.op.Assign(op);
+            action.syncpoint_id.Assign(syncpoint_id);
+            return {action.raw};
+        }
+    };
+
     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
 
@@ -291,10 +309,7 @@ public:
                 u32 semaphore_acquire;
                 u32 semaphore_release;
                 u32 fence_value;
-                union {
-                    BitField<4, 4, u32> operation;
-                    BitField<8, 8, u32> id;
-                } fence_action;
+                FenceAction fence_action;
                 INSERT_UNION_PADDING_WORDS(0xE2);
 
                 // Puller state
@@ -342,6 +357,8 @@ protected:
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index e1217ca83..f34ed6735 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -771,13 +771,18 @@ void VKDevice::CollectTelemetryParameters() {
     VkPhysicalDeviceDriverPropertiesKHR driver{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
         .pNext = nullptr,
+        .driverID = {},
+        .driverName = {},
+        .driverInfo = {},
+        .conformanceVersion = {},
     };
 
-    VkPhysicalDeviceProperties2KHR properties{
+    VkPhysicalDeviceProperties2KHR device_properties{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
         .pNext = &driver,
+        .properties = {},
     };
-    physical.GetProperties2KHR(properties);
+    physical.GetProperties2KHR(device_properties);
 
     driver_id = driver.driverID;
     vendor_name = driver.driverName;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 696eaeb5f..0e8f9c352 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -159,6 +159,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
         .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
+        .codeSize = 0,
     };
 
     std::vector<vk::ShaderModule> modules;
@@ -388,6 +389,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
         .logicOp = VK_LOGIC_OP_COPY,
         .attachmentCount = static_cast<u32>(num_attachments),
         .pAttachments = cb_attachments.data(),
+        .blendConstants = {},
     };
 
     std::vector dynamic_states{
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index e8515321b..13dd16356 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -240,6 +240,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
         .is_tiled = is_tiled,
         .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
                            config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
+        .is_layered = false,
         .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
         .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
         .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,