Diffstat:
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 144
1 file changed, 82 insertions(+), 62 deletions(-)
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index e03685af1..17ef61147 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -46,15 +46,17 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
Scheduler::~Scheduler() = default;
-void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
- SubmitExecution(signal_semaphore, wait_semaphore);
+u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ // When flushing, we only send data to the worker thread; no waiting is necessary.
+ const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
+ return signal_value;
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ // When finishing, we need to wait for the submission to have executed on the device.
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal_semaphore, wait_semaphore);
- WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
}
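Flush now returns the timeline tick that the submission will signal, so a caller can overlap CPU work with GPU execution and synchronize later without paying for a full Finish. A minimal usage sketch; `scheduler`, the semaphore arguments, and `DoUnrelatedCpuWork` are placeholders for illustration:

    // Hypothetical caller of the new Flush API.
    const u64 tick = scheduler.Flush(signal_semaphore, wait_semaphore);
    DoUnrelatedCpuWork();  // Overlaps with GPU execution of the flushed work.
    scheduler.Wait(tick);  // Blocks until the device reaches that tick.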
@@ -63,8 +65,14 @@ void Scheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
- std::unique_lock lock{work_mutex};
- wait_cv.wait(lock, [this] { return work_queue.empty(); });
+ // Ensure the queue is drained.
+ {
+ std::unique_lock ql{queue_mutex};
+ event_cv.wait(ql, [this] { return work_queue.empty(); });
+ }
+
+ // Now wait for execution to finish.
+ std::scoped_lock el{execution_mutex};
}
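WaitWorker is now a two-phase barrier. Phase one drains the queue: the waiter sleeps on event_cv until the worker has popped every chunk. Phase two covers the chunk the worker may still be executing: the worker holds execution_mutex for the duration of ExecuteAll, so acquiring that mutex cannot succeed until execution has finished. A standalone sketch of the pattern, with names mirroring the members used above:

    #include <condition_variable>
    #include <mutex>
    #include <queue>

    std::mutex queue_mutex;            // Guards work_queue.
    std::mutex execution_mutex;        // Held by the worker while executing a chunk.
    std::condition_variable event_cv;  // Shared by producers and both kinds of waiters.
    std::queue<int> work_queue;        // Element type is a stand-in.

    void WaitForWorkerIdle() {
        // Phase 1: an empty queue means every chunk has at least been picked up.
        {
            std::unique_lock ql{queue_mutex};
            event_cv.wait(ql, [] { return work_queue.empty(); });
        }
        // Phase 2: taking execution_mutex proves the in-flight chunk finished,
        // because the worker only releases it between chunks.
        std::scoped_lock el{execution_mutex};
    }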
void Scheduler::DispatchWork() {
@@ -72,10 +80,10 @@ void Scheduler::DispatchWork() {
return;
}
{
- std::scoped_lock lock{work_mutex};
+ std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
- work_cv.notify_one();
+ event_cv.notify_all();
AcquireNewChunk();
}
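The switch from notify_one to notify_all matters because a single condition variable now serves two opposite predicates: the worker waits for the queue to become non-empty, while WaitWorker waits for it to become empty. A sketch of the producer side, reusing the declarations from the sketch above:

    // Producer side; the int stands in for a real work chunk.
    void Enqueue(int chunk) {
        {
            std::scoped_lock ql{queue_mutex};
            work_queue.push(std::move(chunk));
        }
        // notify_all, not notify_one: a single wakeup could land on a
        // WaitForWorkerIdle caller whose predicate (queue empty) is still
        // false, leaving the worker asleep with work pending.
        event_cv.notify_all();
    }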
@@ -137,30 +145,55 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) {
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("VulkanWorker");
- do {
+
+ const auto TryPopQueue{[this](auto& work) -> bool {
+ if (work_queue.empty()) {
+ return false;
+ }
+
+ work = std::move(work_queue.front());
+ work_queue.pop();
+ event_cv.notify_all();
+ return true;
+ }};
+
+ while (!stop_token.stop_requested()) {
std::unique_ptr<CommandChunk> work;
- bool has_submit{false};
+
{
- std::unique_lock lock{work_mutex};
- if (work_queue.empty()) {
- wait_cv.notify_all();
- }
- Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); });
+ std::unique_lock lk{queue_mutex};
+
+ // Wait for work.
+ Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); });
+
+ // If we've been asked to stop, we're done.
if (stop_token.stop_requested()) {
- continue;
+ return;
}
- work = std::move(work_queue.front());
- work_queue.pop();
- has_submit = work->HasSubmit();
+ // Exchange lock ownership so that we take the execution lock before
+ // the queue lock goes out of scope. This allows us to force execution
+ // to complete in the next step.
+ std::exchange(lk, std::unique_lock{execution_mutex});
+
+ // Perform the work, tracking whether the chunk was a submission
+ // before executing.
+ const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
+
+ // If the chunk was a submission, reallocate the command buffer.
+ if (has_submit) {
+ AllocateWorkerCommandBuffer();
+ }
}
- if (has_submit) {
- AllocateWorkerCommandBuffer();
+
+ {
+ std::scoped_lock rl{reserve_mutex};
+
+ // Recycle the chunk back to the reserve.
+ chunk_reserve.emplace_back(std::move(work));
}
- std::scoped_lock reserve_lock{reserve_mutex};
- chunk_reserve.push_back(std::move(work));
- } while (!stop_token.stop_requested());
+ }
}
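The interesting trick above is the std::exchange call, which swaps which mutex the scope holds without a window where neither is held: the execution lock is constructed (and acquired) while the queue lock is still live, and the displaced queue lock is returned as a temporary that unlocks at the end of the statement. An isolated sketch of the idiom:

    #include <mutex>
    #include <utility>

    std::mutex queue_mutex;
    std::mutex execution_mutex;

    void LockHandoff() {
        std::unique_lock lk{queue_mutex};
        // ... pop work while holding queue_mutex ...

        // Acquires execution_mutex first, then releases queue_mutex when the
        // temporary returned by std::exchange is destroyed at the semicolon.
        std::exchange(lk, std::unique_lock{execution_mutex});

        // ... execute work while holding execution_mutex ...
    } // lk releases execution_mutex here.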
void Scheduler::AllocateWorkerCommandBuffer() {
@@ -173,52 +206,21 @@ void Scheduler::AllocateWorkerCommandBuffer() {
});
}
-void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
- const VkSemaphore timeline_semaphore = master_semaphore->Handle();
-
- const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
- const std::array signal_values{signal_value, u64(0)};
- const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
-
- const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
- const std::array wait_values{signal_value - 1, u64(1)};
- const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
- static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- };
-
- const VkTimelineSemaphoreSubmitInfo timeline_si{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pNext = nullptr,
- .waitSemaphoreValueCount = num_wait_semaphores,
- .pWaitSemaphoreValues = wait_values.data(),
- .signalSemaphoreValueCount = num_signal_semaphores,
- .pSignalSemaphoreValues = signal_values.data(),
- };
- const VkSubmitInfo submit_info{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &timeline_si,
- .waitSemaphoreCount = num_wait_semaphores,
- .pWaitSemaphores = wait_semaphores.data(),
- .pWaitDstStageMask = wait_stage_masks.data(),
- .commandBufferCount = 1,
- .pCommandBuffers = cmdbuf.address(),
- .signalSemaphoreCount = num_signal_semaphores,
- .pSignalSemaphores = signal_semaphores.data(),
- };
if (on_submit) {
on_submit();
}
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+ std::scoped_lock lock{submit_mutex};
+ switch (const VkResult result = master_semaphore->SubmitQueue(
+ cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
@@ -231,12 +233,20 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
});
chunk->MarkSubmit();
DispatchWork();
+ return signal_value;
}
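The timeline-semaphore plumbing deleted above now lives behind MasterSemaphore::SubmitQueue. The real implementation is elsewhere in the renderer and may differ; the following is a plausible reconstruction from the removed lines, for reference only:

    // Hypothetical reconstruction of the helper from the code removed above.
    VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
                                          VkSemaphore wait_semaphore, u64 host_tick) {
        const VkSemaphore timeline_semaphore = Handle();

        // Always signal/wait the timeline semaphore; optionally add the binary ones.
        const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
        const std::array signal_values{host_tick, u64(0)};
        const std::array signal_semaphores{timeline_semaphore, signal_semaphore};

        const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
        const std::array wait_values{host_tick - 1, u64(1)};
        const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
        static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        };

        const VkTimelineSemaphoreSubmitInfo timeline_si{
            .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
            .pNext = nullptr,
            .waitSemaphoreValueCount = num_wait_semaphores,
            .pWaitSemaphoreValues = wait_values.data(),
            .signalSemaphoreValueCount = num_signal_semaphores,
            .pSignalSemaphoreValues = signal_values.data(),
        };
        const VkSubmitInfo submit_info{
            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = &timeline_si,
            .waitSemaphoreCount = num_wait_semaphores,
            .pWaitSemaphores = wait_semaphores.data(),
            .pWaitDstStageMask = wait_stage_masks.data(),
            .commandBufferCount = 1,
            .pCommandBuffers = cmdbuf.address(),
            .signalSemaphoreCount = num_signal_semaphores,
            .pSignalSemaphores = signal_semaphores.data(),
        };
        return device.GetGraphicsQueue().Submit(submit_info);
    }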
void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
+#if ANDROID
+    if (Settings::IsGPULevelHigh()) {
+        // Query counters are problematic on Android; skip them unless GPU accuracy is High.
+        query_cache->UpdateCounters();
+    }
+#else
query_cache->UpdateCounters();
+#endif
}
}
@@ -247,7 +257,14 @@ void Scheduler::InvalidateState() {
}
void Scheduler::EndPendingOperations() {
+#if ANDROID
+    if (Settings::IsGPULevelHigh()) {
+        // Disabling query streams is problematic on Android; skip it unless GPU accuracy is High.
+        query_cache->DisableStreams();
+    }
+#else
query_cache->DisableStreams();
+#endif
EndRenderPass();
}
@@ -289,13 +306,16 @@ void Scheduler::EndRenderPass() {
}
void Scheduler::AcquireNewChunk() {
- std::scoped_lock lock{reserve_mutex};
+ std::scoped_lock rl{reserve_mutex};
+
if (chunk_reserve.empty()) {
+ // If we don't have anything reserved, we need to make a new chunk.
chunk = std::make_unique<CommandChunk>();
- return;
+ } else {
+ // Otherwise, we can just take from the reserve.
+ chunk = std::move(chunk_reserve.back());
+ chunk_reserve.pop_back();
}
- chunk = std::move(chunk_reserve.back());
- chunk_reserve.pop_back();
}
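AcquireNewChunk and the recycle step in WorkerThread together form a simple mutex-guarded object pool: allocate when the reserve is empty, reuse otherwise, and return chunks after execution. The same pattern in generic form; a sketch, with CommandChunk standing in for any costly allocation:

    #include <memory>
    #include <mutex>
    #include <vector>

    template <typename T>
    class ReservePool {
    public:
        std::unique_ptr<T> Acquire() {
            std::scoped_lock lock{mutex};
            if (reserve.empty()) {
                return std::make_unique<T>(); // Nothing cached: allocate fresh.
            }
            // Otherwise take the most recently recycled object.
            std::unique_ptr<T> item = std::move(reserve.back());
            reserve.pop_back();
            return item;
        }

        void Recycle(std::unique_ptr<T> item) {
            std::scoped_lock lock{mutex};
            reserve.emplace_back(std::move(item));
        }

    private:
        std::mutex mutex;
        std::vector<std::unique_ptr<T>> reserve;
    };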
} // namespace Vulkan