7 files changed, 103 insertions, 83 deletions
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 2ea3dcb61..8475b698c 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
 void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
     const VAddr arb_addr = thread->GetArbiterWaitAddress();
     std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread>& current_thread = *it;
-        if (current_thread->GetPriority() >= thread->GetPriority()) {
-            thread_list.insert(it, thread);
-            return;
-        }
-        ++it;
+
+    const auto iter =
+        std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
+            return entry->GetPriority() >= thread->GetPriority();
+        });
+
+    if (iter == thread_list.cend()) {
+        thread_list.push_back(std::move(thread));
+    } else {
+        thread_list.insert(iter, std::move(thread));
     }
-    thread_list.push_back(std::move(thread));
 }
 
 void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
     const VAddr arb_addr = thread->GetArbiterWaitAddress();
     std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread>& current_thread = *it;
-        if (current_thread.get() == thread.get()) {
-            thread_list.erase(it);
-            return;
-        }
-        ++it;
-    }
-    UNREACHABLE();
+
+    const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
+                                   [&thread](const auto& entry) { return thread == entry; });
+
+    ASSERT(iter != thread_list.cend());
+
+    thread_list.erase(iter);
 }
 
-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
-    std::vector<std::shared_ptr<Thread>> result;
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        std::shared_ptr<Thread> current_thread = *it;
-        result.push_back(std::move(current_thread));
-        ++it;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
+    VAddr address) const {
+    const auto iter = arb_threads.find(address);
+    if (iter == arb_threads.cend()) {
+        return {};
     }
-    return result;
+
+    const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
+    return {thread_list.cbegin(), thread_list.cend()};
 }
 } // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index 386983e54..f958eee5a 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -86,7 +86,7 @@ private:
     void RemoveThread(std::shared_ptr<Thread> thread);
 
     // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
 
     /// List of threads waiting for a address arbiter
     std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index cb839e4a2..d19513cbb 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -170,8 +170,10 @@ public:
             {3, nullptr, "SetContextForMultiStream"},
             {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
             {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
-            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
-            {7, nullptr, "DecodeInterleavedForMultiStream"},
+            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"},
+            {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"},
+            {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+            {9, nullptr, "DecodeInterleavedForMultiStream"},
         };
         // clang-format on
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..0b3e8749b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
 #include "core/core_timing.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/texture.h"
@@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
     regs.reg_array[0xd00] = 1;
 }
 
-void Maxwell3D::ProcessQueryGet() {
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
+    struct LongQueryResult {
+        u64_le value;
+        u64_le timestamp;
+    };
+    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
     const GPUVAddr sequence_address{regs.query.QueryAddress()};
-    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
-    // VAddr before writing.
+    if (long_query) {
+        // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+        // GPU, this command may actually take a while to complete in real hardware due to GPU
+        // wait queues.
+        LongQueryResult query_result{payload, system.GPU().GetTicks()};
+        memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+    } else {
+        memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+    }
+}
 
+void Maxwell3D::ProcessQueryGet() {
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
                "Units other than CROP are unimplemented");
 
-    u64 result = 0;
-
-    // TODO(Subv): Support the other query variables
-    switch (regs.query.query_get.select) {
-    case Regs::QuerySelect::Zero:
-        // This seems to actually write the query sequence to the query address.
-        result = regs.query.query_sequence;
+    switch (regs.query.query_get.operation) {
+    case Regs::QueryOperation::Release: {
+        const u64 result = regs.query.query_sequence;
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
         break;
-    default:
-        result = 1;
-        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
-                          static_cast<u32>(regs.query.query_get.select.Value()));
     }
-
-    // TODO(Subv): Research and implement how query sync conditions work.
-
-    struct LongQueryResult {
-        u64_le value;
-        u64_le timestamp;
-    };
-    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
-
-    switch (regs.query.query_get.mode) {
-    case Regs::QueryMode::Write:
-    case Regs::QueryMode::Write2: {
-        u32 sequence = regs.query.query_sequence;
-        if (regs.query.query_get.short_query) {
-            // Write the current query sequence to the sequence address.
-            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
-            // query.
-            memory_manager.Write<u32>(sequence_address, sequence);
-        } else {
-            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
-            // GPU, this command may actually take a while to complete in real hardware due to GPU
-            // wait queues.
-            LongQueryResult query_result{};
-            query_result.value = result;
-            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-            query_result.timestamp = system.CoreTiming().GetTicks();
-            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+    case Regs::QueryOperation::Acquire: {
+        // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
+        // to write a value that matches the current payload.
+        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+        break;
+    }
+    case Regs::QueryOperation::Counter: {
+        u64 result{};
+        switch (regs.query.query_get.select) {
+        case Regs::QuerySelect::Zero:
+            result = 0;
+            break;
+        default:
+            result = 1;
+            UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+                              static_cast<u32>(regs.query.query_get.select.Value()));
         }
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
+        break;
+    }
+    case Regs::QueryOperation::Trap: {
+        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+        break;
+    }
+    default: {
+        UNIMPLEMENTED_MSG("Unknown query operation");
         break;
     }
-    default:
-        UNIMPLEMENTED_MSG("Query mode {} not implemented",
-                          static_cast<u32>(regs.query.query_get.mode.Value()));
     }
 }
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7b1912a66..0a2af54e5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
         static constexpr std::size_t MaxConstBuffers = 18;
         static constexpr std::size_t MaxConstBufferSize = 0x10000;
 
-        enum class QueryMode : u32 {
-            Write = 0,
-            Sync = 1,
-            // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
-            // is.
-            Write2 = 2,
+        enum class QueryOperation : u32 {
+            Release = 0,
+            Acquire = 1,
+            Counter = 2,
+            Trap = 3,
         };
 
         enum class QueryUnit : u32 {
@@ -1081,7 +1080,7 @@ public:
                     u32 query_sequence;
                     union {
                         u32 raw;
-                        BitField<0, 2, QueryMode> mode;
+                        BitField<0, 2, QueryOperation> operation;
                         BitField<4, 1, u32> fence;
                         BitField<12, 4, QueryUnit> unit;
                         BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1413,6 +1412,9 @@ private:
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
+    // Writes the query result accordingly
+    void StampQueryResult(u64 payload, bool long_query);
+
     // Handles Conditional Rendering
     void ProcessQueryCondition();
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b8..4419ab735 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
     return true;
 }
 
+u64 GPU::GetTicks() const {
+    // This values were reversed engineered by fincs from NVN
+    // The gpu clock is reported in units of 385/625 nanoseconds
+    constexpr u64 gpu_ticks_num = 384;
+    constexpr u64 gpu_ticks_den = 625;
+
+    const u64 cpu_ticks = system.CoreTiming().GetTicks();
+    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+    const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+}
+
 void GPU::FlushCommands() {
     renderer.Rasterizer().FlushCommands();
 }
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
         block.sequence = regs.semaphore_sequence;
         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
         // CoreTiming
-        block.timestamp = system.CoreTiming().GetTicks();
+        block.timestamp = GetTicks();
         memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
                                    sizeof(block));
     } else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bb..07727210c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
 
     bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
 
+    u64 GetTicks() const;
+
     std::unique_lock<std::mutex> LockSync() {
         return std::unique_lock{sync_mutex};
     }