diff options
-rw-r--r-- | src/core/hle/kernel/address_arbiter.cpp | 53 | ||||
-rw-r--r-- | src/core/hle/kernel/address_arbiter.h | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 4 | ||||
-rw-r--r-- | src/core/hle/service/bcat/backend/backend.cpp | 4 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 91 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 16 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 16 | ||||
-rw-r--r-- | src/video_core/gpu.h | 2 |
8 files changed, 103 insertions, 85 deletions
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 2ea3dcb61..8475b698c 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread->GetPriority() >= thread->GetPriority()) { - thread_list.insert(it, thread); - return; - } - ++it; + + const auto iter = + std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { + return entry->GetPriority() >= thread->GetPriority(); + }); + + if (iter == thread_list.cend()) { + thread_list.push_back(std::move(thread)); + } else { + thread_list.insert(iter, std::move(thread)); } - thread_list.push_back(std::move(thread)); } void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread.get() == thread.get()) { - thread_list.erase(it); - return; - } - ++it; - } - UNREACHABLE(); + + const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), + [&thread](const auto& entry) { return thread == entry; }); + + ASSERT(iter != thread_list.cend()); + + thread_list.erase(iter); } -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { - std::vector<std::shared_ptr<Thread>> result; - std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - std::shared_ptr<Thread> current_thread = *it; - result.push_back(std::move(current_thread)); - ++it; +std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( + VAddr address) const { + const auto iter = arb_threads.find(address); + if (iter == arb_threads.cend()) { + return {}; } - return result; + + const std::list<std::shared_ptr<Thread>>& thread_list = iter->second; + return {thread_list.cbegin(), thread_list.cend()}; } } // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 386983e54..f958eee5a 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -86,7 +86,7 @@ private: void RemoveThread(std::shared_ptr<Thread> thread); // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); + std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; /// List of threads waiting for a address arbiter std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index e965b5b04..ad464e03b 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -474,7 +474,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); } @@ -506,7 +506,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 6f5ea095a..def3410cc 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name, } bool NullBackend::Clear(u64 title_id) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}"); + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id); return true; } void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id, + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id, Common::HexToString(passphrase)); } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { + struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. + if (long_query) { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); + } +} +void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: { + const u64 result = regs.query.query_sequence; + StampQueryResult(result, regs.query.query_get.short_query == 0); break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); } - - // TODO(Subv): Research and implement how query sync conditions work. - - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); - - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write<u32>(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + case Regs::QueryOperation::Acquire: { + // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU + // to write a value that matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + } + case Regs::QueryOperation::Counter: { + u64 result{}; + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + result = 0; + break; + default: + result = 1; + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); } + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Trap: { + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; + } + default: { + UNIMPLEMENTED_MSG("Unknown query operation"); break; } - default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast<u32>(regs.query.query_get.mode.Value())); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7b1912a66..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -71,12 +71,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. - Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -1081,7 +1080,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1413,6 +1412,9 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); + // Writes the query result accordingly + void StampQueryResult(u64 payload, bool long_query); + // Handles Conditional Rendering void ProcessQueryCondition(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::GetTicks() const { + // This values were reversed engineered by fincs from NVN + // The gpu clock is reported in units of 385/625 nanoseconds + constexpr u64 gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock<std::mutex> LockSync() { return std::unique_lock{sync_mutex}; } |