summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/core/hle/kernel/address_arbiter.cpp 53
-rw-r--r-- src/core/hle/kernel/address_arbiter.h 2
-rw-r--r-- src/core/hle/service/audio/hwopus.cpp 6
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 91
-rw-r--r-- src/video_core/engines/maxwell_3d.h 16
-rw-r--r-- src/video_core/gpu.cpp 16
-rw-r--r-- src/video_core/gpu.h 2
7 files changed, 103 insertions, 83 deletions
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 2ea3dcb61..8475b698c 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
const VAddr arb_addr = thread->GetArbiterWaitAddress();
std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
- auto it = thread_list.begin();
- while (it != thread_list.end()) {
- const std::shared_ptr<Thread>& current_thread = *it;
- if (current_thread->GetPriority() >= thread->GetPriority()) {
- thread_list.insert(it, thread);
- return;
- }
- ++it;
+
+ const auto iter =
+ std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
+ return entry->GetPriority() >= thread->GetPriority();
+ });
+
+ if (iter == thread_list.cend()) {
+ thread_list.push_back(std::move(thread));
+ } else {
+ thread_list.insert(iter, std::move(thread));
}
- thread_list.push_back(std::move(thread));
}
void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
const VAddr arb_addr = thread->GetArbiterWaitAddress();
std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
- auto it = thread_list.begin();
- while (it != thread_list.end()) {
- const std::shared_ptr<Thread>& current_thread = *it;
- if (current_thread.get() == thread.get()) {
- thread_list.erase(it);
- return;
- }
- ++it;
- }
- UNREACHABLE();
+
+ const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
+ [&thread](const auto& entry) { return thread == entry; });
+
+ ASSERT(iter != thread_list.cend());
+
+ thread_list.erase(iter);
}
-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
- std::vector<std::shared_ptr<Thread>> result;
- std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
- auto it = thread_list.begin();
- while (it != thread_list.end()) {
- std::shared_ptr<Thread> current_thread = *it;
- result.push_back(std::move(current_thread));
- ++it;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
+ VAddr address) const {
+ const auto iter = arb_threads.find(address);
+ if (iter == arb_threads.cend()) {
+ return {};
}
- return result;
+
+ const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
+ return {thread_list.cbegin(), thread_list.cend()};
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index 386983e54..f958eee5a 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -86,7 +86,7 @@ private:
void RemoveThread(std::shared_ptr<Thread> thread);
// Gets the threads waiting on an address.
- std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+ std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
/// List of threads waiting for a address arbiter
std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index cb839e4a2..d19513cbb 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -170,8 +170,10 @@ public:
{3, nullptr, "SetContextForMultiStream"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
- {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
- {7, nullptr, "DecodeInterleavedForMultiStream"},
+ {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"},
+ {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"},
+ {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+ {9, nullptr, "DecodeInterleavedForMultiStream"},
};
// clang-format on
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..0b3e8749b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"
@@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
regs.reg_array[0xd00] = 1;
}
-void Maxwell3D::ProcessQueryGet() {
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
+ struct LongQueryResult {
+ u64_le value;
+ u64_le timestamp;
+ };
+ static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
const GPUVAddr sequence_address{regs.query.QueryAddress()};
- // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
- // VAddr before writing.
+ if (long_query) {
+ // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+ // GPU, this command may actually take a while to complete in real hardware due to GPU
+ // wait queues.
+ LongQueryResult query_result{payload, system.GPU().GetTicks()};
+ memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ } else {
+ memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+ }
+}
+void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u64 result = 0;
-
- // TODO(Subv): Support the other query variables
- switch (regs.query.query_get.select) {
- case Regs::QuerySelect::Zero:
- // This seems to actually write the query sequence to the query address.
- result = regs.query.query_sequence;
+ switch (regs.query.query_get.operation) {
+ case Regs::QueryOperation::Release: {
+ const u64 result = regs.query.query_sequence;
+ StampQueryResult(result, regs.query.query_get.short_query == 0);
break;
- default:
- result = 1;
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
}
-
- // TODO(Subv): Research and implement how query sync conditions work.
-
- struct LongQueryResult {
- u64_le value;
- u64_le timestamp;
- };
- static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
-
- switch (regs.query.query_get.mode) {
- case Regs::QueryMode::Write:
- case Regs::QueryMode::Write2: {
- u32 sequence = regs.query.query_sequence;
- if (regs.query.query_get.short_query) {
- // Write the current query sequence to the sequence address.
- // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
- // query.
- memory_manager.Write<u32>(sequence_address, sequence);
- } else {
- // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
- // GPU, this command may actually take a while to complete in real hardware due to GPU
- // wait queues.
- LongQueryResult query_result{};
- query_result.value = result;
- // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
- query_result.timestamp = system.CoreTiming().GetTicks();
- memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ case Regs::QueryOperation::Acquire: {
+ // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU
+ // to write a value that matches the current payload.
+ UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+ break;
+ }
+ case Regs::QueryOperation::Counter: {
+ u64 result{};
+ switch (regs.query.query_get.select) {
+ case Regs::QuerySelect::Zero:
+ result = 0;
+ break;
+ default:
+ result = 1;
+ UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
}
+ StampQueryResult(result, regs.query.query_get.short_query == 0);
+ break;
+ }
+ case Regs::QueryOperation::Trap: {
+ UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+ break;
+ }
+ default: {
+ UNIMPLEMENTED_MSG("Unknown query operation");
break;
}
- default:
- UNIMPLEMENTED_MSG("Query mode {} not implemented",
- static_cast<u32>(regs.query.query_get.mode.Value()));
}
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7b1912a66..0a2af54e5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
static constexpr std::size_t MaxConstBuffers = 18;
static constexpr std::size_t MaxConstBufferSize = 0x10000;
- enum class QueryMode : u32 {
- Write = 0,
- Sync = 1,
- // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
- // is.
- Write2 = 2,
+ enum class QueryOperation : u32 {
+ Release = 0,
+ Acquire = 1,
+ Counter = 2,
+ Trap = 3,
};
enum class QueryUnit : u32 {
@@ -1081,7 +1080,7 @@ public:
u32 query_sequence;
union {
u32 raw;
- BitField<0, 2, QueryMode> mode;
+ BitField<0, 2, QueryOperation> operation;
BitField<4, 1, u32> fence;
BitField<12, 4, QueryUnit> unit;
BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1413,6 +1412,9 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
+ // Writes payload to the query address: 128-bit value+timestamp if long_query, else a u32
+ void StampQueryResult(u64 payload, bool long_query);
+
// Handles Conditional Rendering
void ProcessQueryCondition();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b8..4419ab735 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/core_timing_util.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
+u64 GPU::GetTicks() const {
+ // These values were reverse engineered by fincs from NVN
+ // GPU ticks are nanoseconds scaled by 384/625 (numerator/denominator below)
+ constexpr u64 gpu_ticks_num = 384;
+ constexpr u64 gpu_ticks_den = 625;
+
+ const u64 cpu_ticks = system.CoreTiming().GetTicks();
+ const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+ const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+ const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+ return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+}
+
void GPU::FlushCommands() {
renderer.Rasterizer().FlushCommands();
}
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
- block.timestamp = system.CoreTiming().GetTicks();
+ block.timestamp = GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bb..07727210c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+ u64 GetTicks() const;
+
std::unique_lock<std::mutex> LockSync() {
return std::unique_lock{sync_mutex};
}