summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/CMakeLists.txt6
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp3
-rw-r--r--src/core/core_timing.cpp3
-rw-r--r--src/core/core_timing_util.cpp18
-rw-r--r--src/core/core_timing_util.h12
-rw-r--r--src/core/cpu_manager.h5
-rw-r--r--src/core/frontend/framebuffer_layout.cpp21
-rw-r--r--src/core/frontend/framebuffer_layout.h15
-rw-r--r--src/core/hardware_properties.h45
-rw-r--r--src/core/hle/kernel/client_session.cpp7
-rw-r--r--src/core/hle/kernel/client_session.h6
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp20
-rw-r--r--src/core/hle/kernel/kernel.cpp17
-rw-r--r--src/core/hle/kernel/kernel.h7
-rw-r--r--src/core/hle/kernel/process.cpp4
-rw-r--r--src/core/hle/kernel/process.h8
-rw-r--r--src/core/hle/kernel/readable_event.cpp16
-rw-r--r--src/core/hle/kernel/readable_event.h10
-rw-r--r--src/core/hle/kernel/scheduler.cpp12
-rw-r--r--src/core/hle/kernel/scheduler.h13
-rw-r--r--src/core/hle/kernel/server_port.cpp6
-rw-r--r--src/core/hle/kernel/server_port.h6
-rw-r--r--src/core/hle/kernel/server_session.cpp12
-rw-r--r--src/core/hle/kernel/server_session.h6
-rw-r--r--src/core/hle/kernel/session.cpp7
-rw-r--r--src/core/hle/kernel/session.h6
-rw-r--r--src/core/hle/kernel/svc.cpp72
-rw-r--r--src/core/hle/kernel/synchronization.cpp87
-rw-r--r--src/core/hle/kernel/synchronization.h44
-rw-r--r--src/core/hle/kernel/synchronization_object.cpp (renamed from src/core/hle/kernel/wait_object.cpp)28
-rw-r--r--src/core/hle/kernel/synchronization_object.h (renamed from src/core/hle/kernel/wait_object.h)23
-rw-r--r--src/core/hle/kernel/thread.cpp37
-rw-r--r--src/core/hle/kernel/thread.h23
-rw-r--r--src/core/hle/kernel/writable_event.cpp3
-rw-r--r--src/core/hle/service/audio/hwopus.cpp6
-rw-r--r--src/core/hle/service/hid/hid.cpp7
-rw-r--r--src/core/hle/service/ldn/ldn.cpp10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h8
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp7
-rw-r--r--src/core/hle/service/time/standard_steady_clock_core.cpp3
-rw-r--r--src/core/hle/service/time/tick_based_steady_clock_core.cpp3
-rw-r--r--src/core/hle/service/time/time.cpp3
-rw-r--r--src/core/hle/service/time/time_sharedmemory.cpp3
-rw-r--r--src/core/memory/cheat_engine.cpp3
-rw-r--r--src/core/settings.h1
-rw-r--r--src/core/tools/freezer.cpp3
-rw-r--r--src/video_core/CMakeLists.txt5
-rw-r--r--src/video_core/engines/maxwell_3d.cpp122
-rw-r--r--src/video_core/engines/maxwell_3d.h56
-rw-r--r--src/video_core/engines/shader_bytecode.h4
-rw-r--r--src/video_core/gpu.cpp18
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/memory_manager.cpp14
-rw-r--r--src/video_core/memory_manager.h7
-rw-r--r--src/video_core/query_cache.h359
-rw-r--r--src/video_core/rasterizer_interface.h12
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp120
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h78
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h41
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h25
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp122
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h104
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h15
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp4
-rw-r--r--src/video_core/shader/decode/bfi.cpp2
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp2
-rw-r--r--src/yuzu/configuration/configure_graphics.ui35
-rw-r--r--src/yuzu/debugger/wait_tree.cpp35
-rw-r--r--src/yuzu/debugger/wait_tree.h22
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h4
81 files changed, 1663 insertions, 326 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index d342cafe0..26612e692 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -181,14 +181,16 @@ add_library(core STATIC
hle/kernel/svc.cpp
hle/kernel/svc.h
hle/kernel/svc_wrap.h
+ hle/kernel/synchronization_object.cpp
+ hle/kernel/synchronization_object.h
+ hle/kernel/synchronization.cpp
+ hle/kernel/synchronization.h
hle/kernel/thread.cpp
hle/kernel/thread.h
hle/kernel/transfer_memory.cpp
hle/kernel/transfer_memory.h
hle/kernel/vm_manager.cpp
hle/kernel/vm_manager.h
- hle/kernel/wait_object.cpp
- hle/kernel/wait_object.h
hle/kernel/writable_event.cpp
hle/kernel/writable_event.h
hle/lock.cpp
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 791640a3a..29eaf74e5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -14,6 +14,7 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
+#include "core/hardware_properties.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/svc.h"
@@ -153,7 +154,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
config.tpidr_el0 = &cb->tpidr_el0;
config.dczid_el0 = 4;
config.ctr_el0 = 0x8444c004;
- config.cntfrq_el0 = Timing::CNTFREQ;
+ config.cntfrq_el0 = Hardware::CNTFREQ;
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index aa09fa453..46d4178c4 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/thread.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
namespace Core::Timing {
@@ -215,7 +216,7 @@ void CoreTiming::Idle() {
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
- return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
+ return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
}
s64 CoreTiming::GetDowncount() const {
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index a10472a95..de50d3b14 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -11,7 +11,7 @@
namespace Core::Timing {
-constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
+constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;
s64 msToCycles(std::chrono::milliseconds ms) {
if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
@@ -20,9 +20,9 @@ s64 msToCycles(std::chrono::milliseconds ms) {
}
if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
- return BASE_CLOCK_RATE * (ms.count() / 1000);
+ return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
}
- return (BASE_CLOCK_RATE * ms.count()) / 1000;
+ return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
}
s64 usToCycles(std::chrono::microseconds us) {
@@ -32,9 +32,9 @@ s64 usToCycles(std::chrono::microseconds us) {
}
if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
- return BASE_CLOCK_RATE * (us.count() / 1000000);
+ return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
}
- return (BASE_CLOCK_RATE * us.count()) / 1000000;
+ return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
}
s64 nsToCycles(std::chrono::nanoseconds ns) {
@@ -44,14 +44,14 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
}
if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
- return BASE_CLOCK_RATE * (ns.count() / 1000000000);
+ return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
}
- return (BASE_CLOCK_RATE * ns.count()) / 1000000000;
+ return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
}
u64 CpuCyclesToClockCycles(u64 ticks) {
- const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
- return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+ const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
+ return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
}
} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index cdd84d70f..addc72b19 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,28 +6,24 @@
#include <chrono>
#include "common/common_types.h"
+#include "core/hardware_properties.h"
namespace Core::Timing {
-// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
-// The exact value used is of course unverified.
-constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
-constexpr u64 CNTFREQ = 19200000; // Value from fusee.
-
s64 msToCycles(std::chrono::milliseconds ms);
s64 usToCycles(std::chrono::microseconds us);
s64 nsToCycles(std::chrono::nanoseconds ns);
inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
- return std::chrono::milliseconds(cycles * 1000 / BASE_CLOCK_RATE);
+ return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
}
inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
- return std::chrono::nanoseconds(cycles * 1000000000 / BASE_CLOCK_RATE);
+ return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
}
inline std::chrono::microseconds CyclesToUs(s64 cycles) {
- return std::chrono::microseconds(cycles * 1000000 / BASE_CLOCK_RATE);
+ return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
}
u64 CpuCyclesToClockCycles(u64 ticks);
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index feb619e1b..97554d1bb 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -6,6 +6,7 @@
#include <array>
#include <memory>
+#include "core/hardware_properties.h"
namespace Core {
@@ -39,9 +40,7 @@ public:
void RunLoop(bool tight_loop);
private:
- static constexpr std::size_t NUM_CPU_CORES = 4;
-
- std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers;
+ std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers;
std::size_t active_core{}; ///< Active core, only used in single thread mode
System& system;
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index d6d2cf3f0..2dc795d56 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) {
// so just calculate them both even if the other isn't showing.
FramebufferLayout res{width, height};
- const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
- ScreenUndocked::Width};
- const auto window_aspect_ratio = static_cast<float>(height) / width;
+ const float window_aspect_ratio = static_cast<float>(height) / width;
+ const float emulation_aspect_ratio = EmulationAspectRatio(
+ static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio);
const Common::Rectangle<u32> screen_window_area{0, 0, width, height};
Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
@@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
return DefaultFrameLayout(width, height);
}
+float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) {
+ switch (aspect) {
+ case AspectRatio::Default:
+ return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width;
+ case AspectRatio::R4_3:
+ return 3.0f / 4.0f;
+ case AspectRatio::R21_9:
+ return 9.0f / 21.0f;
+ case AspectRatio::StretchToWindow:
+ return window_aspect_ratio;
+ default:
+ return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width;
+ }
+}
+
} // namespace Layout
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index d2370adde..1d39c1faf 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -18,6 +18,13 @@ enum ScreenDocked : u32 {
HeightDocked = 1080,
};
+enum class AspectRatio {
+ Default,
+ R4_3,
+ R21_9,
+ StretchToWindow,
+};
+
/// Describes the layout of the window framebuffer
struct FramebufferLayout {
u32 width{ScreenUndocked::Width};
@@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height);
*/
FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale);
+/**
+ * Convenience method to determine emulation aspect ratio
+ * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio
+ * @param window_aspect_ratio Current window aspect ratio
+ * @return Emulation render window aspect ratio
+ */
+float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio);
+
} // namespace Layout
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
new file mode 100644
index 000000000..213461b6a
--- /dev/null
+++ b/src/core/hardware_properties.h
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <tuple>
+
+#include "common/common_types.h"
+
+namespace Core {
+
+namespace Hardware {
+
+// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
+// The exact value used is of course unverified.
+constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed
+constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
+
+} // namespace Hardware
+
+struct EmuThreadHandle {
+ u32 host_handle;
+ u32 guest_handle;
+
+ u64 GetRaw() const {
+ return (static_cast<u64>(host_handle) << 32) | guest_handle;
+ }
+
+ bool operator==(const EmuThreadHandle& rhs) const {
+ return std::tie(host_handle, guest_handle) == std::tie(rhs.host_handle, rhs.guest_handle);
+ }
+
+ bool operator!=(const EmuThreadHandle& rhs) const {
+ return !operator==(rhs);
+ }
+
+ static constexpr EmuThreadHandle InvalidHandle() {
+ constexpr u32 invalid_handle = 0xFFFFFFFF;
+ return {invalid_handle, invalid_handle};
+ }
+};
+
+} // namespace Core
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 4669a14ad..6d66276bc 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -12,7 +12,7 @@
namespace Kernel {
-ClientSession::ClientSession(KernelCore& kernel) : WaitObject{kernel} {}
+ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
ClientSession::~ClientSession() {
// This destructor will be called automatically when the last ClientSession handle is closed by
@@ -31,6 +31,11 @@ void ClientSession::Acquire(Thread* thread) {
UNIMPLEMENTED();
}
+bool ClientSession::IsSignaled() const {
+ UNIMPLEMENTED();
+ return true;
+}
+
ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kernel,
std::shared_ptr<Session> parent,
std::string name) {
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index b4289a9a8..d15b09554 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -7,7 +7,7 @@
#include <memory>
#include <string>
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/result.h"
union ResultCode;
@@ -22,7 +22,7 @@ class KernelCore;
class Session;
class Thread;
-class ClientSession final : public WaitObject {
+class ClientSession final : public SynchronizationObject {
public:
explicit ClientSession(KernelCore& kernel);
~ClientSession() override;
@@ -48,6 +48,8 @@ public:
void Acquire(Thread* thread) override;
+ bool IsSignaled() const override;
+
private:
static ResultVal<std::shared_ptr<ClientSession>> Create(KernelCore& kernel,
std::shared_ptr<Session> parent,
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index ab05788d7..c558a2f33 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -47,15 +47,15 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
const std::string& reason, u64 timeout, WakeupCallback&& callback,
std::shared_ptr<WritableEvent> writable_event) {
// Put the client thread to sleep until the wait event is signaled or the timeout expires.
- thread->SetWakeupCallback([context = *this, callback](ThreadWakeupReason reason,
- std::shared_ptr<Thread> thread,
- std::shared_ptr<WaitObject> object,
- std::size_t index) mutable -> bool {
- ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
- callback(thread, context, reason);
- context.WriteToOutgoingCommandBuffer(*thread);
- return true;
- });
+ thread->SetWakeupCallback(
+ [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+ std::shared_ptr<SynchronizationObject> object,
+ std::size_t index) mutable -> bool {
+ ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
+ callback(thread, context, reason);
+ context.WriteToOutgoingCommandBuffer(*thread);
+ return true;
+ });
auto& kernel = Core::System::GetInstance().Kernel();
if (!writable_event) {
@@ -67,7 +67,7 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
const auto readable_event{writable_event->GetReadableEvent()};
writable_event->Clear();
thread->SetStatus(ThreadStatus::WaitHLEEvent);
- thread->SetWaitObjects({readable_event});
+ thread->SetSynchronizationObjects({readable_event});
readable_event->AddWaitingThread(thread);
if (timeout > 0) {
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index edd4c4259..4eb1d8703 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -23,6 +23,7 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
@@ -54,10 +55,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
if (thread->GetStatus() == ThreadStatus::WaitSynch ||
thread->GetStatus() == ThreadStatus::WaitHLEEvent) {
// Remove the thread from each of its waiting objects' waitlists
- for (const auto& object : thread->GetWaitObjects()) {
+ for (const auto& object : thread->GetSynchronizationObjects()) {
object->RemoveWaitingThread(thread);
}
- thread->ClearWaitObjects();
+ thread->ClearSynchronizationObjects();
// Invoke the wakeup callback before clearing the wait objects
if (thread->HasWakeupCallback()) {
@@ -96,7 +97,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
}
struct KernelCore::Impl {
- explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {}
+ explicit Impl(Core::System& system)
+ : system{system}, global_scheduler{system}, synchronization{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();
@@ -191,6 +193,7 @@ struct KernelCore::Impl {
std::vector<std::shared_ptr<Process>> process_list;
Process* current_process = nullptr;
Kernel::GlobalScheduler global_scheduler;
+ Kernel::Synchronization synchronization;
std::shared_ptr<ResourceLimit> system_resource_limit;
@@ -270,6 +273,14 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
return impl->cores[id];
}
+Kernel::Synchronization& KernelCore::Synchronization() {
+ return impl->synchronization;
+}
+
+const Kernel::Synchronization& KernelCore::Synchronization() const {
+ return impl->synchronization;
+}
+
Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
return *impl->exclusive_monitor;
}
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index fccffaf3a..1eede3063 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -29,6 +29,7 @@ class HandleTable;
class PhysicalCore;
class Process;
class ResourceLimit;
+class Synchronization;
class Thread;
/// Represents a single instance of the kernel.
@@ -92,6 +93,12 @@ public:
/// Gets the an instance of the respective physical CPU core.
const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
+ /// Gets the an instance of the Synchronization Interface.
+ Kernel::Synchronization& Synchronization();
+
+ /// Gets the an instance of the Synchronization Interface.
+ const Kernel::Synchronization& Synchronization() const;
+
/// Stops execution of 'id' core, in order to reschedule a new thread.
void PrepareReschedule(std::size_t id);
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b9035a0be..2fcb7326c 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -337,7 +337,7 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
}
Process::Process(Core::System& system)
- : WaitObject{system.Kernel()}, vm_manager{system},
+ : SynchronizationObject{system.Kernel()}, vm_manager{system},
address_arbiter{system}, mutex{system}, system{system} {}
Process::~Process() = default;
@@ -357,7 +357,7 @@ void Process::ChangeStatus(ProcessStatus new_status) {
status = new_status;
is_signaled = true;
- WakeupAllWaitingThreads();
+ Signal();
}
void Process::AllocateMainThreadStack(u64 stack_size) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 3483fa19d..4887132a7 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -15,8 +15,8 @@
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/mutex.h"
#include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/kernel/vm_manager.h"
-#include "core/hle/kernel/wait_object.h"
#include "core/hle/result.h"
namespace Core {
@@ -60,7 +60,7 @@ enum class ProcessStatus {
DebugBreak,
};
-class Process final : public WaitObject {
+class Process final : public SynchronizationObject {
public:
explicit Process(Core::System& system);
~Process() override;
@@ -359,10 +359,6 @@ private:
/// specified by metadata provided to the process during loading.
bool is_64bit_process = true;
- /// Whether or not this process is signaled. This occurs
- /// upon the process changing to a different state.
- bool is_signaled = false;
-
/// Total running time for the process in ticks.
u64 total_process_running_time_ticks = 0;
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index d8ac97aa1..9d3d3a81b 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -11,30 +11,30 @@
namespace Kernel {
-ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
+ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {}
ReadableEvent::~ReadableEvent() = default;
bool ReadableEvent::ShouldWait(const Thread* thread) const {
- return !signaled;
+ return !is_signaled;
}
void ReadableEvent::Acquire(Thread* thread) {
- ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+ ASSERT_MSG(IsSignaled(), "object unavailable!");
}
void ReadableEvent::Signal() {
- if (!signaled) {
- signaled = true;
- WakeupAllWaitingThreads();
+ if (!is_signaled) {
+ is_signaled = true;
+ SynchronizationObject::Signal();
};
}
void ReadableEvent::Clear() {
- signaled = false;
+ is_signaled = false;
}
ResultCode ReadableEvent::Reset() {
- if (!signaled) {
+ if (!is_signaled) {
return ERR_INVALID_STATE;
}
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 11ff71c3a..3264dd066 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -5,7 +5,7 @@
#pragma once
#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
union ResultCode;
@@ -14,7 +14,7 @@ namespace Kernel {
class KernelCore;
class WritableEvent;
-class ReadableEvent final : public WaitObject {
+class ReadableEvent final : public SynchronizationObject {
friend class WritableEvent;
public:
@@ -46,13 +46,11 @@ public:
/// then ERR_INVALID_STATE will be returned.
ResultCode Reset();
+ void Signal() override;
+
private:
explicit ReadableEvent(KernelCore& kernel);
- void Signal();
-
- bool signaled{};
-
std::string name; ///< Name of event (optional)
};
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index eb196a690..86f1421bf 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -124,8 +124,8 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
"Thread yielding without being in front");
scheduled_queue[core_id].yield(priority);
- std::array<Thread*, NUM_CPU_CORES> current_threads;
- for (u32 i = 0; i < NUM_CPU_CORES; i++) {
+ std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
+ for (std::size_t i = 0; i < current_threads.size(); i++) {
current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
}
@@ -177,8 +177,8 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
// function...
if (scheduled_queue[core_id].empty()) {
// Here, "current_threads" is calculated after the ""yield"", unlike yield -1
- std::array<Thread*, NUM_CPU_CORES> current_threads;
- for (u32 i = 0; i < NUM_CPU_CORES; i++) {
+ std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
+ for (std::size_t i = 0; i < current_threads.size(); i++) {
current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
}
for (auto& thread : suggested_queue[core_id]) {
@@ -208,7 +208,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
}
void GlobalScheduler::PreemptThreads() {
- for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) {
+ for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
const u32 priority = preemption_priorities[core_id];
if (scheduled_queue[core_id].size(priority) > 0) {
@@ -349,7 +349,7 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
}
void GlobalScheduler::Shutdown() {
- for (std::size_t core = 0; core < NUM_CPU_CORES; core++) {
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
scheduled_queue[core].clear();
suggested_queue[core].clear();
}
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 14b77960a..96db049cb 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/multi_level_queue.h"
+#include "core/hardware_properties.h"
#include "core/hle/kernel/thread.h"
namespace Core {
@@ -23,8 +24,6 @@ class Process;
class GlobalScheduler final {
public:
- static constexpr u32 NUM_CPU_CORES = 4;
-
explicit GlobalScheduler(Core::System& system);
~GlobalScheduler();
@@ -125,7 +124,7 @@ public:
void PreemptThreads();
u32 CpuCoresCount() const {
- return NUM_CPU_CORES;
+ return Core::Hardware::NUM_CPU_CORES;
}
void SetReselectionPending() {
@@ -149,13 +148,15 @@ private:
bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner);
static constexpr u32 min_regular_priority = 2;
- std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue;
- std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue;
+ std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
+ scheduled_queue;
+ std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
+ suggested_queue;
std::atomic<bool> is_reselection_pending{false};
// The priority levels at which the global scheduler preempts threads every 10 ms. They are
// ordered from Core 0 to Core 3.
- std::array<u32, NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
+ std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
/// Lists all thread ids that aren't deleted/etc.
std::vector<std::shared_ptr<Thread>> thread_list;
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index a4ccfa35e..a549ae9d7 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -13,7 +13,7 @@
namespace Kernel {
-ServerPort::ServerPort(KernelCore& kernel) : WaitObject{kernel} {}
+ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {}
ServerPort::~ServerPort() = default;
ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
@@ -39,6 +39,10 @@ void ServerPort::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
}
+bool ServerPort::IsSignaled() const {
+ return !pending_sessions.empty();
+}
+
ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
std::string name) {
std::shared_ptr<ServerPort> server_port = std::make_shared<ServerPort>(kernel);
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index 8be8a75ea..41b191b86 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -10,7 +10,7 @@
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/result.h"
namespace Kernel {
@@ -20,7 +20,7 @@ class KernelCore;
class ServerSession;
class SessionRequestHandler;
-class ServerPort final : public WaitObject {
+class ServerPort final : public SynchronizationObject {
public:
explicit ServerPort(KernelCore& kernel);
~ServerPort() override;
@@ -82,6 +82,8 @@ public:
bool ShouldWait(const Thread* thread) const override;
void Acquire(Thread* thread) override;
+ bool IsSignaled() const override;
+
private:
/// ServerSessions waiting to be accepted by the port
std::vector<std::shared_ptr<ServerSession>> pending_sessions;
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 7825e1ec4..4604e35c5 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -24,7 +24,7 @@
namespace Kernel {
-ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {}
+ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
ServerSession::~ServerSession() = default;
ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
@@ -50,6 +50,16 @@ bool ServerSession::ShouldWait(const Thread* thread) const {
return pending_requesting_threads.empty() || currently_handling != nullptr;
}
+bool ServerSession::IsSignaled() const {
+ // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
+ if (!parent->Client()) {
+ return true;
+ }
+
+ // Wait if we have no pending requests, or if we're currently handling a request.
+ return !pending_requesting_threads.empty() && currently_handling == nullptr;
+}
+
void ServerSession::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
// We are now handling a request, pop it from the stack.
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index d6e48109e..77e4f6721 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,7 +10,7 @@
#include <vector>
#include "common/threadsafe_queue.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/result.h"
namespace Memory {
@@ -41,7 +41,7 @@ class Thread;
* After the server replies to the request, the response is marshalled back to the caller's
* TLS buffer and control is transferred back to it.
*/
-class ServerSession final : public WaitObject {
+class ServerSession final : public SynchronizationObject {
public:
explicit ServerSession(KernelCore& kernel);
~ServerSession() override;
@@ -73,6 +73,8 @@ public:
return parent.get();
}
+ bool IsSignaled() const override;
+
/**
* Sets the HLE handler for the session. This handler will be called to service IPC requests
* instead of the regular IPC machinery. (The regular IPC machinery is currently not
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index dee6e2b72..e4dd53e24 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -9,7 +9,7 @@
namespace Kernel {
-Session::Session(KernelCore& kernel) : WaitObject{kernel} {}
+Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {}
Session::~Session() = default;
Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
@@ -29,6 +29,11 @@ bool Session::ShouldWait(const Thread* thread) const {
return {};
}
+bool Session::IsSignaled() const {
+ UNIMPLEMENTED();
+ return true;
+}
+
void Session::Acquire(Thread* thread) {
UNIMPLEMENTED();
}
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h
index 15a5ac15f..7cd9c0d77 100644
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -8,7 +8,7 @@
#include <string>
#include <utility>
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
namespace Kernel {
@@ -19,7 +19,7 @@ class ServerSession;
* Parent structure to link the client and server endpoints of a session with their associated
* client port.
*/
-class Session final : public WaitObject {
+class Session final : public SynchronizationObject {
public:
explicit Session(KernelCore& kernel);
~Session() override;
@@ -39,6 +39,8 @@ public:
bool ShouldWait(const Thread* thread) const override;
+ bool IsSignaled() const override;
+
void Acquire(Thread* thread) override;
std::shared_ptr<ClientSession> Client() {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 9cae5c73d..fd91779a3 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -32,6 +32,7 @@
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/svc.h"
#include "core/hle/kernel/svc_wrap.h"
+#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/transfer_memory.h"
#include "core/hle/kernel/writable_event.h"
@@ -433,22 +434,6 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
return ERR_INVALID_HANDLE;
}
-/// Default thread wakeup callback for WaitSynchronization
-static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<WaitObject> object, std::size_t index) {
- ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-
- if (reason == ThreadWakeupReason::Timeout) {
- thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
- return true;
- }
-
- ASSERT(reason == ThreadWakeupReason::Signal);
- thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
- thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
- return true;
-};
-
/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
u64 handle_count, s64 nano_seconds) {
@@ -472,14 +457,14 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
}
auto* const thread = system.CurrentScheduler().GetCurrentThread();
-
- using ObjectPtr = Thread::ThreadWaitObjects::value_type;
- Thread::ThreadWaitObjects objects(handle_count);
- const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+ auto& kernel = system.Kernel();
+ using ObjectPtr = Thread::ThreadSynchronizationObjects::value_type;
+ Thread::ThreadSynchronizationObjects objects(handle_count);
+ const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
for (u64 i = 0; i < handle_count; ++i) {
const Handle handle = memory.Read32(handles_address + i * sizeof(Handle));
- const auto object = handle_table.Get<WaitObject>(handle);
+ const auto object = handle_table.Get<SynchronizationObject>(handle);
if (object == nullptr) {
LOG_ERROR(Kernel_SVC, "Object is a nullptr");
@@ -488,47 +473,10 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
objects[i] = object;
}
-
- // Find the first object that is acquirable in the provided list of objects
- auto itr = std::find_if(objects.begin(), objects.end(), [thread](const ObjectPtr& object) {
- return !object->ShouldWait(thread);
- });
-
- if (itr != objects.end()) {
- // We found a ready object, acquire it and set the result value
- WaitObject* object = itr->get();
- object->Acquire(thread);
- *index = static_cast<s32>(std::distance(objects.begin(), itr));
- return RESULT_SUCCESS;
- }
-
- // No objects were ready to be acquired, prepare to suspend the thread.
-
- // If a timeout value of 0 was provided, just return the Timeout error code instead of
- // suspending the thread.
- if (nano_seconds == 0) {
- return RESULT_TIMEOUT;
- }
-
- if (thread->IsSyncCancelled()) {
- thread->SetSyncCancelled(false);
- return ERR_SYNCHRONIZATION_CANCELED;
- }
-
- for (auto& object : objects) {
- object->AddWaitingThread(SharedFrom(thread));
- }
-
- thread->SetWaitObjects(std::move(objects));
- thread->SetStatus(ThreadStatus::WaitSynch);
-
- // Create an event to wake the thread up after the specified nanosecond delay has passed
- thread->WakeAfterDelay(nano_seconds);
- thread->SetWakeupCallback(DefaultThreadWakeupCallback);
-
- system.PrepareReschedule(thread->GetProcessorID());
-
- return RESULT_TIMEOUT;
+ auto& synchronization = kernel.Synchronization();
+ const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds);
+ *index = handle_result;
+ return result;
}
/// Resumes a thread waiting on WaitSynchronization
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
new file mode 100644
index 000000000..dc37fad1a
--- /dev/null
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -0,0 +1,87 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization.h"
+#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+/// Default thread wakeup callback for WaitSynchronization
+static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+ std::shared_ptr<SynchronizationObject> object,
+ std::size_t index) {
+ ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
+
+ if (reason == ThreadWakeupReason::Timeout) {
+ thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
+ return true;
+ }
+
+ ASSERT(reason == ThreadWakeupReason::Signal);
+ thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
+ thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
+ return true;
+}
+
+Synchronization::Synchronization(Core::System& system) : system{system} {}
+
+void Synchronization::SignalObject(SynchronizationObject& obj) const {
+ if (obj.IsSignaled()) {
+ obj.WakeupAllWaitingThreads();
+ }
+}
+
+std::pair<ResultCode, Handle> Synchronization::WaitFor(
+ std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
+ auto* const thread = system.CurrentScheduler().GetCurrentThread();
+ // Find the first object that is acquirable in the provided list of objects
+ const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(),
+ [thread](const std::shared_ptr<SynchronizationObject>& object) {
+ return object->IsSignaled();
+ });
+
+ if (itr != sync_objects.end()) {
+ // We found a ready object, acquire it and set the result value
+ SynchronizationObject* object = itr->get();
+ object->Acquire(thread);
+ const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+ return {RESULT_SUCCESS, index};
+ }
+
+ // No objects were ready to be acquired, prepare to suspend the thread.
+
+ // If a timeout value of 0 was provided, just return the Timeout error code instead of
+ // suspending the thread.
+ if (nano_seconds == 0) {
+ return {RESULT_TIMEOUT, InvalidHandle};
+ }
+
+ if (thread->IsSyncCancelled()) {
+ thread->SetSyncCancelled(false);
+ return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+ }
+
+ for (auto& object : sync_objects) {
+ object->AddWaitingThread(SharedFrom(thread));
+ }
+
+ thread->SetSynchronizationObjects(std::move(sync_objects));
+ thread->SetStatus(ThreadStatus::WaitSynch);
+
+ // Create an event to wake the thread up after the specified nanosecond delay has passed
+ thread->WakeAfterDelay(nano_seconds);
+ thread->SetWakeupCallback(DefaultThreadWakeupCallback);
+
+ system.PrepareReschedule(thread->GetProcessorID());
+
+ return {RESULT_TIMEOUT, InvalidHandle};
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization.h b/src/core/hle/kernel/synchronization.h
new file mode 100644
index 000000000..379f4b1d3
--- /dev/null
+++ b/src/core/hle/kernel/synchronization.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+#include "core/hle/result.h"
+
+namespace Core {
+class System;
+} // namespace Core
+
+namespace Kernel {
+
+class SynchronizationObject;
+
+/**
+ * The 'Synchronization' class is an interface for handling synchronization methods
+ * used by Synchronization objects and synchronization SVCs. This centralizes processing of
+ * such
+ */
+class Synchronization {
+public:
+ explicit Synchronization(Core::System& system);
+
+ /// Signals a synchronization object, waking up all its waiting threads
+ void SignalObject(SynchronizationObject& obj) const;
+
+ /// Tries to see if waiting for any of the sync_objects is necessary, if not
+ /// it returns Success and the handle index of the signaled sync object. In
+ /// case not, the current thread will be locked and wait for nano_seconds or
+ /// for a synchronization object to signal.
+ std::pair<ResultCode, Handle> WaitFor(
+ std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds);
+
+private:
+ Core::System& system;
+};
+} // namespace Kernel
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
index 1838260fd..43f3eef18 100644
--- a/src/core/hle/kernel/wait_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -10,20 +10,26 @@
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/synchronization.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/kernel/thread.h"
namespace Kernel {
-WaitObject::WaitObject(KernelCore& kernel) : Object{kernel} {}
-WaitObject::~WaitObject() = default;
+SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {}
+SynchronizationObject::~SynchronizationObject() = default;
-void WaitObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::Signal() {
+ kernel.Synchronization().SignalObject(*this);
+}
+
+void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
if (itr == waiting_threads.end())
waiting_threads.push_back(std::move(thread));
}
-void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
// If a thread passed multiple handles to the same object,
// the kernel might attempt to remove the thread from the object's
@@ -32,7 +38,7 @@ void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
waiting_threads.erase(itr);
}
-std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
+std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const {
Thread* candidate = nullptr;
u32 candidate_priority = THREADPRIO_LOWEST + 1;
@@ -57,7 +63,7 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
return SharedFrom(candidate);
}
-void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
ASSERT(!ShouldWait(thread.get()));
if (!thread) {
@@ -65,7 +71,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
}
if (thread->IsSleepingOnWait()) {
- for (const auto& object : thread->GetWaitObjects()) {
+ for (const auto& object : thread->GetSynchronizationObjects()) {
ASSERT(!object->ShouldWait(thread.get()));
object->Acquire(thread.get());
}
@@ -73,9 +79,9 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
Acquire(thread.get());
}
- const std::size_t index = thread->GetWaitObjectIndex(SharedFrom(this));
+ const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this));
- thread->ClearWaitObjects();
+ thread->ClearSynchronizationObjects();
thread->CancelWakeupTimer();
@@ -90,13 +96,13 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
}
}
-void WaitObject::WakeupAllWaitingThreads() {
+void SynchronizationObject::WakeupAllWaitingThreads() {
while (auto thread = GetHighestPriorityReadyThread()) {
WakeupWaitingThread(thread);
}
}
-const std::vector<std::shared_ptr<Thread>>& WaitObject::GetWaitingThreads() const {
+const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
return waiting_threads;
}
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/synchronization_object.h
index 9a17958a4..741c31faf 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -15,10 +15,10 @@ class KernelCore;
class Thread;
/// Class that represents a Kernel object that a thread can be waiting on
-class WaitObject : public Object {
+class SynchronizationObject : public Object {
public:
- explicit WaitObject(KernelCore& kernel);
- ~WaitObject() override;
+ explicit SynchronizationObject(KernelCore& kernel);
+ ~SynchronizationObject() override;
/**
* Check if the specified thread should wait until the object is available
@@ -30,6 +30,13 @@ public:
/// Acquire/lock the object for the specified thread if it is available
virtual void Acquire(Thread* thread) = 0;
+ /// Signal this object
+ virtual void Signal();
+
+ virtual bool IsSignaled() const {
+ return is_signaled;
+ }
+
/**
* Add a thread to wait on this object
* @param thread Pointer to thread to add
@@ -60,16 +67,20 @@ public:
/// Get a const reference to the waiting threads list for debug use
const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
+protected:
+ bool is_signaled{}; // Tells if this sync object is signalled;
+
private:
/// Threads waiting for this object to become available
std::vector<std::shared_ptr<Thread>> waiting_threads;
};
-// Specialization of DynamicObjectCast for WaitObjects
+// Specialization of DynamicObjectCast for SynchronizationObjects
template <>
-inline std::shared_ptr<WaitObject> DynamicObjectCast<WaitObject>(std::shared_ptr<Object> object) {
+inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>(
+ std::shared_ptr<Object> object) {
if (object != nullptr && object->IsWaitable()) {
- return std::static_pointer_cast<WaitObject>(object);
+ return std::static_pointer_cast<SynchronizationObject>(object);
}
return nullptr;
}
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index ad464e03b..ae5f2c8bd 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -15,6 +15,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
@@ -31,11 +32,15 @@ bool Thread::ShouldWait(const Thread* thread) const {
return status != ThreadStatus::Dead;
}
+bool Thread::IsSignaled() const {
+ return status == ThreadStatus::Dead;
+}
+
void Thread::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
}
-Thread::Thread(KernelCore& kernel) : WaitObject{kernel} {}
+Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
Thread::~Thread() = default;
void Thread::Stop() {
@@ -45,7 +50,7 @@ void Thread::Stop() {
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
SetStatus(ThreadStatus::Dead);
- WakeupAllWaitingThreads();
+ Signal();
// Clean up any dangling references in objects that this thread was waiting for
for (auto& wait_object : wait_objects) {
@@ -215,7 +220,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
context.cpu_registers[1] = output;
}
-s32 Thread::GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const {
+s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
@@ -336,14 +341,16 @@ void Thread::ChangeCore(u32 core, u64 mask) {
SetCoreAndAffinityMask(core, mask);
}
-bool Thread::AllWaitObjectsReady() const {
- return std::none_of(
- wait_objects.begin(), wait_objects.end(),
- [this](const std::shared_ptr<WaitObject>& object) { return object->ShouldWait(this); });
+bool Thread::AllSynchronizationObjectsReady() const {
+ return std::none_of(wait_objects.begin(), wait_objects.end(),
+ [this](const std::shared_ptr<SynchronizationObject>& object) {
+ return object->ShouldWait(this);
+ });
}
bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<WaitObject> object, std::size_t index) {
+ std::shared_ptr<SynchronizationObject> object,
+ std::size_t index) {
ASSERT(wakeup_callback);
return wakeup_callback(reason, std::move(thread), std::move(object), index);
}
@@ -425,7 +432,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
const s32 old_core = processor_id;
if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) {
if (static_cast<s32>(ideal_core) < 0) {
- processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES);
+ processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES);
} else {
processor_id = ideal_core;
}
@@ -449,7 +456,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this);
}
- for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
scheduler.Unsuggest(current_priority, core, this);
}
@@ -460,7 +467,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
}
- for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
scheduler.Suggest(current_priority, core, this);
}
@@ -479,7 +486,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
}
- for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
scheduler.Unsuggest(old_priority, core, this);
}
@@ -496,7 +503,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
}
}
- for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
scheduler.Suggest(current_priority, core, this);
}
@@ -512,7 +519,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
return;
}
- for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
if (((old_affinity_mask >> core) & 1) != 0) {
if (core == static_cast<u32>(old_core)) {
scheduler.Unschedule(current_priority, core, this);
@@ -522,7 +529,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
}
}
- for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
if (((affinity_mask >> core) & 1) != 0) {
if (core == static_cast<u32>(processor_id)) {
scheduler.Schedule(current_priority, core, this);
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 3bcf9e137..7a4916318 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -11,7 +11,7 @@
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/result.h"
namespace Kernel {
@@ -95,7 +95,7 @@ enum class ThreadSchedMasks : u32 {
ForcePauseMask = 0x0070,
};
-class Thread final : public WaitObject {
+class Thread final : public SynchronizationObject {
public:
explicit Thread(KernelCore& kernel);
~Thread() override;
@@ -104,11 +104,11 @@ public:
using ThreadContext = Core::ARM_Interface::ThreadContext;
- using ThreadWaitObjects = std::vector<std::shared_ptr<WaitObject>>;
+ using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
using WakeupCallback =
std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<WaitObject> object, std::size_t index)>;
+ std::shared_ptr<SynchronizationObject> object, std::size_t index)>;
/**
* Creates and returns a new thread. The new thread is immediately scheduled
@@ -146,6 +146,7 @@ public:
bool ShouldWait(const Thread* thread) const override;
void Acquire(Thread* thread) override;
+ bool IsSignaled() const override;
/**
* Gets the thread's current priority
@@ -233,7 +234,7 @@ public:
*
* @param object Object to query the index of.
*/
- s32 GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const;
+ s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const;
/**
* Stops a thread, invalidating it from further use
@@ -314,15 +315,15 @@ public:
return owner_process;
}
- const ThreadWaitObjects& GetWaitObjects() const {
+ const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
return wait_objects;
}
- void SetWaitObjects(ThreadWaitObjects objects) {
+ void SetSynchronizationObjects(ThreadSynchronizationObjects objects) {
wait_objects = std::move(objects);
}
- void ClearWaitObjects() {
+ void ClearSynchronizationObjects() {
for (const auto& waiting_object : wait_objects) {
waiting_object->RemoveWaitingThread(SharedFrom(this));
}
@@ -330,7 +331,7 @@ public:
}
/// Determines whether all the objects this thread is waiting on are ready.
- bool AllWaitObjectsReady() const;
+ bool AllSynchronizationObjectsReady() const;
const MutexWaitingThreads& GetMutexWaitingThreads() const {
return wait_mutex_threads;
@@ -395,7 +396,7 @@ public:
* will cause an assertion to trigger.
*/
bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<WaitObject> object, std::size_t index);
+ std::shared_ptr<SynchronizationObject> object, std::size_t index);
u32 GetIdealCore() const {
return ideal_core;
@@ -494,7 +495,7 @@ private:
/// Objects that the thread is waiting on, in the same order as they were
/// passed to WaitSynchronization.
- ThreadWaitObjects wait_objects;
+ ThreadSynchronizationObjects wait_objects;
/// List of threads that are waiting for a mutex that is held by this thread.
MutexWaitingThreads wait_mutex_threads;
diff --git a/src/core/hle/kernel/writable_event.cpp b/src/core/hle/kernel/writable_event.cpp
index c9332e3e1..fc2f7c424 100644
--- a/src/core/hle/kernel/writable_event.cpp
+++ b/src/core/hle/kernel/writable_event.cpp
@@ -22,7 +22,6 @@ EventPair WritableEvent::CreateEventPair(KernelCore& kernel, std::string name) {
writable_event->name = name + ":Writable";
writable_event->readable = readable_event;
readable_event->name = name + ":Readable";
- readable_event->signaled = false;
return {std::move(readable_event), std::move(writable_event)};
}
@@ -40,7 +39,7 @@ void WritableEvent::Clear() {
}
bool WritableEvent::IsSignaled() const {
- return readable->signaled;
+ return readable->IsSignaled();
}
} // namespace Kernel
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index cb839e4a2..d19513cbb 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -170,8 +170,10 @@ public:
{3, nullptr, "SetContextForMultiStream"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
- {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
- {7, nullptr, "DecodeInterleavedForMultiStream"},
+ {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"},
+ {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"},
+ {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+ {9, nullptr, "DecodeInterleavedForMultiStream"},
};
// clang-format on
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 89bf8b815..e6b56a9f9 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -10,6 +10,7 @@
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
#include "core/frontend/input.h"
+#include "core/hardware_properties.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
@@ -37,11 +38,11 @@ namespace Service::HID {
// Updating period for each HID device.
// TODO(ogniK): Find actual polling rate of hid
-constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66);
[[maybe_unused]] constexpr s64 accelerometer_update_ticks =
- static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+ static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
[[maybe_unused]] constexpr s64 gyroscope_update_ticks =
- static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+ static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
IAppletResource::IAppletResource(Core::System& system)
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index ed5059047..92adde6d4 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -129,12 +129,20 @@ public:
{304, nullptr, "Disconnect"},
{400, nullptr, "Initialize"},
{401, nullptr, "Finalize"},
- {402, nullptr, "SetOperationMode"},
+ {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+
};
// clang-format on
RegisterHandlers(functions);
}
+
+ void Initialize2(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_LDN, "(STUBBED) called");
+ // Result success seem make this services start network and continue.
+ // If we just pass result error then it will stop and maybe try again and again.
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_UNKNOWN);
+ }
};
class LDNS final : public ServiceFramework<LDNS> {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 6d8bca8bb..f1966ac0e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
return GetWaitbase(input, output);
case IoctlCommand::IocChannelSetTimeoutCommand:
return ChannelSetTimeout(input, output);
+ case IoctlCommand::IocChannelSetTimeslice:
+ return ChannelSetTimeslice(input, output);
default:
break;
}
@@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>&
return 0;
}
+u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) {
+ IoctlSetTimeslice params{};
+ std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
+ LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
+
+ channel_timeslice = params.timeslice;
+
+ return 0;
+}
+
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index d056dd046..2ac74743f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -48,6 +48,7 @@ private:
IocAllocObjCtxCommand = 0xC0104809,
IocChannelGetWaitbaseCommand = 0xC0080003,
IocChannelSetTimeoutCommand = 0x40044803,
+ IocChannelSetTimeslice = 0xC004481D,
};
enum class CtxObjects : u32_le {
@@ -101,6 +102,11 @@ private:
static_assert(sizeof(IoctlChannelSetPriority) == 4,
"IoctlChannelSetPriority is incorrect size");
+ struct IoctlSetTimeslice {
+ u32_le timeslice;
+ };
+ static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size");
+
struct IoctlEventIdControl {
u32_le cmd; // 0=disable, 1=enable, 2=clear
u32_le id;
@@ -174,6 +180,7 @@ private:
u64_le user_data{};
IoctlZCullBind zcull_params{};
u32_le channel_priority{};
+ u32_le channel_timeslice{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
@@ -188,6 +195,7 @@ private:
const std::vector<u8>& input2, IoctlVersion version);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
u32 assigned_syncpoints{};
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 62752e419..134152210 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
@@ -26,8 +27,8 @@
namespace Service::NVFlinger {
-constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
-constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30);
NVFlinger::NVFlinger(Core::System& system) : system(system) {
displays.emplace_back(0, "Default", system);
@@ -222,7 +223,7 @@ void NVFlinger::Compose() {
s64 NVFlinger::GetNextTicks() const {
constexpr s64 max_hertz = 120LL;
- return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
+ return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
}
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp
index ca1a783fc..1575f0b49 100644
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -5,6 +5,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/service/time/standard_steady_clock_core.h"
namespace Service::Time::Clock {
@@ -12,7 +13,7 @@ namespace Service::Time::Clock {
TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Timing::CNTFREQ)};
+ Core::Hardware::CNTFREQ)};
TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};
if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
index c77b98189..44d5bc651 100644
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -5,6 +5,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/service/time/tick_based_steady_clock_core.h"
namespace Service::Time::Clock {
@@ -12,7 +13,7 @@ namespace Service::Time::Clock {
SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Timing::CNTFREQ)};
+ Core::Hardware::CNTFREQ)};
return {ticks_time_span.ToSeconds(), GetClockSourceId()};
}
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 8ef4efcef..749b7be70 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -6,6 +6,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
@@ -233,7 +234,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
const auto ticks{Clock::TimeSpanType::FromTicks(
Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Timing::CNTFREQ)};
+ Core::Hardware::CNTFREQ)};
const s64 base_time_point{context.offset + current_time_point.time_point -
ticks.ToSeconds()};
IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index 9b03191bf..fdaef233f 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -5,6 +5,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/service/time/clock_types.h"
#include "core/hle/service/time/steady_clock_core.h"
#include "core/hle/service/time/time_sharedmemory.h"
@@ -31,7 +32,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,
Clock::TimeSpanType current_time_point) {
const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Timing::CNTFREQ)};
+ Core::Hardware::CNTFREQ)};
const Clock::SteadyClockContext context{
static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
clock_source_id};
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp
index d1e6bed93..4472500d2 100644
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -9,6 +9,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/hid/controllers/npad.h"
#include "core/hle/service/hid/hid.h"
@@ -17,7 +18,7 @@
namespace Memory {
-constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 12);
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12);
constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
diff --git a/src/core/settings.h b/src/core/settings.h
index e1a9a0ffa..f837d3fbc 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -429,6 +429,7 @@ struct Values {
int vulkan_device;
float resolution_factor;
+ int aspect_ratio;
bool use_frame_limit;
u16 frame_limit;
bool use_disk_shader_cache;
diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp
index 55e0dbc49..1e060f009 100644
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -7,13 +7,14 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/memory.h"
#include "core/tools/freezer.h"
namespace Tools {
namespace {
-constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
+constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) {
switch (width) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index db9332d00..4b0c6346f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
memory_manager.h
morton.cpp
morton.h
+ query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
rasterizer_cache.cpp
@@ -74,6 +75,8 @@ add_library(video_core STATIC
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
+ renderer_opengl/gl_query_cache.cpp
+ renderer_opengl/gl_query_cache.h
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
@@ -177,6 +180,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
+ renderer_vulkan/vk_query_cache.cpp
+ renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..842cdcbcf 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,17 +4,21 @@
#include <cinttypes>
#include <cstring>
+#include <optional>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
+using VideoCore::QueryType;
+
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
@@ -399,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessQueryCondition();
break;
}
+ case MAXWELL3D_REG_INDEX(counter_reset): {
+ ProcessCounterReset();
+ break;
+ }
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
@@ -519,61 +527,51 @@ void Maxwell3D::ProcessFirmwareCall4() {
regs.reg_array[0xd00] = 1;
}
-void Maxwell3D::ProcessQueryGet() {
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
+ struct LongQueryResult {
+ u64_le value;
+ u64_le timestamp;
+ };
+ static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
const GPUVAddr sequence_address{regs.query.QueryAddress()};
- // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
- // VAddr before writing.
+ if (long_query) {
+ // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+ // GPU, this command may actually take a while to complete in real hardware due to GPU
+ // wait queues.
+ LongQueryResult query_result{payload, system.GPU().GetTicks()};
+ memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ } else {
+ memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+ }
+}
+void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u64 result = 0;
-
- // TODO(Subv): Support the other query variables
- switch (regs.query.query_get.select) {
- case Regs::QuerySelect::Zero:
- // This seems to actually write the query sequence to the query address.
- result = regs.query.query_sequence;
+ switch (regs.query.query_get.operation) {
+ case Regs::QueryOperation::Release:
+ StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
break;
- default:
- result = 1;
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
- }
-
- // TODO(Subv): Research and implement how query sync conditions work.
-
- struct LongQueryResult {
- u64_le value;
- u64_le timestamp;
- };
- static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
-
- switch (regs.query.query_get.mode) {
- case Regs::QueryMode::Write:
- case Regs::QueryMode::Write2: {
- u32 sequence = regs.query.query_sequence;
- if (regs.query.query_get.short_query) {
- // Write the current query sequence to the sequence address.
- // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
- // query.
- memory_manager.Write<u32>(sequence_address, sequence);
- } else {
- // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
- // GPU, this command may actually take a while to complete in real hardware due to GPU
- // wait queues.
- LongQueryResult query_result{};
- query_result.value = result;
- // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
- query_result.timestamp = system.CoreTiming().GetTicks();
- memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ case Regs::QueryOperation::Acquire:
+ // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
+ // matches the current payload.
+ UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+ break;
+ case Regs::QueryOperation::Counter:
+ if (const std::optional<u64> result = GetQueryResult()) {
+ // If the query returns an empty optional it means it's cached and deferred.
+ // In this case we have a non-empty result, so we stamp it immediately.
+ StampQueryResult(*result, regs.query.query_get.short_query == 0);
}
break;
- }
+ case Regs::QueryOperation::Trap:
+ UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+ break;
default:
- UNIMPLEMENTED_MSG("Query mode {} not implemented",
- static_cast<u32>(regs.query.query_get.mode.Value()));
+ UNIMPLEMENTED_MSG("Unknown query operation");
+ break;
}
}
@@ -590,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
}
case Regs::ConditionMode::ResNonZero: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
break;
}
case Regs::ConditionMode::Equal: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on =
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
break;
}
case Regs::ConditionMode::NotEqual: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on =
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
break;
@@ -616,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
}
}
+void Maxwell3D::ProcessCounterReset() {
+ switch (regs.counter_reset) {
+ case Regs::CounterReset::SampleCnt:
+ rasterizer.ResetCounter(QueryType::SamplesPassed);
+ break;
+ default:
+ LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
+ static_cast<int>(regs.counter_reset));
+ break;
+ }
+}
+
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
@@ -658,6 +668,22 @@ void Maxwell3D::DrawArrays() {
}
}
+std::optional<u64> Maxwell3D::GetQueryResult() {
+ switch (regs.query.query_get.select) {
+ case Regs::QuerySelect::Zero:
+ return 0;
+ case Regs::QuerySelect::SamplesPassed:
+ // Deferred.
+ rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ system.GPU().GetTicks());
+ return {};
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
+ return 1;
+ }
+}
+
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[stage_index];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7b1912a66..26939be3f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
#include <array>
#include <bitset>
+#include <optional>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -71,12 +72,11 @@ public:
static constexpr std::size_t MaxConstBuffers = 18;
static constexpr std::size_t MaxConstBufferSize = 0x10000;
- enum class QueryMode : u32 {
- Write = 0,
- Sync = 1,
- // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
- // is.
- Write2 = 2,
+ enum class QueryOperation : u32 {
+ Release = 0,
+ Acquire = 1,
+ Counter = 2,
+ Trap = 3,
};
enum class QueryUnit : u32 {
@@ -410,6 +410,27 @@ public:
Linear = 1,
};
+ enum class CounterReset : u32 {
+ SampleCnt = 0x01,
+ Unk02 = 0x02,
+ Unk03 = 0x03,
+ Unk04 = 0x04,
+ EmittedPrimitives = 0x10, // Not tested
+ Unk11 = 0x11,
+ Unk12 = 0x12,
+ Unk13 = 0x13,
+ Unk15 = 0x15,
+ Unk16 = 0x16,
+ Unk17 = 0x17,
+ Unk18 = 0x18,
+ Unk1A = 0x1A,
+ Unk1B = 0x1B,
+ Unk1C = 0x1C,
+ Unk1D = 0x1D,
+ Unk1E = 0x1E,
+ GeneratedPrimitives = 0x1F,
+ };
+
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
@@ -858,7 +879,7 @@ public:
BitField<7, 1, u32> c7;
} clip_distance_enabled;
- INSERT_UNION_PADDING_WORDS(0x1);
+ u32 samplecnt_enable;
float point_size;
@@ -866,7 +887,11 @@ public:
u32 point_sprite_enable;
- INSERT_UNION_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x3);
+
+ CounterReset counter_reset;
+
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 zeta_enable;
@@ -1081,7 +1106,7 @@ public:
u32 query_sequence;
union {
u32 raw;
- BitField<0, 2, QueryMode> mode;
+ BitField<0, 2, QueryOperation> operation;
BitField<4, 1, u32> fence;
BitField<12, 4, QueryUnit> unit;
BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1413,9 +1438,15 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
- // Handles Conditional Rendering
+ /// Writes the query result accordingly.
+ void StampQueryResult(u64 payload, bool long_query);
+
+ /// Handles conditional rendering.
void ProcessQueryCondition();
+ /// Handles counter resets.
+ void ProcessCounterReset();
+
/// Handles writes to syncing register.
void ProcessSyncPoint();
@@ -1432,6 +1463,9 @@ private:
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
+
+ /// Returns a query's value or an empty object if the value will be deferred through a cache.
+ std::optional<u64> GetQueryResult();
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1497,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
+ASSERT_REG_POSITION(samplecnt_enable, 0x545);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
+ASSERT_REG_POSITION(counter_reset, 0x54C);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
ASSERT_REG_POSITION(condition, 0x554);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 402869fde..c9bc83cd7 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1677,11 +1677,11 @@ union Instruction {
} xmad;
union {
- BitField<20, 14, u64> offset;
+ BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;
u64 GetOffset() const {
- return offset * 4;
+ return shifted_offset * 4;
}
} cbuf34;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b8..7d7137109 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/core_timing_util.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -23,7 +24,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>(system);
+ memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
+u64 GPU::GetTicks() const {
+ // This values were reversed engineered by fincs from NVN
+ // The gpu clock is reported in units of 385/625 nanoseconds
+ constexpr u64 gpu_ticks_num = 384;
+ constexpr u64 gpu_ticks_den = 625;
+
+ const u64 cpu_ticks = system.CoreTiming().GetTicks();
+ const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+ const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+ const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+ return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+}
+
void GPU::FlushCommands() {
renderer.Rasterizer().FlushCommands();
}
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
- block.timestamp = system.CoreTiming().GetTicks();
+ block.timestamp = GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bb..07727210c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+ u64 GetTicks() const;
+
std::unique_lock<std::mutex> LockSync() {
return std::unique_lock{sync_mutex};
}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f1d50be3e..11848fbce 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,12 +9,13 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
namespace Tegra {
-MemoryManager::MemoryManager(Core::System& system) : system{system} {
+MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+ : rasterizer{rasterizer}, system{system} {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Common::PageType::Unmapped);
@@ -83,8 +84,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
const auto cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
- system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
-
+ rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
->VMManager()
@@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
- system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset};
- system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
+ rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
- system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 393447eb4..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,10 @@
#include "common/common_types.h"
#include "common/page_table.h"
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Core {
class System;
}
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system);
+ explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
~MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -172,6 +176,7 @@ private:
Common::PageTable page_table{page_bits};
VMAMap vma_map;
+ VideoCore::RasterizerInterface& rasterizer;
Core::System& system;
};
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 000000000..e66054ed0
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,359 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class QueryCache, class HostCounter>
+class CounterStreamBase {
+public:
+ explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
+ : cache{cache}, type{type} {}
+
+ /// Updates the state of the stream, enabling or disabling as needed.
+ void Update(bool enabled) {
+ if (enabled) {
+ Enable();
+ } else {
+ Disable();
+ }
+ }
+
+ /// Resets the stream to zero. It doesn't disable the query after resetting.
+ void Reset() {
+ if (current) {
+ current->EndQuery();
+
+ // Immediately start a new query to avoid disabling its state.
+ current = cache.Counter(nullptr, type);
+ }
+ last = nullptr;
+ }
+
+ /// Returns the current counter slicing as needed.
+ std::shared_ptr<HostCounter> Current() {
+ if (!current) {
+ return nullptr;
+ }
+ current->EndQuery();
+ last = std::move(current);
+ current = cache.Counter(last, type);
+ return last;
+ }
+
+ /// Returns true when the counter stream is enabled.
+ bool IsEnabled() const {
+ return current != nullptr;
+ }
+
+private:
+ /// Enables the stream.
+ void Enable() {
+ if (current) {
+ return;
+ }
+ current = cache.Counter(last, type);
+ }
+
+ // Disables the stream.
+ void Disable() {
+ if (current) {
+ current->EndQuery();
+ }
+ last = std::exchange(current, nullptr);
+ }
+
+ QueryCache& cache;
+ const VideoCore::QueryType type;
+
+ std::shared_ptr<HostCounter> current;
+ std::shared_ptr<HostCounter> last;
+};
+
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
+ class QueryPool>
+class QueryCacheBase {
+public:
+ explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+ : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
+ static_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}}} {}
+
+ void InvalidateRegion(CacheAddr addr, std::size_t size) {
+ std::unique_lock lock{mutex};
+ FlushAndRemoveRegion(addr, size);
+ }
+
+ void FlushRegion(CacheAddr addr, std::size_t size) {
+ std::unique_lock lock{mutex};
+ FlushAndRemoveRegion(addr, size);
+ }
+
+ /**
+ * Records a query in GPU mapped memory, potentially marked with a timestamp.
+ * @param gpu_addr GPU address to flush to when the mapped memory is read.
+ * @param type Query type, e.g. SamplesPassed.
+ * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
+ */
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
+ std::unique_lock lock{mutex};
+ auto& memory_manager = system.GPU().MemoryManager();
+ const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+
+ CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
+ if (!query) {
+ const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+
+ query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
+ }
+
+ query->BindCounter(Stream(type).Current(), timestamp);
+ }
+
+ /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
+ void UpdateCounters() {
+ std::unique_lock lock{mutex};
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
+ }
+
+ /// Resets a counter to zero. It doesn't disable the query after resetting.
+ void ResetCounter(VideoCore::QueryType type) {
+ std::unique_lock lock{mutex};
+ Stream(type).Reset();
+ }
+
+ /// Disable all active streams. Expected to be called at the end of a command buffer.
+ void DisableStreams() {
+ std::unique_lock lock{mutex};
+ for (auto& stream : streams) {
+ stream.Update(false);
+ }
+ }
+
+ /// Returns a new host counter.
+ std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type) {
+ return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
+ type);
+ }
+
+ /// Returns the counter stream of the specified type.
+ CounterStream& Stream(VideoCore::QueryType type) {
+ return streams[static_cast<std::size_t>(type)];
+ }
+
+ /// Returns the counter stream of the specified type.
+ const CounterStream& Stream(VideoCore::QueryType type) const {
+ return streams[static_cast<std::size_t>(type)];
+ }
+
+protected:
+ std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
+
+private:
+ /// Flushes a memory range to guest memory and removes it from the cache.
+ void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
+ const u64 addr_begin = static_cast<u64>(addr);
+ const u64 addr_end = addr_begin + static_cast<u64>(size);
+ const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
+ const u64 cache_begin = query.GetCacheAddr();
+ const u64 cache_end = cache_begin + query.SizeInBytes();
+ return cache_begin < addr_end && addr_begin < cache_end;
+ };
+
+ const u64 page_end = addr_end >> PAGE_SHIFT;
+ for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+ const auto& it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ continue;
+ }
+ auto& contents = it->second;
+ for (auto& query : contents) {
+ if (!in_range(query)) {
+ continue;
+ }
+ rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
+ query.Flush();
+ }
+ contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
+ std::end(contents));
+ }
+ }
+
+ /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
+ CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
+ rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
+ const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
+ return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
+ host_ptr);
+ }
+
+ /// Tries to a get a cached query. Returns nullptr on failure.
+ CachedQuery* TryGet(CacheAddr addr) {
+ const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+ const auto it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ return nullptr;
+ }
+ auto& contents = it->second;
+ const auto found =
+ std::find_if(std::begin(contents), std::end(contents),
+ [addr](auto& query) { return query.GetCacheAddr() == addr; });
+ return found != std::end(contents) ? &*found : nullptr;
+ }
+
+ static constexpr std::uintptr_t PAGE_SIZE = 4096;
+ static constexpr unsigned PAGE_SHIFT = 12;
+
+ Core::System& system;
+ VideoCore::RasterizerInterface& rasterizer;
+
+ std::recursive_mutex mutex;
+
+ std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
+
+ std::array<CounterStream, VideoCore::NumQueryTypes> streams;
+};
+
+template <class QueryCache, class HostCounter>
+class HostCounterBase {
+public:
+ explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
+ : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
+ // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
+ constexpr u64 depth_threshold = 96;
+ if (depth > depth_threshold) {
+ depth = 0;
+ base_result = dependency->Query();
+ dependency = nullptr;
+ }
+ }
+ virtual ~HostCounterBase() = default;
+
+ /// Returns the current value of the query.
+ u64 Query() {
+ if (result) {
+ return *result;
+ }
+
+ u64 value = BlockingQuery() + base_result;
+ if (dependency) {
+ value += dependency->Query();
+ dependency = nullptr;
+ }
+
+ result = value;
+ return *result;
+ }
+
+ /// Returns true when flushing this query will potentially wait.
+ bool WaitPending() const noexcept {
+ return result.has_value();
+ }
+
+ u64 Depth() const noexcept {
+ return depth;
+ }
+
+protected:
+ /// Returns the value of query from the backend API blocking as needed.
+ virtual u64 BlockingQuery() const = 0;
+
+private:
+ std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
+ std::optional<u64> result; ///< Filled with the already returned value.
+ u64 depth; ///< Number of nested dependencies.
+ u64 base_result = 0; ///< Equivalent to nested dependencies value.
+};
+
+template <class HostCounter>
+class CachedQueryBase {
+public:
+ explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
+ : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+ virtual ~CachedQueryBase() = default;
+
+ CachedQueryBase(CachedQueryBase&&) noexcept = default;
+ CachedQueryBase(const CachedQueryBase&) = delete;
+
+ CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
+ CachedQueryBase& operator=(const CachedQueryBase&) = delete;
+
+ /// Flushes the query to guest memory.
+ virtual void Flush() {
+ // When counter is nullptr it means that it's just been reseted. We are supposed to write a
+ // zero in these cases.
+ const u64 value = counter ? counter->Query() : 0;
+ std::memcpy(host_ptr, &value, sizeof(u64));
+
+ if (timestamp) {
+ std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+ }
+ }
+
+ /// Binds a counter to this query.
+ void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+ if (counter) {
+ // If there's an old counter set it means the query is being rewritten by the game.
+ // To avoid losing the data forever, flush here.
+ Flush();
+ }
+ counter = std::move(counter_);
+ timestamp = timestamp_;
+ }
+
+ VAddr CpuAddr() const noexcept {
+ return cpu_addr;
+ }
+
+ CacheAddr GetCacheAddr() const noexcept {
+ return ToCacheAddr(host_ptr);
+ }
+
+ u64 SizeInBytes() const noexcept {
+ return SizeInBytes(timestamp.has_value());
+ }
+
+ static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
+ return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+ }
+
+protected:
+ /// Returns true when querying the counter may potentially block.
+ bool WaitPending() const noexcept {
+ return counter && counter->WaitPending();
+ }
+
+private:
+ static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
+ static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
+ static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
+
+ VAddr cpu_addr; ///< Guest CPU address.
+ u8* host_ptr; ///< Writable host pointer.
+ std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
+ std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index c586cd6fe..e9f1436f0 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
#include <atomic>
#include <functional>
+#include <optional>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@@ -17,6 +18,11 @@ class MemoryManager;
namespace VideoCore {
+enum class QueryType {
+ SamplesPassed,
+};
+constexpr std::size_t NumQueryTypes = 1;
+
enum class LoadCallbackStage {
Prepare,
Decompile,
@@ -41,6 +47,12 @@ public:
/// Dispatches a compute shader invocation
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+ /// Resets the counter of a query
+ virtual void ResetCounter(QueryType type) = 0;
+
+ /// Records a GPU query and caches it
+ virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
+
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
new file mode 100644
index 000000000..f12e9f55f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -0,0 +1,120 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
+
+namespace OpenGL {
+
+namespace {
+
+constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
+
+constexpr GLenum GetTarget(VideoCore::QueryType type) {
+ return QueryTargets[static_cast<std::size_t>(type)];
+}
+
+} // Anonymous namespace
+
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+ : VideoCommon::QueryCacheBase<
+ QueryCache, CachedQuery, CounterStream, HostCounter,
+ std::vector<OGLQuery>>{system,
+ static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
+ gl_rasterizer{gl_rasterizer} {}
+
+QueryCache::~QueryCache() = default;
+
+OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
+ auto& reserve = query_pools[static_cast<std::size_t>(type)];
+ OGLQuery query;
+ if (reserve.empty()) {
+ query.Create(GetTarget(type));
+ return query;
+ }
+
+ query = std::move(reserve.back());
+ reserve.pop_back();
+ return query;
+}
+
+void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
+ query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
+}
+
+bool QueryCache::AnyCommandQueued() const noexcept {
+ return gl_rasterizer.AnyCommandQueued();
+}
+
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type)
+ : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+ type{type}, query{cache.AllocateQuery(type)} {
+ glBeginQuery(GetTarget(type), query.handle);
+}
+
+HostCounter::~HostCounter() {
+ cache.Reserve(type, std::move(query));
+}
+
+void HostCounter::EndQuery() {
+ if (!cache.AnyCommandQueued()) {
+ // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
+ // having any of these causes a lock. glFlush is considered a command, so we can safely wait
+ // for this. Insert to the OpenGL command stream a flush.
+ glFlush();
+ }
+ glEndQuery(GetTarget(type));
+}
+
+u64 HostCounter::BlockingQuery() const {
+ GLint64 value;
+ glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
+ return static_cast<u64>(value);
+}
+
+CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
+ : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+
+CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
+ : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
+
+CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+ VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
+ cache = rhs.cache;
+ type = rhs.type;
+ return *this;
+}
+
+void CachedQuery::Flush() {
+ // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+ // To avoid this disable and re-enable keeping the dependency stream.
+ // But we only have to do this if we have pending waits to be done.
+ auto& stream = cache->Stream(type);
+ const bool slice_counter = WaitPending() && stream.IsEnabled();
+ if (slice_counter) {
+ stream.Update(false);
+ }
+
+ VideoCommon::CachedQueryBase<HostCounter>::Flush();
+
+ if (slice_counter) {
+ stream.Update(true);
+ }
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
new file mode 100644
index 000000000..d8e7052a1
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -0,0 +1,78 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/query_cache.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace Core {
+class System;
+}
+
+namespace OpenGL {
+
+class CachedQuery;
+class HostCounter;
+class QueryCache;
+class RasterizerOpenGL;
+
+using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
+
+class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
+ HostCounter, std::vector<OGLQuery>> {
+public:
+ explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+ ~QueryCache();
+
+ OGLQuery AllocateQuery(VideoCore::QueryType type);
+
+ void Reserve(VideoCore::QueryType type, OGLQuery&& query);
+
+ bool AnyCommandQueued() const noexcept;
+
+private:
+ RasterizerOpenGL& gl_rasterizer;
+};
+
+class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
+public:
+ explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type);
+ ~HostCounter();
+
+ void EndQuery();
+
+private:
+ u64 BlockingQuery() const override;
+
+ QueryCache& cache;
+ const VideoCore::QueryType type;
+ OGLQuery query;
+};
+
+class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
+public:
+ explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
+ u8* host_ptr);
+ CachedQuery(CachedQuery&& rhs) noexcept;
+ CachedQuery(const CachedQuery&) = delete;
+
+ CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+ CachedQuery& operator=(const CachedQuery&) = delete;
+
+ void Flush() override;
+
+private:
+ QueryCache* cache;
+ VideoCore::QueryType type;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b0eb14c8b..4bdc8db85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,6 +25,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
- shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
- buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+ shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
+ screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
+
+ ++num_queued_commands;
}
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
+
+ query_cache.UpdateCounters();
SyncRasterizeEnable(state);
SyncColorMask();
@@ -638,6 +644,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
glTextureBarrier();
}
+ ++num_queued_commands;
+
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
@@ -707,6 +715,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
state.ApplyProgramPipeline();
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
+ ++num_queued_commands;
+}
+
+void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
+ query_cache.ResetCounter(type);
+}
+
+void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
+ std::optional<u64> timestamp) {
+ query_cache.Query(gpu_addr, type, timestamp);
}
void RasterizerOpenGL::FlushAll() {}
@@ -718,6 +736,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
}
texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
+ query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -728,6 +747,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
+ query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -738,10 +758,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
}
void RasterizerOpenGL::FlushCommands() {
+ // Only flush when we have commands queued to OpenGL.
+ if (num_queued_commands == 0) {
+ return;
+ }
+ num_queued_commands = 0;
glFlush();
}
void RasterizerOpenGL::TickFrame() {
+ // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
+ num_queued_commands = 0;
+
buffer_cache.TickFrame();
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 0501f3828..c772fd4ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -61,6 +62,8 @@ public:
bool DrawMultiBatch(bool is_indexed) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
+ void ResetCounter(VideoCore::QueryType type) override;
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -75,6 +78,11 @@ public:
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
+ /// Returns true when there are commands queued to the OpenGL server.
+ bool AnyCommandQueued() const {
+ return num_queued_commands > 0;
+ }
+
private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
@@ -180,10 +188,23 @@ private:
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
- /// Check for extension that are not strictly required
- /// but are needed for correct emulation
+ /// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
+ std::size_t CalculateVertexArraysSize() const;
+
+ std::size_t CalculateIndexBufferSize() const;
+
+ /// Updates and returns a vertex array object representing current vertex format
+ GLuint SetupVertexFormat();
+
+ void SetupVertexBuffer(GLuint vao);
+ void SetupVertexInstances(GLuint vao);
+
+ GLintptr SetupIndexBuffer();
+
+ void SetupShaders(GLenum primitive_mode);
+
const Device device;
OpenGLState state;
@@ -191,6 +212,7 @@ private:
ShaderCacheOpenGL shader_cache;
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;
+ QueryCache query_cache;
Core::System& system;
ScreenInfo& screen_info;
@@ -208,19 +230,8 @@ private:
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
- std::size_t CalculateVertexArraysSize() const;
-
- std::size_t CalculateIndexBufferSize() const;
-
- /// Updates and returns a vertex array object representing current vertex format
- GLuint SetupVertexFormat();
-
- void SetupVertexBuffer(GLuint vao);
- void SetupVertexInstances(GLuint vao);
-
- GLintptr SetupIndexBuffer();
-
- void SetupShaders(GLenum primitive_mode);
+ /// Number of commands queued to the OpenGL driver. Reseted on flush.
+ std::size_t num_queued_commands = 0;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 5c96c1d46..f0ddfb276 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -207,4 +207,21 @@ void OGLFramebuffer::Release() {
handle = 0;
}
+void OGLQuery::Create(GLenum target) {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glCreateQueries(target, 1, &handle);
+}
+
+void OGLQuery::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteQueries(1, &handle);
+ handle = 0;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 3a85a1d4c..514d1d165 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -266,4 +266,29 @@ public:
GLuint handle = 0;
};
+class OGLQuery : private NonCopyable {
+public:
+ OGLQuery() = default;
+
+ OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLQuery() {
+ Release();
+ }
+
+ OGLQuery& operator=(OGLQuery&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
+ /// Creates a new internal OpenGL resource and stores the handle
+ void Create(GLenum target);
+
+ /// Deletes the internal OpenGL resource
+ void Release();
+
+ GLuint handle = 0;
+};
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 9840f26e5..588a6835f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.depthBiasClamp = true;
features.geometryShader = true;
features.tessellationShader = true;
+ features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
features.shaderStorageImageWriteWithoutFormat = true;
@@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
SetNext(next, bit8_storage);
+ vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
+ host_query_reset.hostQueryReset = true;
+ SetNext(next, host_query_reset);
+
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
float16_int8.shaderFloat16 = true;
@@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+ VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};
std::bitset<required_extensions.size()> available_extensions{};
@@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
+ std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
@@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
- extensions.reserve(13);
+ extensions.reserve(14);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
+ extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
new file mode 100644
index 000000000..ffbf60dda
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -0,0 +1,122 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+namespace {
+
+constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
+
+constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
+ return QUERY_TARGETS[static_cast<std::size_t>(type)];
+}
+
+} // Anonymous namespace
+
+QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
+
+QueryPool::~QueryPool() = default;
+
+void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
+ device = &device_;
+ type = type_;
+}
+
+std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
+ std::size_t index;
+ do {
+ index = CommitResource(fence);
+ } while (usage[index]);
+ usage[index] = true;
+
+ return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)};
+}
+
+void QueryPool::Allocate(std::size_t begin, std::size_t end) {
+ usage.resize(end);
+
+ const auto dev = device->GetLogical();
+ const u32 size = static_cast<u32>(end - begin);
+ const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
+ pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
+}
+
+void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
+ const auto it =
+ std::find_if(std::begin(pools), std::end(pools),
+ [query_pool = query.first](auto& pool) { return query_pool == *pool; });
+ ASSERT(it != std::end(pools));
+
+ const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
+ usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+}
+
+VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ const VKDevice& device, VKScheduler& scheduler)
+ : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
+ QueryPool>{system, rasterizer},
+ device{device}, scheduler{scheduler} {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
+ query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
+ }
+}
+
+VKQueryCache::~VKQueryCache() = default;
+
+std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
+ return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
+}
+
+void VKQueryCache::Reserve(VideoCore::QueryType type,
+ std::pair<vk::QueryPool, std::uint32_t> query) {
+ query_pools[static_cast<std::size_t>(type)].Reserve(query);
+}
+
+HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type)
+ : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+ type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
+ const auto dev = cache.Device().GetLogical();
+ cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) {
+ dev.resetQueryPoolEXT(query.first, query.second, 1, dld);
+ cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld);
+ });
+}
+
+HostCounter::~HostCounter() {
+ cache.Reserve(type, query);
+}
+
+void HostCounter::EndQuery() {
+ cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
+ cmdbuf.endQuery(query.first, query.second, dld);
+ });
+}
+
+u64 HostCounter::BlockingQuery() const {
+ if (ticks >= cache.Scheduler().Ticks()) {
+ cache.Scheduler().Flush();
+ }
+
+ const auto dev = cache.Device().GetLogical();
+ const auto& dld = cache.Device().GetDispatchLoader();
+ u64 value;
+ dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
+ vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
+ return value;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
new file mode 100644
index 000000000..c3092ee96
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -0,0 +1,104 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/query_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class CachedQuery;
+class HostCounter;
+class VKDevice;
+class VKQueryCache;
+class VKScheduler;
+
+using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
+
+class QueryPool final : public VKFencedPool {
+public:
+ explicit QueryPool();
+ ~QueryPool() override;
+
+ void Initialize(const VKDevice& device, VideoCore::QueryType type);
+
+ std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
+
+ void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
+
+protected:
+ void Allocate(std::size_t begin, std::size_t end) override;
+
+private:
+ static constexpr std::size_t GROW_STEP = 512;
+
+ const VKDevice* device = nullptr;
+ VideoCore::QueryType type = {};
+
+ std::vector<UniqueQueryPool> pools;
+ std::vector<bool> usage;
+};
+
+class VKQueryCache final
+ : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
+ QueryPool> {
+public:
+ explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ const VKDevice& device, VKScheduler& scheduler);
+ ~VKQueryCache();
+
+ std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
+
+ void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
+
+ const VKDevice& Device() const noexcept {
+ return device;
+ }
+
+ VKScheduler& Scheduler() const noexcept {
+ return scheduler;
+ }
+
+private:
+ const VKDevice& device;
+ VKScheduler& scheduler;
+};
+
+class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
+public:
+ explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type);
+ ~HostCounter();
+
+ void EndQuery();
+
+private:
+ u64 BlockingQuery() const override;
+
+ VKQueryCache& cache;
+ const VideoCore::QueryType type;
+ const std::pair<vk::QueryPool, std::uint32_t> query;
+ const u64 ticks;
+};
+
+class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
+public:
+ explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
+ : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index aada38702..79aa121ed 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
staging_pool),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
- sampler_cache(device) {}
+ sampler_cache(device), query_cache(system, *this, device, scheduler) {
+ scheduler.SetQueryCache(query_cache);
+}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
FlushWork();
+ query_cache.UpdateCounters();
+
const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
@@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
+ query_cache.UpdateCounters();
+
const auto& gpu = system.GPU().Maxwell3D();
if (!system.GPU().Maxwell3D().ShouldExecute()) {
return;
@@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
sampled_views.clear();
image_views.clear();
+ query_cache.UpdateCounters();
+
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ComputePipelineCacheKey key{
code_addr,
@@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
});
}
+void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
+ query_cache.ResetCounter(type);
+}
+
+void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
+ std::optional<u64> timestamp) {
+ query_cache.Query(gpu_addr, type, timestamp);
+}
+
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
+ query_cache.FlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
+ query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 7be71e734..add1ad88c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -24,6 +24,7 @@
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
@@ -96,7 +97,7 @@ struct ImageView {
vk::ImageLayout* layout = nullptr;
};
-class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
+class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
VKScreenInfo& screen_info, const VKDevice& device,
@@ -108,6 +109,8 @@ public:
bool DrawMultiBatch(bool is_indexed) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
+ void ResetCounter(VideoCore::QueryType type) override;
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -247,6 +250,7 @@ private:
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
+ VKQueryCache query_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index d66133ad1..92bd6c344 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
}
void VKScheduler::AllocateNewContext() {
+ ++ticks;
+
std::unique_lock lock{mutex};
current_fence = next_fence;
next_fence = &resource_manager.CommitFence();
@@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
device.GetDispatchLoader());
+ // Enable counters once again. These are disabled when a command buffer is finished.
+ if (query_cache) {
+ query_cache->UpdateCounters();
+ }
}
void VKScheduler::InvalidateState() {
@@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
}
void VKScheduler::EndPendingOperations() {
+ query_cache->DisableStreams();
EndRenderPass();
}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bcdffbba0..62fd7858b 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,6 +4,7 @@
#pragma once
+#include <atomic>
#include <condition_variable>
#include <memory>
#include <optional>
@@ -18,6 +19,7 @@ namespace Vulkan {
class VKDevice;
class VKFence;
+class VKQueryCache;
class VKResourceManager;
class VKFenceView {
@@ -67,6 +69,11 @@ public:
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(vk::Pipeline pipeline);
+ /// Assigns the query cache.
+ void SetQueryCache(VKQueryCache& query_cache_) {
+ query_cache = &query_cache_;
+ }
+
/// Returns true when viewports have been set in the current command buffer.
bool TouchViewports() {
return std::exchange(state.viewports, true);
@@ -112,6 +119,11 @@ public:
return current_fence;
}
+ /// Returns the current command buffer tick.
+ u64 Ticks() const {
+ return ticks;
+ }
+
private:
class Command {
public:
@@ -205,6 +217,8 @@ private:
const VKDevice& device;
VKResourceManager& resource_manager;
+ VKQueryCache* query_cache = nullptr;
+
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
@@ -227,6 +241,7 @@ private:
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
+ std::atomic<u64> ticks = 0;
bool quit = false;
};
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index e60875cc4..21366869d 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::ICMP_CR:
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+ return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
case OpCode::Id::ICMP_R:
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::ICMP_RC:
return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::ICMP_IMM:
return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
default:
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index f992bbe2a..70d1c055b 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::BFI_RC:
return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::BFI_IMM_R:
return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
default:
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index cd94693c1..6209fff75 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -630,6 +630,7 @@ void Config::ReadRendererValues() {
Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
Settings::values.resolution_factor =
ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
+ Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
Settings::values.use_frame_limit =
ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
@@ -1064,6 +1065,7 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
WriteSetting(QStringLiteral("resolution_factor"),
static_cast<double>(Settings::values.resolution_factor), 1.0);
+ WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index f57a24e36..ea899c080 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -97,6 +97,7 @@ void ConfigureGraphics::SetConfiguration() {
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
ui->resolution_factor_combobox->setCurrentIndex(
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
+ ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
@@ -114,6 +115,7 @@ void ConfigureGraphics::ApplyConfiguration() {
Settings::values.vulkan_device = vulkan_device;
Settings::values.resolution_factor =
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
+ Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index e24372204..db60426ab 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -139,6 +139,41 @@
</layout>
</item>
<item>
+ <layout class="QHBoxLayout" name="horizontalLayout_6">
+ <item>
+ <widget class="QLabel" name="ar_label">
+ <property name="text">
+ <string>Aspect Ratio:</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QComboBox" name="aspect_ratio_combobox">
+ <item>
+ <property name="text">
+ <string>Default (16:9)</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Force 4:3</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Force 21:9</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Stretch to Window</string>
+ </property>
+ </item>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ <item>
<layout class="QHBoxLayout" name="horizontalLayout_3">
<item>
<widget class="QLabel" name="bg_label">
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 727bd8a94..3f1a94627 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -12,8 +12,8 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/wait_object.h"
#include "core/memory.h"
WaitTreeItem::WaitTreeItem() = default;
@@ -133,8 +133,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
return list;
}
-WaitTreeWaitObject::WaitTreeWaitObject(const Kernel::WaitObject& o) : object(o) {}
-WaitTreeWaitObject::~WaitTreeWaitObject() = default;
+WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& o)
+ : object(o) {}
+WaitTreeSynchronizationObject::~WaitTreeSynchronizationObject() = default;
WaitTreeExpandableItem::WaitTreeExpandableItem() = default;
WaitTreeExpandableItem::~WaitTreeExpandableItem() = default;
@@ -143,25 +144,26 @@ bool WaitTreeExpandableItem::IsExpandable() const {
return true;
}
-QString WaitTreeWaitObject::GetText() const {
+QString WaitTreeSynchronizationObject::GetText() const {
return tr("[%1]%2 %3")
.arg(object.GetObjectId())
.arg(QString::fromStdString(object.GetTypeName()),
QString::fromStdString(object.GetName()));
}
-std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitObject& object) {
+std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::make(
+ const Kernel::SynchronizationObject& object) {
switch (object.GetHandleType()) {
case Kernel::HandleType::ReadableEvent:
return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object));
case Kernel::HandleType::Thread:
return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object));
default:
- return std::make_unique<WaitTreeWaitObject>(object);
+ return std::make_unique<WaitTreeSynchronizationObject>(object);
}
}
-std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() const {
+std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChildren() const {
std::vector<std::unique_ptr<WaitTreeItem>> list;
const auto& threads = object.GetWaitingThreads();
@@ -173,8 +175,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() con
return list;
}
-WaitTreeObjectList::WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::WaitObject>>& list,
- bool w_all)
+WaitTreeObjectList::WaitTreeObjectList(
+ const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, bool w_all)
: object_list(list), wait_all(w_all) {}
WaitTreeObjectList::~WaitTreeObjectList() = default;
@@ -188,11 +190,12 @@ QString WaitTreeObjectList::GetText() const {
std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeObjectList::GetChildren() const {
std::vector<std::unique_ptr<WaitTreeItem>> list(object_list.size());
std::transform(object_list.begin(), object_list.end(), list.begin(),
- [](const auto& t) { return WaitTreeWaitObject::make(*t); });
+ [](const auto& t) { return WaitTreeSynchronizationObject::make(*t); });
return list;
}
-WaitTreeThread::WaitTreeThread(const Kernel::Thread& thread) : WaitTreeWaitObject(thread) {}
+WaitTreeThread::WaitTreeThread(const Kernel::Thread& thread)
+ : WaitTreeSynchronizationObject(thread) {}
WaitTreeThread::~WaitTreeThread() = default;
QString WaitTreeThread::GetText() const {
@@ -241,7 +244,8 @@ QString WaitTreeThread::GetText() const {
const QString pc_info = tr(" PC = 0x%1 LR = 0x%2")
.arg(context.pc, 8, 16, QLatin1Char{'0'})
.arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'});
- return QStringLiteral("%1%2 (%3) ").arg(WaitTreeWaitObject::GetText(), pc_info, status);
+ return QStringLiteral("%1%2 (%3) ")
+ .arg(WaitTreeSynchronizationObject::GetText(), pc_info, status);
}
QColor WaitTreeThread::GetColor() const {
@@ -273,7 +277,7 @@ QColor WaitTreeThread::GetColor() const {
}
std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
- std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren());
+ std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeSynchronizationObject::GetChildren());
const auto& thread = static_cast<const Kernel::Thread&>(object);
@@ -314,7 +318,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
}
if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {
- list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjects(),
+ list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(),
thread.IsSleepingOnWait()));
}
@@ -323,7 +327,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
return list;
}
-WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object) : WaitTreeWaitObject(object) {}
+WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object)
+ : WaitTreeSynchronizationObject(object) {}
WaitTreeEvent::~WaitTreeEvent() = default;
WaitTreeThreadList::WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list)
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h
index 631274a5f..8e3bc4b24 100644
--- a/src/yuzu/debugger/wait_tree.h
+++ b/src/yuzu/debugger/wait_tree.h
@@ -19,7 +19,7 @@ class EmuThread;
namespace Kernel {
class HandleTable;
class ReadableEvent;
-class WaitObject;
+class SynchronizationObject;
class Thread;
} // namespace Kernel
@@ -99,35 +99,37 @@ private:
const Kernel::Thread& thread;
};
-class WaitTreeWaitObject : public WaitTreeExpandableItem {
+class WaitTreeSynchronizationObject : public WaitTreeExpandableItem {
Q_OBJECT
public:
- explicit WaitTreeWaitObject(const Kernel::WaitObject& object);
- ~WaitTreeWaitObject() override;
+ explicit WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& object);
+ ~WaitTreeSynchronizationObject() override;
- static std::unique_ptr<WaitTreeWaitObject> make(const Kernel::WaitObject& object);
+ static std::unique_ptr<WaitTreeSynchronizationObject> make(
+ const Kernel::SynchronizationObject& object);
QString GetText() const override;
std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
protected:
- const Kernel::WaitObject& object;
+ const Kernel::SynchronizationObject& object;
};
class WaitTreeObjectList : public WaitTreeExpandableItem {
Q_OBJECT
public:
- WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::WaitObject>>& list, bool wait_all);
+ WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list,
+ bool wait_all);
~WaitTreeObjectList() override;
QString GetText() const override;
std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
private:
- const std::vector<std::shared_ptr<Kernel::WaitObject>>& object_list;
+ const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& object_list;
bool wait_all;
};
-class WaitTreeThread : public WaitTreeWaitObject {
+class WaitTreeThread : public WaitTreeSynchronizationObject {
Q_OBJECT
public:
explicit WaitTreeThread(const Kernel::Thread& thread);
@@ -138,7 +140,7 @@ public:
std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
};
-class WaitTreeEvent : public WaitTreeWaitObject {
+class WaitTreeEvent : public WaitTreeSynchronizationObject {
Q_OBJECT
public:
explicit WaitTreeEvent(const Kernel::ReadableEvent& object);
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index b01a36023..96f1ce3af 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -379,6 +379,8 @@ void Config::ReadValues() {
Settings::values.resolution_factor =
static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
+ Settings::values.aspect_ratio =
+ static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
Settings::values.frame_limit =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 00fd88279..8a2b658cd 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -122,6 +122,10 @@ use_shader_jit =
# factor for the Switch resolution
resolution_factor =
+# Aspect ratio
+# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
+aspect_ratio =
+
# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
# 0 (default): Off, 1: On
use_vsync =
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 84ab4d687..0ac93b62a 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -118,6 +118,8 @@ void Config::ReadValues() {
// Renderer
Settings::values.resolution_factor =
static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
+ Settings::values.aspect_ratio =
+ static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
Settings::values.use_frame_limit = false;
Settings::values.frame_limit = 100;
Settings::values.use_disk_shader_cache =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 9a3e86d68..8d93f7b88 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -26,6 +26,10 @@ use_shader_jit =
# factor for the Switch resolution
resolution_factor =
+# Aspect ratio
+# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
+aspect_ratio =
+
# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
# 0 (default): Off, 1: On
use_vsync =