From 5b5e60ffeca1a718cd980e74f0528d6ab91788cf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 19:43:23 -0400 Subject: GPU_Async: Correct fences, display events and more. This commit uses guest fences on vSync event instead of an artificial fake fence we had. It also corrects to keep signaling display events while loading the game as the OS is supposed to send buffers to vSync during that time. --- src/video_core/gpu.cpp | 13 +++++++++++++ src/video_core/gpu.h | 3 +++ src/video_core/gpu_thread.cpp | 14 +------------- src/video_core/gpu_thread.h | 6 ------ 4 files changed, 17 insertions(+), 19 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..d94be9c9d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load() < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..e20b0687a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. 
Tegra::DmaPusher& DmaPusher(); + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. + void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..d7048b6ae 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) { return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..108f456bd 100644 --- 
a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue; CommandQueue queue; u64 last_fence{}; @@ -128,7 +123,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; -- cgit v1.2.3 From ffc2ce89a03d8160c408922cd72a1f45e333c0fe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 10:12:05 -0400 Subject: Nvdrv: Do framelimiting only in the CPU Thread --- src/video_core/renderer_opengl/renderer_opengl.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..7f6ff0857 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -135,9 +135,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); -- cgit v1.2.3 From 3f104464dec13f9ba90eaca5dafca87ee4116a60 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 19:08:22 -0400 Subject: Core: Wait for GPU to be idle before shutting down. 
--- src/video_core/gpu.h | 3 +++ src/video_core/gpu_asynch.cpp | 4 ++++ src/video_core/gpu_asynch.h | 1 + src/video_core/gpu_synch.h | 1 + src/video_core/gpu_thread.cpp | 5 +++++ src/video_core/gpu_thread.h | 3 +++ 6 files changed, 17 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e20b0687a..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void 
FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index d7048b6ae..4a42634d2 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -90,6 +90,11 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load()) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 108f456bd..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -116,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. + void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); -- cgit v1.2.3 From 9f2719d1a43e04cc2f5296d0f9719aab9626f730 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 27 Sep 2019 09:39:42 -0400 Subject: Gl_Rasterizer: Protect CPU Memory mapping from multiple threads. 
--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 + src/video_core/renderer_opengl/gl_rasterizer.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6a17bed72..deb3e10a5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -230,6 +231,8 @@ private: using CachedPageMap = boost::icl::interval_map; CachedPageMap cached_pages; + + std::mutex pages_mutex; }; } // namespace OpenGL -- cgit v1.2.3 From 538f5880fff5e29fd5539eea371c3131013908e4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 29 Sep 2019 10:12:28 -0400 Subject: GL_Renderer: Remove leftover snippet. 
--- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7f6ff0857..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); -- cgit v1.2.3 From cfc2f30dc409cbbb36c19c74a98f3017e6d722f2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 11 Oct 2019 13:41:15 -0400 Subject: AsyncGpu: Address Feedback --- src/video_core/gpu.cpp | 2 +- src/video_core/gpu_thread.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d94be9c9d..da7359d4d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -72,7 +72,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) const { return; } MICROPROFILE_SCOPE(GPU_wait); - while (syncpoints[syncpoint_id].load() < value) { + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { } } diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4a42634d2..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -91,7 +91,7 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load()) { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { } } -- cgit v1.2.3