Diffstat: 83 files changed, 1675 insertions(+), 570 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt index dc782e252..467d769a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,15 +151,22 @@ if (ENABLE_SDL2) set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers") set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library") set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll") - else() - find_package(SDL2 REQUIRED) - endif() - if (SDL2_FOUND) - # TODO(yuriks): Make FindSDL2.cmake export an IMPORTED library instead add_library(SDL2 INTERFACE) target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}") target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}") + else() + find_package(SDL2 REQUIRED) + + # Some installations don't set SDL2_LIBRARIES + if("${SDL2_LIBRARIES}" STREQUAL "") + message(WARNING "SDL2_LIBRARIES wasn't set, manually setting to SDL2::SDL2") + set(SDL2_LIBRARIES "SDL2::SDL2") + endif() + + include_directories(${SDL2_INCLUDE_DIRS}) + add_library(SDL2 INTERFACE) + target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}") endif() else() set(SDL2_FOUND NO) diff --git a/externals/cmake-modules/FindSDL2.cmake b/externals/cmake-modules/FindSDL2.cmake deleted file mode 100644 index 22ce752c5..000000000 --- a/externals/cmake-modules/FindSDL2.cmake +++ /dev/null @@ -1,239 +0,0 @@ - -# This module defines -# SDL2_LIBRARY, the name of the library to link against -# SDL2_FOUND, if false, do not try to link to SDL2 -# SDL2_INCLUDE_DIR, where to find SDL.h -# SDL2_DLL_DIR, where to find SDL2.dll if it exists -# -# This module responds to the the flag: -# SDL2_BUILDING_LIBRARY -# If this is defined, then no SDL2main will be linked in because -# only applications need main(). -# Otherwise, it is assumed you are building an application and this -# module will attempt to locate and set the the proper link flags -# as part of the returned SDL2_LIBRARY variable. -# -# Don't forget to include SDLmain.h and SDLmain.m your project for the -# OS X framework based version. (Other versions link to -lSDL2main which -# this module will try to find on your behalf.) Also for OS X, this -# module will automatically add the -framework Cocoa on your behalf. -# -# -# Additional Note: If you see an empty SDL2_LIBRARY_TEMP in your configuration -# and no SDL2_LIBRARY, it means CMake did not find your SDL2 library -# (SDL2.dll, libsdl2.so, SDL2.framework, etc). -# Set SDL2_LIBRARY_TEMP to point to your SDL2 library, and configure again. -# Similarly, if you see an empty SDL2MAIN_LIBRARY, you should set this value -# as appropriate. These values are used to generate the final SDL2_LIBRARY -# variable, but when these values are unset, SDL2_LIBRARY does not get created. -# -# -# $SDL2DIR is an environment variable that would -# correspond to the ./configure --prefix=$SDL2DIR -# used in building SDL2. -# l.e.galup 9-20-02 -# -# Modified by Eric Wing. -# Added code to assist with automated building by using environmental variables -# and providing a more controlled/consistent search behavior. -# Added new modifications to recognize OS X frameworks and -# additional Unix paths (FreeBSD, etc). -# Also corrected the header search path to follow "proper" SDL guidelines. -# Added a search for SDL2main which is needed by some platforms. -# Added a search for threads which is needed by some platforms. -# Added needed compile switches for MinGW. -# -# On OSX, this will prefer the Framework version (if found) over others. 
-# People will have to manually change the cache values of -# SDL2_LIBRARY to override this selection or set the CMake environment -# CMAKE_INCLUDE_PATH to modify the search paths. -# -# Note that the header path has changed from SDL2/SDL.h to just SDL.h -# This needed to change because "proper" SDL convention -# is #include "SDL.h", not <SDL2/SDL.h>. This is done for portability -# reasons because not all systems place things in SDL2/ (see FreeBSD). - -#============================================================================= -# Copyright 2003-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"). -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# CMake - Cross Platform Makefile Generator -# Copyright 2000-2016 Kitware, Inc. -# Copyright 2000-2011 Insight Software Consortium -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the names of Kitware, Inc., the Insight Software Consortium, -# nor the names of their contributors may be used to endorse or promote -# products derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# ------------------------------------------------------------------------------ -# -# The above copyright and license notice applies to distributions of -# CMake in source and binary form. Some source files contain additional -# notices of original copyright by their contributors; see each source -# for details. Third-party software packages supplied with CMake under -# compatible licenses provide their own copyright notices documented in -# corresponding subdirectories. -# -# ------------------------------------------------------------------------------ -# -# CMake was initially developed by Kitware with the following sponsorship: -# -# * National Library of Medicine at the National Institutes of Health -# as part of the Insight Segmentation and Registration Toolkit (ITK). -# -# * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel -# Visualization Initiative. 
-# -# * National Alliance for Medical Image Computing (NAMIC) is funded by the -# National Institutes of Health through the NIH Roadmap for Medical Research, -# Grant U54 EB005149. -# -# * Kitware, Inc. -# - -message("<FindSDL2.cmake>") - -SET(SDL2_SEARCH_PATHS - ~/Library/Frameworks - /Library/Frameworks - /usr/local - /usr - /sw # Fink - /opt/local # DarwinPorts - /opt/csw # Blastwave - /opt - ${SDL2_PATH} -) - -if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(VC_LIB_PATH_SUFFIX lib/x64) -else() - set(VC_LIB_PATH_SUFFIX lib/x86) -endif() - -FIND_LIBRARY(SDL2_LIBRARY_TEMP - NAMES SDL2 - HINTS - $ENV{SDL2DIR} - PATH_SUFFIXES lib64 lib ${VC_LIB_PATH_SUFFIX} - PATHS ${SDL2_SEARCH_PATHS} -) - -IF(SDL2_LIBRARY_TEMP) - if(MSVC) - get_filename_component(SDL2_DLL_DIR_TEMP ${SDL2_LIBRARY_TEMP} DIRECTORY) - if(EXISTS ${SDL2_DLL_DIR_TEMP}/SDL2.dll) - set(SDL2_DLL_DIR ${SDL2_DLL_DIR_TEMP}) - unset(SDL2_DLL_DIR_TEMP) - endif() - endif() - - FIND_PATH(SDL2_INCLUDE_DIR SDL.h - HINTS - $ENV{SDL2DIR} - PATH_SUFFIXES include/SDL2 include - PATHS ${SDL2_SEARCH_PATHS} - ) - - IF(NOT SDL2_BUILDING_LIBRARY) - IF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework") - # Non-OS X framework versions expect you to also dynamically link to - # SDL2main. This is mainly for Windows and OS X. Other (Unix) platforms - # seem to provide SDL2main for compatibility even though they don't - # necessarily need it. - FIND_LIBRARY(SDL2MAIN_LIBRARY - NAMES SDL2main - HINTS - $ENV{SDL2DIR} - PATH_SUFFIXES lib64 lib - PATHS ${SDL2_SEARCH_PATHS} - ) - ENDIF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework") - ENDIF(NOT SDL2_BUILDING_LIBRARY) - - # SDL2 may require threads on your system. - # The Apple build may not need an explicit flag because one of the - # frameworks may already provide it. - # But for non-OSX systems, I will use the CMake Threads package. - IF(NOT APPLE) - FIND_PACKAGE(Threads) - ENDIF(NOT APPLE) - - # MinGW needs an additional library, mwindows - # It's total link flags should look like -lmingw32 -lSDL2main -lSDL2 -lmwindows - # (Actually on second look, I think it only needs one of the m* libraries.) - IF(MINGW) - SET(MINGW32_LIBRARY mingw32 CACHE STRING "mwindows for MinGW") - ENDIF(MINGW) - - # For SDL2main - IF(NOT SDL2_BUILDING_LIBRARY) - IF(SDL2MAIN_LIBRARY) - SET(SDL2_LIBRARY_TEMP ${SDL2MAIN_LIBRARY} ${SDL2_LIBRARY_TEMP}) - ENDIF(SDL2MAIN_LIBRARY) - ENDIF(NOT SDL2_BUILDING_LIBRARY) - - # For OS X, SDL2 uses Cocoa as a backend so it must link to Cocoa. - # CMake doesn't display the -framework Cocoa string in the UI even - # though it actually is there if I modify a pre-used variable. - # I think it has something to do with the CACHE STRING. - # So I use a temporary variable until the end so I can set the - # "real" variable in one-shot. - IF(APPLE) - SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} "-framework Cocoa") - ENDIF(APPLE) - - # For threads, as mentioned Apple doesn't need this. - # In fact, there seems to be a problem if I used the Threads package - # and try using this line, so I'm just skipping it entirely for OS X. - IF(NOT APPLE) - SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} ${CMAKE_THREAD_LIBS_INIT}) - ENDIF(NOT APPLE) - - # For MinGW library - IF(MINGW) - SET(SDL2_LIBRARY_TEMP ${MINGW32_LIBRARY} ${SDL2_LIBRARY_TEMP}) - ENDIF(MINGW) - - # Set the final string here so the GUI reflects the final state. 
- SET(SDL2_LIBRARY ${SDL2_LIBRARY_TEMP} CACHE STRING "Where the SDL2 Library can be found") - - # Unset the temp variable to INTERNAL so it is not seen in the CMake GUI - UNSET(SDL2_LIBRARY_TEMP) -ENDIF(SDL2_LIBRARY_TEMP) - -message("</FindSDL2.cmake>") - -INCLUDE(FindPackageHandleStandardArgs) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS(SDL2 REQUIRED_VARS SDL2_LIBRARY SDL2_INCLUDE_DIR) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index d342cafe0..26612e692 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -181,14 +181,16 @@ add_library(core STATIC hle/kernel/svc.cpp hle/kernel/svc.h hle/kernel/svc_wrap.h + hle/kernel/synchronization_object.cpp + hle/kernel/synchronization_object.h + hle/kernel/synchronization.cpp + hle/kernel/synchronization.h hle/kernel/thread.cpp hle/kernel/thread.h hle/kernel/transfer_memory.cpp hle/kernel/transfer_memory.h hle/kernel/vm_manager.cpp hle/kernel/vm_manager.h - hle/kernel/wait_object.cpp - hle/kernel/wait_object.h hle/kernel/writable_event.cpp hle/kernel/writable_event.h hle/lock.cpp diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 791640a3a..29eaf74e5 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -14,6 +14,7 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" @@ -153,7 +154,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag config.tpidr_el0 = &cb->tpidr_el0; config.dczid_el0 = 4; config.ctr_el0 = 0x8444c004; - config.cntfrq_el0 = Timing::CNTFREQ; + config.cntfrq_el0 = Hardware::CNTFREQ; // Unpredictable instructions config.define_unpredictable_behaviour = true; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index aa09fa453..46d4178c4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/thread.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" namespace Core::Timing { @@ -215,7 +216,7 @@ void CoreTiming::Idle() { } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; + return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE}; } s64 CoreTiming::GetDowncount() const { diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index a10472a95..de50d3b14 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -11,7 +11,7 @@ namespace Core::Timing { -constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE; +constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE; s64 msToCycles(std::chrono::milliseconds ms) { if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) { @@ -20,9 +20,9 @@ s64 msToCycles(std::chrono::milliseconds ms) { } if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) { LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return BASE_CLOCK_RATE * (ms.count() / 1000); + return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000); } - return (BASE_CLOCK_RATE * ms.count()) / 1000; + return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000; } s64 usToCycles(std::chrono::microseconds us) { @@ -32,9 +32,9 @@ s64 
usToCycles(std::chrono::microseconds us) { } if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) { LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return BASE_CLOCK_RATE * (us.count() / 1000000); + return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000); } - return (BASE_CLOCK_RATE * us.count()) / 1000000; + return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000; } s64 nsToCycles(std::chrono::nanoseconds ns) { @@ -44,14 +44,14 @@ s64 nsToCycles(std::chrono::nanoseconds ns) { } if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) { LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return BASE_CLOCK_RATE * (ns.count() / 1000000000); + return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000); } - return (BASE_CLOCK_RATE * ns.count()) / 1000000000; + return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; } u64 CpuCyclesToClockCycles(u64 ticks) { - const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ); - return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first; + const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ); + return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; } } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index cdd84d70f..addc72b19 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -6,28 +6,24 @@ #include <chrono> #include "common/common_types.h" +#include "core/hardware_properties.h" namespace Core::Timing { -// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz -// The exact value used is of course unverified. -constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked -constexpr u64 CNTFREQ = 19200000; // Value from fusee. 
- s64 msToCycles(std::chrono::milliseconds ms); s64 usToCycles(std::chrono::microseconds us); s64 nsToCycles(std::chrono::nanoseconds ns); inline std::chrono::milliseconds CyclesToMs(s64 cycles) { - return std::chrono::milliseconds(cycles * 1000 / BASE_CLOCK_RATE); + return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE); } inline std::chrono::nanoseconds CyclesToNs(s64 cycles) { - return std::chrono::nanoseconds(cycles * 1000000000 / BASE_CLOCK_RATE); + return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE); } inline std::chrono::microseconds CyclesToUs(s64 cycles) { - return std::chrono::microseconds(cycles * 1000000 / BASE_CLOCK_RATE); + return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE); } u64 CpuCyclesToClockCycles(u64 ticks); diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index feb619e1b..97554d1bb 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -6,6 +6,7 @@ #include <array> #include <memory> +#include "core/hardware_properties.h" namespace Core { @@ -39,9 +40,7 @@ public: void RunLoop(bool tight_loop); private: - static constexpr std::size_t NUM_CPU_CORES = 4; - - std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers; + std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers; std::size_t active_core{}; ///< Active core, only used in single thread mode System& system; diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index d6d2cf3f0..2dc795d56 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { // so just calculate them both even if the other isn't showing. 
FramebufferLayout res{width, height}; - const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / - ScreenUndocked::Width}; - const auto window_aspect_ratio = static_cast<float>(height) / width; + const float window_aspect_ratio = static_cast<float>(height) / width; + const float emulation_aspect_ratio = EmulationAspectRatio( + static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio); const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); @@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { return DefaultFrameLayout(width, height); } +float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) { + switch (aspect) { + case AspectRatio::Default: + return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; + case AspectRatio::R4_3: + return 3.0f / 4.0f; + case AspectRatio::R21_9: + return 9.0f / 21.0f; + case AspectRatio::StretchToWindow: + return window_aspect_ratio; + default: + return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; + } +} + } // namespace Layout diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index d2370adde..1d39c1faf 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -18,6 +18,13 @@ enum ScreenDocked : u32 { HeightDocked = 1080, }; +enum class AspectRatio { + Default, + R4_3, + R21_9, + StretchToWindow, +}; + /// Describes the layout of the window framebuffer struct FramebufferLayout { u32 width{ScreenUndocked::Width}; @@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); */ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); +/** + * Convenience method to determine emulation aspect ratio + * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio + * @param window_aspect_ratio Current window aspect ratio + * @return Emulation render window aspect ratio + */ +float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio); + } // namespace Layout diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h new file mode 100644 index 000000000..213461b6a --- /dev/null +++ b/src/core/hardware_properties.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <tuple> + +#include "common/common_types.h" + +namespace Core { + +namespace Hardware { + +// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz +// The exact value used is of course unverified. 
+constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked +constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed +constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores + +} // namespace Hardware + +struct EmuThreadHandle { + u32 host_handle; + u32 guest_handle; + + u64 GetRaw() const { + return (static_cast<u64>(host_handle) << 32) | guest_handle; + } + + bool operator==(const EmuThreadHandle& rhs) const { + return std::tie(host_handle, guest_handle) == std::tie(rhs.host_handle, rhs.guest_handle); + } + + bool operator!=(const EmuThreadHandle& rhs) const { + return !operator==(rhs); + } + + static constexpr EmuThreadHandle InvalidHandle() { + constexpr u32 invalid_handle = 0xFFFFFFFF; + return {invalid_handle, invalid_handle}; + } +}; + +} // namespace Core diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp index 4669a14ad..6d66276bc 100644 --- a/src/core/hle/kernel/client_session.cpp +++ b/src/core/hle/kernel/client_session.cpp @@ -12,7 +12,7 @@ namespace Kernel { -ClientSession::ClientSession(KernelCore& kernel) : WaitObject{kernel} {} +ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {} ClientSession::~ClientSession() { // This destructor will be called automatically when the last ClientSession handle is closed by @@ -31,6 +31,11 @@ void ClientSession::Acquire(Thread* thread) { UNIMPLEMENTED(); } +bool ClientSession::IsSignaled() const { + UNIMPLEMENTED(); + return true; +} + ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kernel, std::shared_ptr<Session> parent, std::string name) { diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h index b4289a9a8..d15b09554 100644 --- a/src/core/hle/kernel/client_session.h +++ b/src/core/hle/kernel/client_session.h @@ -7,7 +7,7 @@ #include <memory> #include <string> -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" union ResultCode; @@ -22,7 +22,7 @@ class KernelCore; class Session; class Thread; -class ClientSession final : public WaitObject { +class ClientSession final : public SynchronizationObject { public: explicit ClientSession(KernelCore& kernel); ~ClientSession() override; @@ -48,6 +48,8 @@ public: void Acquire(Thread* thread) override; + bool IsSignaled() const override; + private: static ResultVal<std::shared_ptr<ClientSession>> Create(KernelCore& kernel, std::shared_ptr<Session> parent, diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index ab05788d7..c558a2f33 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -47,15 +47,15 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( const std::string& reason, u64 timeout, WakeupCallback&& callback, std::shared_ptr<WritableEvent> writable_event) { // Put the client thread to sleep until the wait event is signaled or the timeout expires. 
- thread->SetWakeupCallback([context = *this, callback](ThreadWakeupReason reason, - std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, - std::size_t index) mutable -> bool { - ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent); - callback(thread, context, reason); - context.WriteToOutgoingCommandBuffer(*thread); - return true; - }); + thread->SetWakeupCallback( + [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread, + std::shared_ptr<SynchronizationObject> object, + std::size_t index) mutable -> bool { + ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent); + callback(thread, context, reason); + context.WriteToOutgoingCommandBuffer(*thread); + return true; + }); auto& kernel = Core::System::GetInstance().Kernel(); if (!writable_event) { @@ -67,7 +67,7 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( const auto readable_event{writable_event->GetReadableEvent()}; writable_event->Clear(); thread->SetStatus(ThreadStatus::WaitHLEEvent); - thread->SetWaitObjects({readable_event}); + thread->SetSynchronizationObjects({readable_event}); readable_event->AddWaitingThread(thread); if (timeout > 0) { diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index edd4c4259..4eb1d8703 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -23,6 +23,7 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/lock.h" #include "core/hle/result.h" @@ -54,10 +55,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ if (thread->GetStatus() == ThreadStatus::WaitSynch || thread->GetStatus() == ThreadStatus::WaitHLEEvent) { // Remove the thread from each of its waiting objects' waitlists - for (const auto& object : thread->GetWaitObjects()) { + for (const auto& object : thread->GetSynchronizationObjects()) { object->RemoveWaitingThread(thread); } - thread->ClearWaitObjects(); + thread->ClearSynchronizationObjects(); // Invoke the wakeup callback before clearing the wait objects if (thread->HasWakeupCallback()) { @@ -96,7 +97,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } struct KernelCore::Impl { - explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {} + explicit Impl(Core::System& system) + : system{system}, global_scheduler{system}, synchronization{system} {} void Initialize(KernelCore& kernel) { Shutdown(); @@ -191,6 +193,7 @@ struct KernelCore::Impl { std::vector<std::shared_ptr<Process>> process_list; Process* current_process = nullptr; Kernel::GlobalScheduler global_scheduler; + Kernel::Synchronization synchronization; std::shared_ptr<ResourceLimit> system_resource_limit; @@ -270,6 +273,14 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { return impl->cores[id]; } +Kernel::Synchronization& KernelCore::Synchronization() { + return impl->synchronization; +} + +const Kernel::Synchronization& KernelCore::Synchronization() const { + return impl->synchronization; +} + Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() { return *impl->exclusive_monitor; } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index fccffaf3a..1eede3063 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -29,6 +29,7 @@ class HandleTable; class 
PhysicalCore; class Process; class ResourceLimit; +class Synchronization; class Thread; /// Represents a single instance of the kernel. @@ -92,6 +93,12 @@ public: /// Gets the an instance of the respective physical CPU core. const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; + /// Gets the an instance of the Synchronization Interface. + Kernel::Synchronization& Synchronization(); + + /// Gets the an instance of the Synchronization Interface. + const Kernel::Synchronization& Synchronization() const; + /// Stops execution of 'id' core, in order to reschedule a new thread. void PrepareReschedule(std::size_t id); diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index b9035a0be..2fcb7326c 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -337,7 +337,7 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { } Process::Process(Core::System& system) - : WaitObject{system.Kernel()}, vm_manager{system}, + : SynchronizationObject{system.Kernel()}, vm_manager{system}, address_arbiter{system}, mutex{system}, system{system} {} Process::~Process() = default; @@ -357,7 +357,7 @@ void Process::ChangeStatus(ProcessStatus new_status) { status = new_status; is_signaled = true; - WakeupAllWaitingThreads(); + Signal(); } void Process::AllocateMainThreadStack(u64 stack_size) { diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 3483fa19d..4887132a7 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -15,8 +15,8 @@ #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/process_capability.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/vm_manager.h" -#include "core/hle/kernel/wait_object.h" #include "core/hle/result.h" namespace Core { @@ -60,7 +60,7 @@ enum class ProcessStatus { DebugBreak, }; -class Process final : public WaitObject { +class Process final : public SynchronizationObject { public: explicit Process(Core::System& system); ~Process() override; @@ -359,10 +359,6 @@ private: /// specified by metadata provided to the process during loading. bool is_64bit_process = true; - /// Whether or not this process is signaled. This occurs - /// upon the process changing to a different state. - bool is_signaled = false; - /// Total running time for the process in ticks. 
u64 total_process_running_time_ticks = 0; diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index d8ac97aa1..9d3d3a81b 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -11,30 +11,30 @@ namespace Kernel { -ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {} +ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {} ReadableEvent::~ReadableEvent() = default; bool ReadableEvent::ShouldWait(const Thread* thread) const { - return !signaled; + return !is_signaled; } void ReadableEvent::Acquire(Thread* thread) { - ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); + ASSERT_MSG(IsSignaled(), "object unavailable!"); } void ReadableEvent::Signal() { - if (!signaled) { - signaled = true; - WakeupAllWaitingThreads(); + if (!is_signaled) { + is_signaled = true; + SynchronizationObject::Signal(); }; } void ReadableEvent::Clear() { - signaled = false; + is_signaled = false; } ResultCode ReadableEvent::Reset() { - if (!signaled) { + if (!is_signaled) { return ERR_INVALID_STATE; } diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h index 11ff71c3a..3264dd066 100644 --- a/src/core/hle/kernel/readable_event.h +++ b/src/core/hle/kernel/readable_event.h @@ -5,7 +5,7 @@ #pragma once #include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" union ResultCode; @@ -14,7 +14,7 @@ namespace Kernel { class KernelCore; class WritableEvent; -class ReadableEvent final : public WaitObject { +class ReadableEvent final : public SynchronizationObject { friend class WritableEvent; public: @@ -46,13 +46,11 @@ public: /// then ERR_INVALID_STATE will be returned. ResultCode Reset(); + void Signal() override; + private: explicit ReadableEvent(KernelCore& kernel); - void Signal(); - - bool signaled{}; - std::string name; ///< Name of event (optional) }; diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index eb196a690..86f1421bf 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -124,8 +124,8 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { "Thread yielding without being in front"); scheduled_queue[core_id].yield(priority); - std::array<Thread*, NUM_CPU_CORES> current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { + std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; + for (std::size_t i = 0; i < current_threads.size(); i++) { current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); } @@ -177,8 +177,8 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread // function... if (scheduled_queue[core_id].empty()) { // Here, "current_threads" is calculated after the ""yield"", unlike yield -1 - std::array<Thread*, NUM_CPU_CORES> current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { + std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; + for (std::size_t i = 0; i < current_threads.size(); i++) { current_threads[i] = scheduled_queue[i].empty() ? 
nullptr : scheduled_queue[i].front(); } for (auto& thread : suggested_queue[core_id]) { @@ -208,7 +208,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } void GlobalScheduler::PreemptThreads() { - for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { + for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { const u32 priority = preemption_priorities[core_id]; if (scheduled_queue[core_id].size(priority) > 0) { @@ -349,7 +349,7 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, } void GlobalScheduler::Shutdown() { - for (std::size_t core = 0; core < NUM_CPU_CORES; core++) { + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { scheduled_queue[core].clear(); suggested_queue[core].clear(); } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 14b77960a..96db049cb 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/multi_level_queue.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/thread.h" namespace Core { @@ -23,8 +24,6 @@ class Process; class GlobalScheduler final { public: - static constexpr u32 NUM_CPU_CORES = 4; - explicit GlobalScheduler(Core::System& system); ~GlobalScheduler(); @@ -125,7 +124,7 @@ public: void PreemptThreads(); u32 CpuCoresCount() const { - return NUM_CPU_CORES; + return Core::Hardware::NUM_CPU_CORES; } void SetReselectionPending() { @@ -149,13 +148,15 @@ private: bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner); static constexpr u32 min_regular_priority = 2; - std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue; - std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue; + std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES> + scheduled_queue; + std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES> + suggested_queue; std::atomic<bool> is_reselection_pending{false}; // The priority levels at which the global scheduler preempts threads every 10 ms. They are // ordered from Core 0 to Core 3. - std::array<u32, NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; + std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; /// Lists all thread ids that aren't deleted/etc. 
std::vector<std::shared_ptr<Thread>> thread_list; diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index a4ccfa35e..a549ae9d7 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ServerPort::ServerPort(KernelCore& kernel) : WaitObject{kernel} {} +ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {} ServerPort::~ServerPort() = default; ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { @@ -39,6 +39,10 @@ void ServerPort::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); } +bool ServerPort::IsSignaled() const { + return !pending_sessions.empty(); +} + ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions, std::string name) { std::shared_ptr<ServerPort> server_port = std::make_shared<ServerPort>(kernel); diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h index 8be8a75ea..41b191b86 100644 --- a/src/core/hle/kernel/server_port.h +++ b/src/core/hle/kernel/server_port.h @@ -10,7 +10,7 @@ #include <vector> #include "common/common_types.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Kernel { @@ -20,7 +20,7 @@ class KernelCore; class ServerSession; class SessionRequestHandler; -class ServerPort final : public WaitObject { +class ServerPort final : public SynchronizationObject { public: explicit ServerPort(KernelCore& kernel); ~ServerPort() override; @@ -82,6 +82,8 @@ public: bool ShouldWait(const Thread* thread) const override; void Acquire(Thread* thread) override; + bool IsSignaled() const override; + private: /// ServerSessions waiting to be accepted by the port std::vector<std::shared_ptr<ServerSession>> pending_sessions; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 7825e1ec4..4604e35c5 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -24,7 +24,7 @@ namespace Kernel { -ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {} +ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} ServerSession::~ServerSession() = default; ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, @@ -50,6 +50,16 @@ bool ServerSession::ShouldWait(const Thread* thread) const { return pending_requesting_threads.empty() || currently_handling != nullptr; } +bool ServerSession::IsSignaled() const { + // Closed sessions should never wait, an error will be returned from svcReplyAndReceive. + if (!parent->Client()) { + return true; + } + + // Wait if we have no pending requests, or if we're currently handling a request. + return !pending_requesting_threads.empty() && currently_handling == nullptr; +} + void ServerSession::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); // We are now handling a request, pop it from the stack. 
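The server_session.cpp hunk above gives ServerSession an explicit IsSignaled() next to its existing ShouldWait(): a session counts as signaled when its client endpoint is gone, or when a request is pending and none is currently being handled. Below is a minimal standalone C++ sketch of that readiness rule; ToyThread and ToySession are stand-ins invented for illustration, not yuzu's kernel types.

```cpp
// Toy model of the readiness rule ServerSession::IsSignaled() encodes above.
#include <cassert>
#include <memory>
#include <vector>

struct ToyThread {};

struct ToySession {
    bool client_alive{true};
    std::vector<std::shared_ptr<ToyThread>> pending_requesting_threads;
    std::shared_ptr<ToyThread> currently_handling;

    bool IsSignaled() const {
        if (!client_alive) {
            return true; // closed sessions never block the waiter
        }
        // Signaled when a request is pending and nothing is being handled yet.
        return !pending_requesting_threads.empty() && currently_handling == nullptr;
    }

    // In this toy, waiting is simply the negation of being signaled.
    bool ShouldWait() const {
        return !IsSignaled();
    }
};

int main() {
    ToySession session;
    assert(session.ShouldWait()); // no pending requests yet

    session.pending_requesting_threads.push_back(std::make_shared<ToyThread>());
    assert(session.IsSignaled()); // a request is waiting to be served

    session.currently_handling = session.pending_requesting_threads.back();
    assert(session.ShouldWait()); // busy handling a request
    return 0;
}
```

Note that the real ServerSession keeps the two predicates separate: ShouldWait() checks only the pending/handling state, while the closed-client shortcut lives in IsSignaled().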
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index d6e48109e..77e4f6721 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -10,7 +10,7 @@ #include <vector> #include "common/threadsafe_queue.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Memory { @@ -41,7 +41,7 @@ class Thread; * After the server replies to the request, the response is marshalled back to the caller's * TLS buffer and control is transferred back to it. */ -class ServerSession final : public WaitObject { +class ServerSession final : public SynchronizationObject { public: explicit ServerSession(KernelCore& kernel); ~ServerSession() override; @@ -73,6 +73,8 @@ public: return parent.get(); } + bool IsSignaled() const override; + /** * Sets the HLE handler for the session. This handler will be called to service IPC requests * instead of the regular IPC machinery. (The regular IPC machinery is currently not diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp index dee6e2b72..e4dd53e24 100644 --- a/src/core/hle/kernel/session.cpp +++ b/src/core/hle/kernel/session.cpp @@ -9,7 +9,7 @@ namespace Kernel { -Session::Session(KernelCore& kernel) : WaitObject{kernel} {} +Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {} Session::~Session() = default; Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { @@ -29,6 +29,11 @@ bool Session::ShouldWait(const Thread* thread) const { return {}; } +bool Session::IsSignaled() const { + UNIMPLEMENTED(); + return true; +} + void Session::Acquire(Thread* thread) { UNIMPLEMENTED(); } diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h index 15a5ac15f..7cd9c0d77 100644 --- a/src/core/hle/kernel/session.h +++ b/src/core/hle/kernel/session.h @@ -8,7 +8,7 @@ #include <string> #include <utility> -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" namespace Kernel { @@ -19,7 +19,7 @@ class ServerSession; * Parent structure to link the client and server endpoints of a session with their associated * client port. 
*/ -class Session final : public WaitObject { +class Session final : public SynchronizationObject { public: explicit Session(KernelCore& kernel); ~Session() override; @@ -39,6 +39,8 @@ public: bool ShouldWait(const Thread* thread) const override; + bool IsSignaled() const override; + void Acquire(Thread* thread) override; std::shared_ptr<ClientSession> Client() { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 9cae5c73d..fd91779a3 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -32,6 +32,7 @@ #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/svc_wrap.h" +#include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/transfer_memory.h" #include "core/hle/kernel/writable_event.h" @@ -433,22 +434,6 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han return ERR_INVALID_HANDLE; } -/// Default thread wakeup callback for WaitSynchronization -static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); - - if (reason == ThreadWakeupReason::Timeout) { - thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); - return true; - } - - ASSERT(reason == ThreadWakeupReason::Signal); - thread->SetWaitSynchronizationResult(RESULT_SUCCESS); - thread->SetWaitSynchronizationOutput(static_cast<u32>(index)); - return true; -}; - /// Wait for the given handles to synchronize, timeout after the specified nanoseconds static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address, u64 handle_count, s64 nano_seconds) { @@ -472,14 +457,14 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr } auto* const thread = system.CurrentScheduler().GetCurrentThread(); - - using ObjectPtr = Thread::ThreadWaitObjects::value_type; - Thread::ThreadWaitObjects objects(handle_count); - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto& kernel = system.Kernel(); + using ObjectPtr = Thread::ThreadSynchronizationObjects::value_type; + Thread::ThreadSynchronizationObjects objects(handle_count); + const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); for (u64 i = 0; i < handle_count; ++i) { const Handle handle = memory.Read32(handles_address + i * sizeof(Handle)); - const auto object = handle_table.Get<WaitObject>(handle); + const auto object = handle_table.Get<SynchronizationObject>(handle); if (object == nullptr) { LOG_ERROR(Kernel_SVC, "Object is a nullptr"); @@ -488,47 +473,10 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr objects[i] = object; } - - // Find the first object that is acquirable in the provided list of objects - auto itr = std::find_if(objects.begin(), objects.end(), [thread](const ObjectPtr& object) { - return !object->ShouldWait(thread); - }); - - if (itr != objects.end()) { - // We found a ready object, acquire it and set the result value - WaitObject* object = itr->get(); - object->Acquire(thread); - *index = static_cast<s32>(std::distance(objects.begin(), itr)); - return RESULT_SUCCESS; - } - - // No objects were ready to be acquired, prepare to suspend the thread. - - // If a timeout value of 0 was provided, just return the Timeout error code instead of - // suspending the thread. 
- if (nano_seconds == 0) { - return RESULT_TIMEOUT; - } - - if (thread->IsSyncCancelled()) { - thread->SetSyncCancelled(false); - return ERR_SYNCHRONIZATION_CANCELED; - } - - for (auto& object : objects) { - object->AddWaitingThread(SharedFrom(thread)); - } - - thread->SetWaitObjects(std::move(objects)); - thread->SetStatus(ThreadStatus::WaitSynch); - - // Create an event to wake the thread up after the specified nanosecond delay has passed - thread->WakeAfterDelay(nano_seconds); - thread->SetWakeupCallback(DefaultThreadWakeupCallback); - - system.PrepareReschedule(thread->GetProcessorID()); - - return RESULT_TIMEOUT; + auto& synchronization = kernel.Synchronization(); + const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds); + *index = handle_result; + return result; } /// Resumes a thread waiting on WaitSynchronization diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp new file mode 100644 index 000000000..dc37fad1a --- /dev/null +++ b/src/core/hle/kernel/synchronization.cpp @@ -0,0 +1,87 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/synchronization.h" +#include "core/hle/kernel/synchronization_object.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +/// Default thread wakeup callback for WaitSynchronization +static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, + std::shared_ptr<SynchronizationObject> object, + std::size_t index) { + ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); + + if (reason == ThreadWakeupReason::Timeout) { + thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); + return true; + } + + ASSERT(reason == ThreadWakeupReason::Signal); + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); + thread->SetWaitSynchronizationOutput(static_cast<u32>(index)); + return true; +} + +Synchronization::Synchronization(Core::System& system) : system{system} {} + +void Synchronization::SignalObject(SynchronizationObject& obj) const { + if (obj.IsSignaled()) { + obj.WakeupAllWaitingThreads(); + } +} + +std::pair<ResultCode, Handle> Synchronization::WaitFor( + std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { + auto* const thread = system.CurrentScheduler().GetCurrentThread(); + // Find the first object that is acquirable in the provided list of objects + const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(), + [thread](const std::shared_ptr<SynchronizationObject>& object) { + return object->IsSignaled(); + }); + + if (itr != sync_objects.end()) { + // We found a ready object, acquire it and set the result value + SynchronizationObject* object = itr->get(); + object->Acquire(thread); + const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); + return {RESULT_SUCCESS, index}; + } + + // No objects were ready to be acquired, prepare to suspend the thread. + + // If a timeout value of 0 was provided, just return the Timeout error code instead of + // suspending the thread. 
+ if (nano_seconds == 0) { + return {RESULT_TIMEOUT, InvalidHandle}; + } + + if (thread->IsSyncCancelled()) { + thread->SetSyncCancelled(false); + return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; + } + + for (auto& object : sync_objects) { + object->AddWaitingThread(SharedFrom(thread)); + } + + thread->SetSynchronizationObjects(std::move(sync_objects)); + thread->SetStatus(ThreadStatus::WaitSynch); + + // Create an event to wake the thread up after the specified nanosecond delay has passed + thread->WakeAfterDelay(nano_seconds); + thread->SetWakeupCallback(DefaultThreadWakeupCallback); + + system.PrepareReschedule(thread->GetProcessorID()); + + return {RESULT_TIMEOUT, InvalidHandle}; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/synchronization.h b/src/core/hle/kernel/synchronization.h new file mode 100644 index 000000000..379f4b1d3 --- /dev/null +++ b/src/core/hle/kernel/synchronization.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <utility> +#include <vector> + +#include "core/hle/kernel/object.h" +#include "core/hle/result.h" + +namespace Core { +class System; +} // namespace Core + +namespace Kernel { + +class SynchronizationObject; + +/** + * The 'Synchronization' class is an interface for handling synchronization methods + * used by Synchronization objects and synchronization SVCs. This centralizes processing of + * such + */ +class Synchronization { +public: + explicit Synchronization(Core::System& system); + + /// Signals a synchronization object, waking up all its waiting threads + void SignalObject(SynchronizationObject& obj) const; + + /// Tries to see if waiting for any of the sync_objects is necessary, if not + /// it returns Success and the handle index of the signaled sync object. In + /// case not, the current thread will be locked and wait for nano_seconds or + /// for a synchronization object to signal. 
+ std::pair<ResultCode, Handle> WaitFor( + std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds); + +private: + Core::System& system; +}; +} // namespace Kernel diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/synchronization_object.cpp index 1838260fd..43f3eef18 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/synchronization_object.cpp @@ -10,20 +10,26 @@ #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/synchronization.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" namespace Kernel { -WaitObject::WaitObject(KernelCore& kernel) : Object{kernel} {} -WaitObject::~WaitObject() = default; +SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {} +SynchronizationObject::~SynchronizationObject() = default; -void WaitObject::AddWaitingThread(std::shared_ptr<Thread> thread) { +void SynchronizationObject::Signal() { + kernel.Synchronization().SignalObject(*this); +} + +void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) { auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread); if (itr == waiting_threads.end()) waiting_threads.push_back(std::move(thread)); } -void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { +void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread); // If a thread passed multiple handles to the same object, // the kernel might attempt to remove the thread from the object's @@ -32,7 +38,7 @@ void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) { waiting_threads.erase(itr); } -std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const { +std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const { Thread* candidate = nullptr; u32 candidate_priority = THREADPRIO_LOWEST + 1; @@ -57,7 +63,7 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const { return SharedFrom(candidate); } -void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { +void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { ASSERT(!ShouldWait(thread.get())); if (!thread) { @@ -65,7 +71,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { } if (thread->IsSleepingOnWait()) { - for (const auto& object : thread->GetWaitObjects()) { + for (const auto& object : thread->GetSynchronizationObjects()) { ASSERT(!object->ShouldWait(thread.get())); object->Acquire(thread.get()); } @@ -73,9 +79,9 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { Acquire(thread.get()); } - const std::size_t index = thread->GetWaitObjectIndex(SharedFrom(this)); + const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this)); - thread->ClearWaitObjects(); + thread->ClearSynchronizationObjects(); thread->CancelWakeupTimer(); @@ -90,13 +96,13 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { } } -void WaitObject::WakeupAllWaitingThreads() { +void SynchronizationObject::WakeupAllWaitingThreads() { while (auto thread = GetHighestPriorityReadyThread()) { WakeupWaitingThread(thread); } } -const std::vector<std::shared_ptr<Thread>>& WaitObject::GetWaitingThreads() const { +const std::vector<std::shared_ptr<Thread>>& 
SynchronizationObject::GetWaitingThreads() const { return waiting_threads; } diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/synchronization_object.h index 9a17958a4..741c31faf 100644 --- a/src/core/hle/kernel/wait_object.h +++ b/src/core/hle/kernel/synchronization_object.h @@ -15,10 +15,10 @@ class KernelCore; class Thread; /// Class that represents a Kernel object that a thread can be waiting on -class WaitObject : public Object { +class SynchronizationObject : public Object { public: - explicit WaitObject(KernelCore& kernel); - ~WaitObject() override; + explicit SynchronizationObject(KernelCore& kernel); + ~SynchronizationObject() override; /** * Check if the specified thread should wait until the object is available @@ -30,6 +30,13 @@ public: /// Acquire/lock the object for the specified thread if it is available virtual void Acquire(Thread* thread) = 0; + /// Signal this object + virtual void Signal(); + + virtual bool IsSignaled() const { + return is_signaled; + } + /** * Add a thread to wait on this object * @param thread Pointer to thread to add @@ -60,16 +67,20 @@ public: /// Get a const reference to the waiting threads list for debug use const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const; +protected: + bool is_signaled{}; // Tells if this sync object is signalled; + private: /// Threads waiting for this object to become available std::vector<std::shared_ptr<Thread>> waiting_threads; }; -// Specialization of DynamicObjectCast for WaitObjects +// Specialization of DynamicObjectCast for SynchronizationObjects template <> -inline std::shared_ptr<WaitObject> DynamicObjectCast<WaitObject>(std::shared_ptr<Object> object) { +inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>( + std::shared_ptr<Object> object) { if (object != nullptr && object->IsWaitable()) { - return std::static_pointer_cast<WaitObject>(object); + return std::static_pointer_cast<SynchronizationObject>(object); } return nullptr; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ad464e03b..ae5f2c8bd 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -15,6 +15,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" @@ -31,11 +32,15 @@ bool Thread::ShouldWait(const Thread* thread) const { return status != ThreadStatus::Dead; } +bool Thread::IsSignaled() const { + return status == ThreadStatus::Dead; +} + void Thread::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); } -Thread::Thread(KernelCore& kernel) : WaitObject{kernel} {} +Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {} Thread::~Thread() = default; void Thread::Stop() { @@ -45,7 +50,7 @@ void Thread::Stop() { kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; SetStatus(ThreadStatus::Dead); - WakeupAllWaitingThreads(); + Signal(); // Clean up any dangling references in objects that this thread was waiting for for (auto& wait_object : wait_objects) { @@ -215,7 +220,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { context.cpu_registers[1] = output; } -s32 Thread::GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const { +s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { 
ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); @@ -336,14 +341,16 @@ void Thread::ChangeCore(u32 core, u64 mask) { SetCoreAndAffinityMask(core, mask); } -bool Thread::AllWaitObjectsReady() const { - return std::none_of( - wait_objects.begin(), wait_objects.end(), - [this](const std::shared_ptr<WaitObject>& object) { return object->ShouldWait(this); }); +bool Thread::AllSynchronizationObjectsReady() const { + return std::none_of(wait_objects.begin(), wait_objects.end(), + [this](const std::shared_ptr<SynchronizationObject>& object) { + return object->ShouldWait(this); + }); } bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index) { + std::shared_ptr<SynchronizationObject> object, + std::size_t index) { ASSERT(wakeup_callback); return wakeup_callback(reason, std::move(thread), std::move(object), index); } @@ -425,7 +432,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { const s32 old_core = processor_id; if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { if (static_cast<s32>(ideal_core) < 0) { - processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES); + processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES); } else { processor_id = ideal_core; } @@ -449,7 +456,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(current_priority, core, this); } @@ -460,7 +467,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } @@ -479,7 +486,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(old_priority, core, this); } @@ -496,7 +503,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } @@ -512,7 +519,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { return; } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (((old_affinity_mask >> core) & 1) != 0) { if (core == static_cast<u32>(old_core)) { scheduler.Unschedule(current_priority, 
core, this); @@ -522,7 +529,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { } } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { if (((affinity_mask >> core) & 1) != 0) { if (core == static_cast<u32>(processor_id)) { scheduler.Schedule(current_priority, core, this); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 3bcf9e137..7a4916318 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/wait_object.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" namespace Kernel { @@ -95,7 +95,7 @@ enum class ThreadSchedMasks : u32 { ForcePauseMask = 0x0070, }; -class Thread final : public WaitObject { +class Thread final : public SynchronizationObject { public: explicit Thread(KernelCore& kernel); ~Thread() override; @@ -104,11 +104,11 @@ public: using ThreadContext = Core::ARM_Interface::ThreadContext; - using ThreadWaitObjects = std::vector<std::shared_ptr<WaitObject>>; + using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; using WakeupCallback = std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index)>; + std::shared_ptr<SynchronizationObject> object, std::size_t index)>; /** * Creates and returns a new thread. The new thread is immediately scheduled @@ -146,6 +146,7 @@ public: bool ShouldWait(const Thread* thread) const override; void Acquire(Thread* thread) override; + bool IsSignaled() const override; /** * Gets the thread's current priority @@ -233,7 +234,7 @@ public: * * @param object Object to query the index of. */ - s32 GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const; + s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const; /** * Stops a thread, invalidating it from further use @@ -314,15 +315,15 @@ public: return owner_process; } - const ThreadWaitObjects& GetWaitObjects() const { + const ThreadSynchronizationObjects& GetSynchronizationObjects() const { return wait_objects; } - void SetWaitObjects(ThreadWaitObjects objects) { + void SetSynchronizationObjects(ThreadSynchronizationObjects objects) { wait_objects = std::move(objects); } - void ClearWaitObjects() { + void ClearSynchronizationObjects() { for (const auto& waiting_object : wait_objects) { waiting_object->RemoveWaitingThread(SharedFrom(this)); } @@ -330,7 +331,7 @@ public: } /// Determines whether all the objects this thread is waiting on are ready. - bool AllWaitObjectsReady() const; + bool AllSynchronizationObjectsReady() const; const MutexWaitingThreads& GetMutexWaitingThreads() const { return wait_mutex_threads; @@ -395,7 +396,7 @@ public: * will cause an assertion to trigger. */ bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, - std::shared_ptr<WaitObject> object, std::size_t index); + std::shared_ptr<SynchronizationObject> object, std::size_t index); u32 GetIdealCore() const { return ideal_core; @@ -494,7 +495,7 @@ private: /// Objects that the thread is waiting on, in the same order as they were /// passed to WaitSynchronization. 
- ThreadWaitObjects wait_objects; + ThreadSynchronizationObjects wait_objects; /// List of threads that are waiting for a mutex that is held by this thread. MutexWaitingThreads wait_mutex_threads; diff --git a/src/core/hle/kernel/writable_event.cpp b/src/core/hle/kernel/writable_event.cpp index c9332e3e1..fc2f7c424 100644 --- a/src/core/hle/kernel/writable_event.cpp +++ b/src/core/hle/kernel/writable_event.cpp @@ -22,7 +22,6 @@ EventPair WritableEvent::CreateEventPair(KernelCore& kernel, std::string name) { writable_event->name = name + ":Writable"; writable_event->readable = readable_event; readable_event->name = name + ":Readable"; - readable_event->signaled = false; return {std::move(readable_event), std::move(writable_event)}; } @@ -40,7 +39,7 @@ void WritableEvent::Clear() { } bool WritableEvent::IsSignaled() const { - return readable->signaled; + return readable->IsSignaled(); } } // namespace Kernel diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index cb839e4a2..d19513cbb 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -170,8 +170,10 @@ public: {3, nullptr, "SetContextForMultiStream"}, {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, - {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, - {7, nullptr, "DecodeInterleavedForMultiStream"}, + {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"}, + {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"}, + {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, + {9, nullptr, "DecodeInterleavedForMultiStream"}, }; // clang-format on diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 89bf8b815..e6b56a9f9 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -10,6 +10,7 @@ #include "core/core_timing_util.h" #include "core/frontend/emu_window.h" #include "core/frontend/input.h" +#include "core/hardware_properties.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" @@ -37,11 +38,11 @@ namespace Service::HID { // Updating period for each HID device. 
// TODO(ogniK): Find actual polling rate of hid -constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66); +constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66); [[maybe_unused]] constexpr s64 accelerometer_update_ticks = - static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100); + static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); [[maybe_unused]] constexpr s64 gyroscope_update_ticks = - static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100); + static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; IAppletResource::IAppletResource(Core::System& system) diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index ed5059047..92adde6d4 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -129,12 +129,20 @@ public: {304, nullptr, "Disconnect"}, {400, nullptr, "Initialize"}, {401, nullptr, "Finalize"}, - {402, nullptr, "SetOperationMode"}, + {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+ }; // clang-format on RegisterHandlers(functions); } + + void Initialize2(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_LDN, "(STUBBED) called"); + // Result success seem make this services start network and continue. + // If we just pass result error then it will stop and maybe try again and again. + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_UNKNOWN); + } }; class LDNS final : public ServiceFramework<LDNS> { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 6d8bca8bb..f1966ac0e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve return GetWaitbase(input, output); case IoctlCommand::IocChannelSetTimeoutCommand: return ChannelSetTimeout(input, output); + case IoctlCommand::IocChannelSetTimeslice: + return ChannelSetTimeslice(input, output); default: break; } @@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& return 0; } +u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetTimeslice params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSetTimeslice)); + LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); + + channel_timeslice = params.timeslice; + + return 0; +} + } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index d056dd046..2ac74743f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -48,6 +48,7 @@ private: IocAllocObjCtxCommand = 0xC0104809, IocChannelGetWaitbaseCommand = 0xC0080003, IocChannelSetTimeoutCommand = 0x40044803, + IocChannelSetTimeslice = 0xC004481D, }; enum class CtxObjects : u32_le { @@ -101,6 +102,11 @@ private: static_assert(sizeof(IoctlChannelSetPriority) == 4, "IoctlChannelSetPriority is incorrect size"); + struct IoctlSetTimeslice { + u32_le timeslice; + }; + static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size"); + struct IoctlEventIdControl { u32_le cmd; // 0=disable, 1=enable, 2=clear u32_le id; @@ -174,6 +180,7 @@ private: u64_le user_data{}; IoctlZCullBind zcull_params{}; u32_le channel_priority{}; + u32_le 
channel_timeslice{}; u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output); @@ -188,6 +195,7 @@ private: const std::vector<u8>& input2, IoctlVersion version); u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); + u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); std::shared_ptr<nvmap> nvmap_dev; u32 assigned_syncpoints{}; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 62752e419..134152210 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" @@ -26,8 +27,8 @@ namespace Service::NVFlinger { -constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60); -constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30); +constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); +constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30); NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(0, "Default", system); @@ -222,7 +223,7 @@ void NVFlinger::Compose() { s64 NVFlinger::GetNextTicks() const { constexpr s64 max_hertz = 120LL; - return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; + return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index ca1a783fc..1575f0b49 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/service/time/standard_steady_clock_core.h" namespace Service::Time::Clock { @@ -12,7 +13,7 @@ namespace Service::Time::Clock { TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index c77b98189..44d5bc651 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/service/time/tick_based_steady_clock_core.h" namespace Service::Time::Clock { @@ -12,7 +13,7 @@ namespace Service::Time::Clock { SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& 
system) { const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; return {ticks_time_span.ToSeconds(), GetClockSourceId()}; } diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 8ef4efcef..749b7be70 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -6,6 +6,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" @@ -233,7 +234,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { const auto ticks{Clock::TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; const s64 base_time_point{context.offset + current_time_point.time_point - ticks.ToSeconds()}; IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index 9b03191bf..fdaef233f 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -5,6 +5,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/service/time/clock_types.h" #include "core/hle/service/time/steady_clock_core.h" #include "core/hle/service/time/time_sharedmemory.h" @@ -31,7 +32,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system, Clock::TimeSpanType current_time_point) { const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Timing::CNTFREQ)}; + Core::Hardware::CNTFREQ)}; const Clock::SteadyClockContext context{ static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), clock_source_id}; diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index d1e6bed93..4472500d2 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -9,6 +9,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/process.h" #include "core/hle/service/hid/controllers/npad.h" #include "core/hle/service/hid/hid.h" @@ -17,7 +18,7 @@ namespace Memory { -constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 12); +constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12); constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) diff --git a/src/core/settings.h b/src/core/settings.h index e1a9a0ffa..f837d3fbc 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -429,6 +429,7 @@ struct Values { int vulkan_device; float resolution_factor; + int aspect_ratio; bool use_frame_limit; u16 frame_limit; bool use_disk_shader_cache; diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index 55e0dbc49..1e060f009 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -7,13 
+7,14 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/memory.h" #include "core/tools/freezer.h" namespace Tools { namespace { -constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60); +constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) { switch (width) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index db9332d00..4b0c6346f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -37,6 +37,7 @@ add_library(video_core STATIC memory_manager.h morton.cpp morton.h + query_cache.h rasterizer_accelerated.cpp rasterizer_accelerated.h rasterizer_cache.cpp @@ -74,6 +75,8 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_query_cache.cpp + renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h @@ -177,6 +180,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_query_cache.cpp + renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..842cdcbcf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,17 +4,21 @@ #include <cinttypes> #include <cstring> +#include <optional> #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { +using VideoCore::QueryType; + /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -399,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryCondition(); break; } + case MAXWELL3D_REG_INDEX(counter_reset): { + ProcessCounterReset(); + break; + } case MAXWELL3D_REG_INDEX(sync_info): { ProcessSyncPoint(); break; @@ -519,61 +527,51 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { + struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. + if (long_query) { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. 
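For reference, the two layouts that the new StampQueryResult helper writes can be shown in a self-contained sketch; the raw byte buffer and tick value below are stand-ins for the GPU MemoryManager write and GPU().GetTicks() used by the real code.

#include <cstdint>
#include <cstring>

struct LongQueryResultSketch {
    uint64_t value;
    uint64_t timestamp;
};
static_assert(sizeof(LongQueryResultSketch) == 16, "LongQueryResultSketch has wrong size");

// Short queries stamp a 4-byte payload; long queries stamp the 16-byte
// value + timestamp pair, matching the struct written by StampQueryResult.
void StampSketch(uint8_t* dest, uint64_t payload, uint64_t gpu_ticks, bool long_query) {
    if (long_query) {
        const LongQueryResultSketch result{payload, gpu_ticks};
        std::memcpy(dest, &result, sizeof(result));
    } else {
        const auto value = static_cast<uint32_t>(payload);
        std::memcpy(dest, &value, sizeof(value));
    }
}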
+ LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); + } +} +void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: + StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); - } - - // TODO(Subv): Research and implement how query sync conditions work. - - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); - - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write<u32>(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + case Regs::QueryOperation::Acquire: + // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that + // matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + case Regs::QueryOperation::Counter: + if (const std::optional<u64> result = GetQueryResult()) { + // If the query returns an empty optional it means it's cached and deferred. + // In this case we have a non-empty result, so we stamp it immediately. 
+ StampQueryResult(*result, regs.query.query_get.short_query == 0); } break; - } + case Regs::QueryOperation::Trap: + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast<u32>(regs.query.query_get.mode.Value())); + UNIMPLEMENTED_MSG("Unknown query operation"); + break; } } @@ -590,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() { } case Regs::ConditionMode::ResNonZero: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; break; } case Regs::ConditionMode::Equal: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; break; } case Regs::ConditionMode::NotEqual: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; break; @@ -616,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() { } } +void Maxwell3D::ProcessCounterReset() { + switch (regs.counter_reset) { + case Regs::CounterReset::SampleCnt: + rasterizer.ResetCounter(QueryType::SamplesPassed); + break; + default: + LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", + static_cast<int>(regs.counter_reset)); + break; + } +} + void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); @@ -658,6 +668,22 @@ void Maxwell3D::DrawArrays() { } } +std::optional<u64> Maxwell3D::GetQueryResult() { + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + return 0; + case Regs::QuerySelect::SamplesPassed: + // Deferred. + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); + return {}; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); + return 1; + } +} + void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. auto& shader = state.shader_stages[stage_index]; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7b1912a66..26939be3f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include <array> #include <bitset> +#include <optional> #include <type_traits> #include <unordered_map> #include <vector> @@ -71,12 +72,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. 
- Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -410,6 +410,27 @@ public: Linear = 1, }; + enum class CounterReset : u32 { + SampleCnt = 0x01, + Unk02 = 0x02, + Unk03 = 0x03, + Unk04 = 0x04, + EmittedPrimitives = 0x10, // Not tested + Unk11 = 0x11, + Unk12 = 0x12, + Unk13 = 0x13, + Unk15 = 0x15, + Unk16 = 0x16, + Unk17 = 0x17, + Unk18 = 0x18, + Unk1A = 0x1A, + Unk1B = 0x1B, + Unk1C = 0x1C, + Unk1D = 0x1D, + Unk1E = 0x1E, + GeneratedPrimitives = 0x1F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -858,7 +879,7 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_UNION_PADDING_WORDS(0x1); + u32 samplecnt_enable; float point_size; @@ -866,7 +887,11 @@ public: u32 point_sprite_enable; - INSERT_UNION_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x3); + + CounterReset counter_reset; + + INSERT_UNION_PADDING_WORDS(0x1); u32 zeta_enable; @@ -1081,7 +1106,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1413,9 +1438,15 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); - // Handles Conditional Rendering + /// Writes the query result accordingly. + void StampQueryResult(u64 payload, bool long_query); + + /// Handles conditional rendering. void ProcessQueryCondition(); + /// Handles counter resets. + void ProcessCounterReset(); + /// Handles writes to syncing register. void ProcessSyncPoint(); @@ -1432,6 +1463,9 @@ private: // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); + + /// Returns a query's value or an empty object if the value will be deferred through a cache. 
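The deferral contract described in the comments above (an empty optional means the rasterizer's query cache will write the value later) can be restated as a small self-contained sketch; the enum and the commented-out cache call are simplified stand-ins for the real Maxwell3D and rasterizer types.

#include <cstdint>
#include <optional>

enum class QuerySelectSketch { Zero, SamplesPassed };

// Immediate selects return a payload to stamp right away; counter selects
// hand the work to the query cache and return an empty optional instead.
std::optional<uint64_t> GetQueryResultSketch(QuerySelectSketch select) {
    switch (select) {
    case QuerySelectSketch::Zero:
        return 0; // Stamped immediately by the caller.
    case QuerySelectSketch::SamplesPassed:
        // rasterizer.Query(gpu_addr, QueryType::SamplesPassed, timestamp);
        return std::nullopt; // Deferred: flushed from the cache on demand.
    }
    return std::nullopt;
}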
+ std::optional<u64> GetQueryResult(); }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1497,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); +ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); +ASSERT_REG_POSITION(counter_reset, 0x54C); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 402869fde..c9bc83cd7 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1677,11 +1677,11 @@ union Instruction { } xmad; union { - BitField<20, 14, u64> offset; + BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; u64 GetOffset() const { - return offset * 4; + return shifted_offset * 4; } } cbuf34; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..7d7137109 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -23,7 +24,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system); + memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::GetTicks() const { + // This values were reversed engineered by fincs from NVN + // The gpu clock is reported in units of 385/625 nanoseconds + constexpr u64 gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock<std::mutex> LockSync() { 
return std::unique_lock{sync_mutex}; } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f1d50be3e..11848fbce 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -9,12 +9,13 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" -#include "video_core/gpu.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" namespace Tegra { -MemoryManager::MemoryManager(Core::System& system) : system{system} { +MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : rasterizer{rasterizer}, system{system} { std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); std::fill(page_table.attributes.begin(), page_table.attributes.end(), Common::PageType::Unmapped); @@ -83,8 +84,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); - system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); - + rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() ->VMManager() @@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); + rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); std::memcpy(dest_buffer, src_ptr, copy_amount); break; } @@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { u8* dest_ptr{page_table.pointers[page_index] + page_offset}; - system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); + rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); std::memcpy(dest_ptr, src_buffer, copy_amount); break; } @@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std:: switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); + rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); WriteBlock(dest_addr, src_ptr, copy_amount); break; } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 393447eb4..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -10,6 +10,10 @@ #include "common/common_types.h" #include "common/page_table.h" +namespace VideoCore { +class RasterizerInterface; +} + namespace Core { class System; } @@ -47,7 +51,7 @@ struct VirtualMemoryArea { class MemoryManager final { public: - explicit MemoryManager(Core::System& system); + explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); ~MemoryManager(); GPUVAddr AllocateSpace(u64 size, u64 align); @@ -172,6 +176,7 @@ private: Common::PageTable page_table{page_bits}; VMAMap vma_map; + VideoCore::RasterizerInterface& rasterizer; Core::System& system; }; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..e66054ed0 --- /dev/null +++ b/src/video_core/query_cache.h @@ -0,0 +1,359 @@ +// Copyright 2020 yuzu Emulator Project 
+// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> +#include <cstring> +#include <iterator> +#include <memory> +#include <mutex> +#include <optional> +#include <unordered_map> +#include <vector> + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template <class QueryCache, class HostCounter> +class CounterStreamBase { +public: + explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) + : cache{cache}, type{type} {} + + /// Updates the state of the stream, enabling or disabling as needed. + void Update(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } + } + + /// Resets the stream to zero. It doesn't disable the query after resetting. + void Reset() { + if (current) { + current->EndQuery(); + + // Immediately start a new query to avoid disabling its state. + current = cache.Counter(nullptr, type); + } + last = nullptr; + } + + /// Returns the current counter slicing as needed. + std::shared_ptr<HostCounter> Current() { + if (!current) { + return nullptr; + } + current->EndQuery(); + last = std::move(current); + current = cache.Counter(last, type); + return last; + } + + /// Returns true when the counter stream is enabled. + bool IsEnabled() const { + return current != nullptr; + } + +private: + /// Enables the stream. + void Enable() { + if (current) { + return; + } + current = cache.Counter(last, type); + } + + // Disables the stream. + void Disable() { + if (current) { + current->EndQuery(); + } + last = std::exchange(current, nullptr); + } + + QueryCache& cache; + const VideoCore::QueryType type; + + std::shared_ptr<HostCounter> current; + std::shared_ptr<HostCounter> last; +}; + +template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, + class QueryPool> +class QueryCacheBase { +public: + explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ + static_cast<QueryCache&>(*this), + VideoCore::QueryType::SamplesPassed}}} {} + + void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; + FlushAndRemoveRegion(addr, size); + } + + void FlushRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; + FlushAndRemoveRegion(addr, size); + } + + /** + * Records a query in GPU mapped memory, potentially marked with a timestamp. + * @param gpu_addr GPU address to flush to when the mapped memory is read. + * @param type Query type, e.g. SamplesPassed. + * @param timestamp Timestamp, when empty the flushed query is assumed to be short. + */ + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { + std::unique_lock lock{mutex}; + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + } + + query->BindCounter(Stream(type).Current(), timestamp); + } + + /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 
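How a backend is expected to drive this cache per draw can be outlined with a compact, self-contained sketch; the types below are stand-ins, but the call sites mirror the RasterizerOpenGL changes further down in this diff.

#include <cstdint>
#include <optional>

enum class QueryTypeSketch { SamplesPassed };

struct QueryCacheSketch {
    void UpdateCounters() {}                                          // Reads samplecnt_enable.
    void ResetCounter(QueryTypeSketch) {}                             // counter_reset handling.
    void Query(uint64_t, QueryTypeSketch, std::optional<uint64_t>) {} // QueryOperation::Counter path.
};

struct RasterizerSketch {
    QueryCacheSketch query_cache;

    // Counters are refreshed from GPU state once per draw (and per clear/dispatch).
    void Draw() {
        query_cache.UpdateCounters();
        // ... bind state and issue the draw call ...
    }

    void ResetCounter(QueryTypeSketch type) {
        query_cache.ResetCounter(type);
    }

    void Query(uint64_t gpu_addr, QueryTypeSketch type, std::optional<uint64_t> timestamp) {
        query_cache.Query(gpu_addr, type, timestamp);
    }
};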
+ void UpdateCounters() { + std::unique_lock lock{mutex}; + const auto& regs = system.GPU().Maxwell3D().regs; + Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); + } + + /// Resets a counter to zero. It doesn't disable the query after resetting. + void ResetCounter(VideoCore::QueryType type) { + std::unique_lock lock{mutex}; + Stream(type).Reset(); + } + + /// Disable all active streams. Expected to be called at the end of a command buffer. + void DisableStreams() { + std::unique_lock lock{mutex}; + for (auto& stream : streams) { + stream.Update(false); + } + } + + /// Returns a new host counter. + std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) { + return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), + type); + } + + /// Returns the counter stream of the specified type. + CounterStream& Stream(VideoCore::QueryType type) { + return streams[static_cast<std::size_t>(type)]; + } + + /// Returns the counter stream of the specified type. + const CounterStream& Stream(VideoCore::QueryType type) const { + return streams[static_cast<std::size_t>(type)]; + } + +protected: + std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; + +private: + /// Flushes a memory range to guest memory and removes it from the cache. + void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast<u64>(addr); + const u64 addr_end = addr_begin + static_cast<u64>(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_end = cache_begin + query.SizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + query.Flush(); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } + } + + /// Registers the passed parameters as cached and returns a pointer to the stored cached query. + CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { + rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); + const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, + host_ptr); + } + + /// Tries to a get a cached query. Returns nullptr on failure. + CachedQuery* TryGet(CacheAddr addr) { + const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCacheAddr() == addr; }); + return found != std::end(contents) ? 
&*found : nullptr; + } + + static constexpr std::uintptr_t PAGE_SIZE = 4096; + static constexpr unsigned PAGE_SHIFT = 12; + + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; + + std::recursive_mutex mutex; + + std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; + + std::array<CounterStream, VideoCore::NumQueryTypes> streams; +}; + +template <class QueryCache, class HostCounter> +class HostCounterBase { +public: + explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_) + : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { + // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. + constexpr u64 depth_threshold = 96; + if (depth > depth_threshold) { + depth = 0; + base_result = dependency->Query(); + dependency = nullptr; + } + } + virtual ~HostCounterBase() = default; + + /// Returns the current value of the query. + u64 Query() { + if (result) { + return *result; + } + + u64 value = BlockingQuery() + base_result; + if (dependency) { + value += dependency->Query(); + dependency = nullptr; + } + + result = value; + return *result; + } + + /// Returns true when flushing this query will potentially wait. + bool WaitPending() const noexcept { + return result.has_value(); + } + + u64 Depth() const noexcept { + return depth; + } + +protected: + /// Returns the value of query from the backend API blocking as needed. + virtual u64 BlockingQuery() const = 0; + +private: + std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. + std::optional<u64> result; ///< Filled with the already returned value. + u64 depth; ///< Number of nested dependencies. + u64 base_result = 0; ///< Equivalent to nested dependencies value. +}; + +template <class HostCounter> +class CachedQueryBase { +public: + explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) + : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + virtual ~CachedQueryBase() = default; + + CachedQueryBase(CachedQueryBase&&) noexcept = default; + CachedQueryBase(const CachedQueryBase&) = delete; + + CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default; + CachedQueryBase& operator=(const CachedQueryBase&) = delete; + + /// Flushes the query to guest memory. + virtual void Flush() { + // When counter is nullptr it means that it's just been reseted. We are supposed to write a + // zero in these cases. + const u64 value = counter ? counter->Query() : 0; + std::memcpy(host_ptr, &value, sizeof(u64)); + + if (timestamp) { + std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + } + } + + /// Binds a counter to this query. + void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { + if (counter) { + // If there's an old counter set it means the query is being rewritten by the game. + // To avoid losing the data forever, flush here. + Flush(); + } + counter = std::move(counter_); + timestamp = timestamp_; + } + + VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + CacheAddr GetCacheAddr() const noexcept { + return ToCacheAddr(host_ptr); + } + + u64 SizeInBytes() const noexcept { + return SizeInBytes(timestamp.has_value()); + } + + static constexpr u64 SizeInBytes(bool with_timestamp) noexcept { + return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; + } + +protected: + /// Returns true when querying the counter may potentially block. 
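The way sliced host counters accumulate through their dependency chain can be illustrated in isolation; plain integers stand in for the backend query readback, and CounterSketch is not a type from this change.

#include <cstdint>
#include <memory>
#include <optional>

// Each counter owns the samples gathered since the previous slice; Query()
// folds the whole chain into one total and caches it, as HostCounterBase does.
struct CounterSketch {
    CounterSketch(std::shared_ptr<CounterSketch> dep, uint64_t backend_value)
        : dependency{std::move(dep)}, backend{backend_value} {}

    uint64_t Query() {
        if (result) {
            return *result; // Already resolved; never block twice.
        }
        uint64_t value = backend; // Stand-in for the blocking backend readback.
        if (dependency) {
            value += dependency->Query();
            dependency = nullptr;
        }
        result = value;
        return *result;
    }

    std::shared_ptr<CounterSketch> dependency;
    uint64_t backend;
    std::optional<uint64_t> result;
};

// Example: three slices of 5, 7 and 3 samples resolve to a single value of 15.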
+ bool WaitPending() const noexcept { + return counter && counter->WaitPending(); + } + +private: + static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. + static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. + static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. + + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. + std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. +}; + +} // namespace VideoCommon diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index c586cd6fe..e9f1436f0 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include <atomic> #include <functional> +#include <optional> #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" @@ -17,6 +18,11 @@ class MemoryManager; namespace VideoCore { +enum class QueryType { + SamplesPassed, +}; +constexpr std::size_t NumQueryTypes = 1; + enum class LoadCallbackStage { Prepare, Decompile, @@ -41,6 +47,12 @@ public: /// Dispatches a compute shader invocation virtual void DispatchCompute(GPUVAddr code_addr) = 0; + /// Resets the counter of a query + virtual void ResetCounter(QueryType type) = 0; + + /// Records a GPU query and caches it + virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; + /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp new file mode 100644 index 000000000..f12e9f55f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -0,0 +1,120 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <cstring> +#include <memory> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <glad/glad.h> + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" + +namespace OpenGL { + +namespace { + +constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; + +constexpr GLenum GetTarget(VideoCore::QueryType type) { + return QueryTargets[static_cast<std::size_t>(type)]; +} + +} // Anonymous namespace + +QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) + : VideoCommon::QueryCacheBase< + QueryCache, CachedQuery, CounterStream, HostCounter, + std::vector<OGLQuery>>{system, + static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, + gl_rasterizer{gl_rasterizer} {} + +QueryCache::~QueryCache() = default; + +OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { + auto& reserve = query_pools[static_cast<std::size_t>(type)]; + OGLQuery query; + if (reserve.empty()) { + query.Create(GetTarget(type)); + return query; + } + + query = std::move(reserve.back()); + reserve.pop_back(); + return query; +} + +void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { + query_pools[static_cast<std::size_t>(type)].push_back(std::move(query)); +} + +bool QueryCache::AnyCommandQueued() const noexcept { + return gl_rasterizer.AnyCommandQueued(); +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)} { + glBeginQuery(GetTarget(type), query.handle); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, std::move(query)); +} + +void HostCounter::EndQuery() { + if (!cache.AnyCommandQueued()) { + // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not + // having any of these causes a lock. glFlush is considered a command, so we can safely wait + // for this. Insert to the OpenGL command stream a flush. + glFlush(); + } + glEndQuery(GetTarget(type)); +} + +u64 HostCounter::BlockingQuery() const { + GLint64 value; + glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); + return static_cast<u64>(value); +} + +CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} + +CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept + : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} + +CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { + VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); + cache = rhs.cache; + type = rhs.type; + return *this; +} + +void CachedQuery::Flush() { + // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. + // To avoid this disable and re-enable keeping the dependency stream. + // But we only have to do this if we have pending waits to be done. 
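The disable/flush/re-enable dance that the comment above describes could also be written as a small scope guard; this is a hedged restatement of the pattern, not code from the change, and it only assumes a stream type exposing Update(bool) and IsEnabled().

// Pauses a counter stream for the lifetime of the guard, then resumes it,
// keeping the dependency chain intact across the pause.
template <typename Stream>
class StreamPauseGuardSketch {
public:
    StreamPauseGuardSketch(Stream& stream, bool should_pause)
        : stream{stream}, paused{should_pause && stream.IsEnabled()} {
        if (paused) {
            stream.Update(false); // End the active backend query before waiting on another.
        }
    }

    ~StreamPauseGuardSketch() {
        if (paused) {
            stream.Update(true); // Start a fresh query once the flush is done.
        }
    }

private:
    Stream& stream;
    bool paused;
};

// Usage inside a flush: StreamPauseGuardSketch guard{stream, WaitPending()}; then flush.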
+ auto& stream = cache->Stream(type); + const bool slice_counter = WaitPending() && stream.IsEnabled(); + if (slice_counter) { + stream.Update(false); + } + + VideoCommon::CachedQueryBase<HostCounter>::Flush(); + + if (slice_counter) { + stream.Update(true); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h new file mode 100644 index 000000000..d8e7052a1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -0,0 +1,78 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Core { +class System; +} + +namespace OpenGL { + +class CachedQuery; +class HostCounter; +class QueryCache; +class RasterizerOpenGL; + +using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; + +class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, + HostCounter, std::vector<OGLQuery>> { +public: + explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + ~QueryCache(); + + OGLQuery AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, OGLQuery&& query); + + bool AnyCommandQueued() const noexcept; + +private: + RasterizerOpenGL& gl_rasterizer; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { +public: + explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + QueryCache& cache; + const VideoCore::QueryType type; + OGLQuery query; +}; + +class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { +public: + explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, + u8* host_ptr); + CachedQuery(CachedQuery&& rhs) noexcept; + CachedQuery(const CachedQuery&) = delete; + + CachedQuery& operator=(CachedQuery&& rhs) noexcept; + CachedQuery& operator=(const CachedQuery&) = delete; + + void Flush() override; + +private: + QueryCache* cache; + VideoCore::QueryType type; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b0eb14c8b..4bdc8db85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + 
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() { } else if (use_stencil) { glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); } + + ++num_queued_commands; } void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -638,6 +644,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { glTextureBarrier(); } + ++num_queued_commands; + const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); const GLsizei num_instances = static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); @@ -707,6 +715,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyProgramPipeline(); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); + ++num_queued_commands; +} + +void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional<u64> timestamp) { + query_cache.Query(gpu_addr, type, timestamp); } void RasterizerOpenGL::FlushAll() {} @@ -718,6 +736,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -728,6 +747,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { @@ -738,10 +758,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushCommands() { + // Only flush when we have commands queued to OpenGL. + if (num_queued_commands == 0) { + return; + } + num_queued_commands = 0; glFlush(); } void RasterizerOpenGL::TickFrame() { + // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. 
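The queued-command bookkeeping introduced here reduces to a counter that gates glFlush; a minimal self-contained model follows (the GL call itself is left to the caller so the sketch carries no GL dependency).

#include <cstddef>

// Draws, clears and dispatches bump the counter; FlushCommands only issues a
// glFlush when something was queued; a frame tick clears the counter because
// the buffer swap flushes implicitly.
struct CommandCounterSketch {
    std::size_t num_queued_commands = 0;

    void OnCommandQueued() { ++num_queued_commands; }

    bool ShouldFlush() {
        if (num_queued_commands == 0) {
            return false; // Nothing queued to the driver, skip the flush.
        }
        num_queued_commands = 0;
        return true; // Caller performs the actual glFlush.
    }

    void OnFrameTick() { num_queued_commands = 0; }
};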
+ num_queued_commands = 0; + buffer_cache.TickFrame(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0501f3828..c772fd4ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -61,6 +62,8 @@ public: bool DrawMultiBatch(bool is_indexed) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -75,6 +78,11 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + /// Returns true when there are commands queued to the OpenGL server. + bool AnyCommandQueued() const { + return num_queued_commands > 0; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -180,10 +188,23 @@ private: /// Syncs the alpha test state to match the guest state void SyncAlphaTest(); - /// Check for extension that are not strictly required - /// but are needed for correct emulation + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); + std::size_t CalculateVertexArraysSize() const; + + std::size_t CalculateIndexBufferSize() const; + + /// Updates and returns a vertex array object representing current vertex format + GLuint SetupVertexFormat(); + + void SetupVertexBuffer(GLuint vao); + void SetupVertexInstances(GLuint vao); + + GLintptr SetupIndexBuffer(); + + void SetupShaders(GLenum primitive_mode); + const Device device; OpenGLState state; @@ -191,6 +212,7 @@ private: ShaderCacheOpenGL shader_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; + QueryCache query_cache; Core::System& system; ScreenInfo& screen_info; @@ -208,19 +230,8 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; - std::size_t CalculateVertexArraysSize() const; - - std::size_t CalculateIndexBufferSize() const; - - /// Updates and returns a vertex array object representing current vertex format - GLuint SetupVertexFormat(); - - void SetupVertexBuffer(GLuint vao); - void SetupVertexInstances(GLuint vao); - - GLintptr SetupIndexBuffer(); - - void SetupShaders(GLenum primitive_mode); + /// Number of commands queued to the OpenGL driver. Reseted on flush. 
+ std::size_t num_queued_commands = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 5c96c1d46..f0ddfb276 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -207,4 +207,21 @@ void OGLFramebuffer::Release() { handle = 0; } +void OGLQuery::Create(GLenum target) { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glCreateQueries(target, 1, &handle); +} + +void OGLQuery::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteQueries(1, &handle); + handle = 0; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 3a85a1d4c..514d1d165 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -266,4 +266,29 @@ public: GLuint handle = 0; }; +class OGLQuery : private NonCopyable { +public: + OGLQuery() = default; + + OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLQuery() { + Release(); + } + + OGLQuery& operator=(OGLQuery&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(GLenum target); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9840f26e5..588a6835f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.depthBiasClamp = true; features.geometryShader = true; features.tessellationShader = true; + features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; features.shaderStorageImageWriteWithoutFormat = true; @@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan bit8_storage.uniformAndStorageBuffer8BitAccess = true; SetNext(next, bit8_storage); + vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.hostQueryReset = true; + SetNext(next, host_query_reset); + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8.shaderFloat16 = true; @@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; std::bitset<required_extensions.size()> available_extensions{}; @@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 
std::make_pair(features.shaderStorageImageWriteWithoutFormat, @@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(13); + extensions.reserve(14); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); + extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 000000000..ffbf60dda --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +namespace { + +constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; + +constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { + return QUERY_TARGETS[static_cast<std::size_t>(type)]; +} + +} // Anonymous namespace + +QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} + +QueryPool::~QueryPool() = default; + +void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { + device = &device_; + type = type_; +} + +std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { + std::size_t index; + do { + index = CommitResource(fence); + } while (usage[index]); + usage[index] = true; + + return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; +} + +void QueryPool::Allocate(std::size_t begin, std::size_t end) { + usage.resize(end); + + const auto dev = device->GetLogical(); + const u32 size = static_cast<u32>(end - begin); + const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); + pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); +} + +void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { + const auto it = + std::find_if(std::begin(pools), std::end(pools), + [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + ASSERT(it != std::end(pools)); + + const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); + usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; +} + +VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler) + : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, + QueryPool>{system, rasterizer}, + device{device}, scheduler{scheduler} { + for (std::size_t i = 0; i < 
static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { + query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); + } +} + +VKQueryCache::~VKQueryCache() = default; + +std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { + return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); +} + +void VKQueryCache::Reserve(VideoCore::QueryType type, + std::pair<vk::QueryPool, std::uint32_t> query) { + query_pools[static_cast<std::size_t>(type)].Reserve(query); +} + +HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + const auto dev = cache.Device().GetLogical(); + cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { + dev.resetQueryPoolEXT(query.first, query.second, 1, dld); + cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + }); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, query); +} + +void HostCounter::EndQuery() { + cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { + cmdbuf.endQuery(query.first, query.second, dld); + }); +} + +u64 HostCounter::BlockingQuery() const { + if (ticks >= cache.Scheduler().Ticks()) { + cache.Scheduler().Flush(); + } + + const auto dev = cache.Device().GetLogical(); + const auto& dld = cache.Device().GetDispatchLoader(); + u64 value; + dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), + vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); + return value; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 000000000..c3092ee96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -0,0 +1,104 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
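A note on the pool bookkeeping in the new vk_query_cache.cpp above: Commit flattens (pool, slot) pairs into a single index in blocks of GROW_STEP, and Reserve reverses the mapping so slots can be reused. A minimal stand-alone sketch of that arithmetic follows; the fence-backed CommitResource is replaced by a linear search and a plain int stands in for the vk::QueryPool handle, so this is a model of the indexing only, not the real class.

// Stand-alone model of QueryPool's (pool, slot) bookkeeping.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

class QuerySlotAllocator {
    static constexpr std::size_t GROW_STEP = 512; // same growth step as the real pool

public:
    // Hands out a free slot, creating a new "pool" of GROW_STEP slots when all are in use.
    std::pair<int, std::uint32_t> Commit() {
        std::size_t index = 0;
        while (index < usage.size() && usage[index]) {
            ++index;
        }
        if (index == usage.size()) {
            usage.resize(usage.size() + GROW_STEP, false);
            pools.push_back(static_cast<int>(pools.size()));
        }
        usage[index] = true;
        return {pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)};
    }

    // Marks a previously committed slot as reusable, mirroring QueryPool::Reserve.
    void Reserve(std::pair<int, std::uint32_t> query) {
        const auto pool_index = static_cast<std::size_t>(query.first);
        assert(pool_index < pools.size());
        usage[pool_index * GROW_STEP + query.second] = false;
    }

private:
    std::vector<int> pools;  // one handle per block of GROW_STEP slots
    std::vector<bool> usage; // flat usage bitmap across all pools
};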
+ +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class CachedQuery; +class HostCounter; +class VKDevice; +class VKQueryCache; +class VKScheduler; + +using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; + +class QueryPool final : public VKFencedPool { +public: + explicit QueryPool(); + ~QueryPool() override; + + void Initialize(const VKDevice& device, VideoCore::QueryType type); + + std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); + + void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + static constexpr std::size_t GROW_STEP = 512; + + const VKDevice* device = nullptr; + VideoCore::QueryType type = {}; + + std::vector<UniqueQueryPool> pools; + std::vector<bool> usage; +}; + +class VKQueryCache final + : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, + QueryPool> { +public: + explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler); + ~VKQueryCache(); + + std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); + + const VKDevice& Device() const noexcept { + return device; + } + + VKScheduler& Scheduler() const noexcept { + return scheduler; + } + +private: + const VKDevice& device; + VKScheduler& scheduler; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { +public: + explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + VKQueryCache& cache; + const VideoCore::QueryType type; + const std::pair<vk::QueryPool, std::uint32_t> query; + const u64 ticks; +}; + +class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { +public: + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aada38702..79aa121ed 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool), pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device) {} + sampler_cache(device), query_cache(system, *this, device, scheduler) { + scheduler.SetQueryCache(query_cache); +} RasterizerVulkan::~RasterizerVulkan() = default; @@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { FlushWork(); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; @@ -362,6 +366,8 
@@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; @@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { sampled_views.clear(); image_views.clear(); + query_cache.UpdateCounters(); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ComputePipelineCacheKey key{ code_addr, @@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { }); } +void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional<u64> timestamp) { + query_cache.Query(gpu_addr, type, timestamp); +} + void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7be71e734..add1ad88c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -96,7 +97,7 @@ struct ImageView { vk::ImageLayout* layout = nullptr; }; -class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, VKScreenInfo& screen_info, const VKDevice& device, @@ -108,6 +109,8 @@ public: bool DrawMultiBatch(bool is_indexed) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -247,6 +250,7 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; + VKQueryCache query_cache; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1..92bd6c344 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/renderer_vulkan/declarations.h" #include 
"video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { } void VKScheduler::AllocateNewContext() { + ++ticks; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, device.GetDispatchLoader()); + // Enable counters once again. These are disabled when a command buffer is finished. + if (query_cache) { + query_cache->UpdateCounters(); + } } void VKScheduler::InvalidateState() { @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { } void VKScheduler::EndPendingOperations() { + query_cache->DisableStreams(); EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0..62fd7858b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@ #pragma once +#include <atomic> #include <condition_variable> #include <memory> #include <optional> @@ -18,6 +19,7 @@ namespace Vulkan { class VKDevice; class VKFence; +class VKQueryCache; class VKResourceManager; class VKFenceView { @@ -67,6 +69,11 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(vk::Pipeline pipeline); + /// Assigns the query cache. + void SetQueryCache(VKQueryCache& query_cache_) { + query_cache = &query_cache_; + } + /// Returns true when viewports have been set in the current command buffer. bool TouchViewports() { return std::exchange(state.viewports, true); @@ -112,6 +119,11 @@ public: return current_fence; } + /// Returns the current command buffer tick. 
+ u64 Ticks() const { + return ticks; + } + private: class Command { public: @@ -205,6 +217,8 @@ private: const VKDevice& device; VKResourceManager& resource_manager; + VKQueryCache* query_cache = nullptr; + vk::CommandBuffer current_cmdbuf; VKFence* current_fence = nullptr; VKFence* next_fence = nullptr; @@ -227,6 +241,7 @@ private: Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; std::mutex mutex; std::condition_variable cv; + std::atomic<u64> ticks = 0; bool quit = false; }; diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index e60875cc4..21366869d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_R: return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::ICMP_IMM: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index f992bbe2a..70d1c055b 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::BFI_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::BFI_IMM_R: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index cd94693c1..6209fff75 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -630,6 +630,7 @@ void Config::ReadRendererValues() { Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); Settings::values.resolution_factor = ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); + Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); Settings::values.use_frame_limit = ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); @@ -1064,6 +1065,7 @@ void Config::SaveRendererValues() { WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); WriteSetting(QStringLiteral("resolution_factor"), static_cast<double>(Settings::values.resolution_factor), 1.0); + WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp 
index f57a24e36..ea899c080 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -97,6 +97,7 @@ void ConfigureGraphics::SetConfiguration() { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); + ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); @@ -114,6 +115,7 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.vulkan_device = vulkan_device; Settings::values.resolution_factor = ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); + Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_asynchronous_gpu_emulation = diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index e24372204..db60426ab 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -139,6 +139,41 @@ </layout> </item> <item> + <layout class="QHBoxLayout" name="horizontalLayout_6"> + <item> + <widget class="QLabel" name="ar_label"> + <property name="text"> + <string>Aspect Ratio:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="aspect_ratio_combobox"> + <item> + <property name="text"> + <string>Default (16:9)</string> + </property> + </item> + <item> + <property name="text"> + <string>Force 4:3</string> + </property> + </item> + <item> + <property name="text"> + <string>Force 21:9</string> + </property> + </item> + <item> + <property name="text"> + <string>Stretch to Window</string> + </property> + </item> + </widget> + </item> + </layout> + </item> + <item> <layout class="QHBoxLayout" name="horizontalLayout_3"> <item> <widget class="QLabel" name="bg_label"> diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 727bd8a94..3f1a94627 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -12,8 +12,8 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" -#include "core/hle/kernel/wait_object.h" #include "core/memory.h" WaitTreeItem::WaitTreeItem() = default; @@ -133,8 +133,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons return list; } -WaitTreeWaitObject::WaitTreeWaitObject(const Kernel::WaitObject& o) : object(o) {} -WaitTreeWaitObject::~WaitTreeWaitObject() = default; +WaitTreeSynchronizationObject::WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& o) + : object(o) {} +WaitTreeSynchronizationObject::~WaitTreeSynchronizationObject() = default; WaitTreeExpandableItem::WaitTreeExpandableItem() = default; WaitTreeExpandableItem::~WaitTreeExpandableItem() = default; @@ -143,25 +144,26 @@ bool WaitTreeExpandableItem::IsExpandable() const { return true; } -QString WaitTreeWaitObject::GetText() const { 
+QString WaitTreeSynchronizationObject::GetText() const { return tr("[%1]%2 %3") .arg(object.GetObjectId()) .arg(QString::fromStdString(object.GetTypeName()), QString::fromStdString(object.GetName())); } -std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitObject& object) { +std::unique_ptr<WaitTreeSynchronizationObject> WaitTreeSynchronizationObject::make( + const Kernel::SynchronizationObject& object) { switch (object.GetHandleType()) { case Kernel::HandleType::ReadableEvent: return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); case Kernel::HandleType::Thread: return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object)); default: - return std::make_unique<WaitTreeWaitObject>(object); + return std::make_unique<WaitTreeSynchronizationObject>(object); } } -std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() const { +std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeSynchronizationObject::GetChildren() const { std::vector<std::unique_ptr<WaitTreeItem>> list; const auto& threads = object.GetWaitingThreads(); @@ -173,8 +175,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() con return list; } -WaitTreeObjectList::WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::WaitObject>>& list, - bool w_all) +WaitTreeObjectList::WaitTreeObjectList( + const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, bool w_all) : object_list(list), wait_all(w_all) {} WaitTreeObjectList::~WaitTreeObjectList() = default; @@ -188,11 +190,12 @@ QString WaitTreeObjectList::GetText() const { std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeObjectList::GetChildren() const { std::vector<std::unique_ptr<WaitTreeItem>> list(object_list.size()); std::transform(object_list.begin(), object_list.end(), list.begin(), - [](const auto& t) { return WaitTreeWaitObject::make(*t); }); + [](const auto& t) { return WaitTreeSynchronizationObject::make(*t); }); return list; } -WaitTreeThread::WaitTreeThread(const Kernel::Thread& thread) : WaitTreeWaitObject(thread) {} +WaitTreeThread::WaitTreeThread(const Kernel::Thread& thread) + : WaitTreeSynchronizationObject(thread) {} WaitTreeThread::~WaitTreeThread() = default; QString WaitTreeThread::GetText() const { @@ -241,7 +244,8 @@ QString WaitTreeThread::GetText() const { const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") .arg(context.pc, 8, 16, QLatin1Char{'0'}) .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); - return QStringLiteral("%1%2 (%3) ").arg(WaitTreeWaitObject::GetText(), pc_info, status); + return QStringLiteral("%1%2 (%3) ") + .arg(WaitTreeSynchronizationObject::GetText(), pc_info, status); } QColor WaitTreeThread::GetColor() const { @@ -273,7 +277,7 @@ QColor WaitTreeThread::GetColor() const { } std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { - std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren()); + std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeSynchronizationObject::GetChildren()); const auto& thread = static_cast<const Kernel::Thread&>(object); @@ -314,7 +318,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { } if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) { - list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetWaitObjects(), + list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(), thread.IsSleepingOnWait())); } @@ -323,7 +327,8 @@ 
std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { return list; } -WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object) : WaitTreeWaitObject(object) {} +WaitTreeEvent::WaitTreeEvent(const Kernel::ReadableEvent& object) + : WaitTreeSynchronizationObject(object) {} WaitTreeEvent::~WaitTreeEvent() = default; WaitTreeThreadList::WaitTreeThreadList(const std::vector<std::shared_ptr<Kernel::Thread>>& list) diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index 631274a5f..8e3bc4b24 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h @@ -19,7 +19,7 @@ class EmuThread; namespace Kernel { class HandleTable; class ReadableEvent; -class WaitObject; +class SynchronizationObject; class Thread; } // namespace Kernel @@ -99,35 +99,37 @@ private: const Kernel::Thread& thread; }; -class WaitTreeWaitObject : public WaitTreeExpandableItem { +class WaitTreeSynchronizationObject : public WaitTreeExpandableItem { Q_OBJECT public: - explicit WaitTreeWaitObject(const Kernel::WaitObject& object); - ~WaitTreeWaitObject() override; + explicit WaitTreeSynchronizationObject(const Kernel::SynchronizationObject& object); + ~WaitTreeSynchronizationObject() override; - static std::unique_ptr<WaitTreeWaitObject> make(const Kernel::WaitObject& object); + static std::unique_ptr<WaitTreeSynchronizationObject> make( + const Kernel::SynchronizationObject& object); QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; protected: - const Kernel::WaitObject& object; + const Kernel::SynchronizationObject& object; }; class WaitTreeObjectList : public WaitTreeExpandableItem { Q_OBJECT public: - WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::WaitObject>>& list, bool wait_all); + WaitTreeObjectList(const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& list, + bool wait_all); ~WaitTreeObjectList() override; QString GetText() const override; std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; private: - const std::vector<std::shared_ptr<Kernel::WaitObject>>& object_list; + const std::vector<std::shared_ptr<Kernel::SynchronizationObject>>& object_list; bool wait_all; }; -class WaitTreeThread : public WaitTreeWaitObject { +class WaitTreeThread : public WaitTreeSynchronizationObject { Q_OBJECT public: explicit WaitTreeThread(const Kernel::Thread& thread); @@ -138,7 +140,7 @@ public: std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; }; -class WaitTreeEvent : public WaitTreeWaitObject { +class WaitTreeEvent : public WaitTreeSynchronizationObject { Q_OBJECT public: explicit WaitTreeEvent(const Kernel::ReadableEvent& object); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index b01a36023..96f1ce3af 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -379,6 +379,8 @@ void Config::ReadValues() { Settings::values.resolution_factor = static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); + Settings::values.aspect_ratio = + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 00fd88279..8a2b658cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -122,6 
+122,10 @@ use_shader_jit = # factor for the Switch resolution resolution_factor = +# Aspect ratio +# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window +aspect_ratio = + # Whether to enable V-Sync (caps the framerate at 60FPS) or not. # 0 (default): Off, 1: On use_vsync = diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 84ab4d687..0ac93b62a 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp @@ -118,6 +118,8 @@ void Config::ReadValues() { // Renderer Settings::values.resolution_factor = static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); + Settings::values.aspect_ratio = + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); Settings::values.use_frame_limit = false; Settings::values.frame_limit = 100; Settings::values.use_disk_shader_cache = diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 9a3e86d68..8d93f7b88 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h @@ -26,6 +26,10 @@ use_shader_jit = # factor for the Switch resolution resolution_factor = +# Aspect ratio +# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window +aspect_ratio = + # Whether to enable V-Sync (caps the framerate at 60FPS) or not. # 0 (default): Off, 1: On use_vsync =
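The new aspect_ratio setting is stored as a plain integer and this diff only shows where it is read and written, not how the renderer consumes it. A plausible mapping from the documented values to a width/height ratio might look like the sketch below; the function name and the 0.0f convention for "Stretch to Window" are assumptions for illustration.

// Hypothetical helper mapping the aspect_ratio setting to a width/height ratio.
// Returning 0.0f for "Stretch to Window" signals the caller to keep the window's own ratio.
#include <cstdint>

float EmulationAspectRatio(std::int32_t aspect_ratio_setting) {
    switch (aspect_ratio_setting) {
    case 1:
        return 4.0f / 3.0f;  // Force 4:3
    case 2:
        return 21.0f / 9.0f; // Force 21:9
    case 3:
        return 0.0f;         // Stretch to Window
    case 0:
    default:
        return 16.0f / 9.0f; // Default (16:9), the Switch's native ratio
    }
}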