diff options
Diffstat (limited to 'src')
68 files changed, 971 insertions, 362 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 57922b51c..b18a2a2f5 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -53,6 +53,8 @@ add_library(common STATIC div_ceil.h dynamic_library.cpp dynamic_library.h + error.cpp + error.h fiber.cpp fiber.h fs/file.cpp @@ -88,7 +90,6 @@ add_library(common STATIC microprofile.cpp microprofile.h microprofileui.h - misc.cpp nvidia_flags.cpp nvidia_flags.h page_table.cpp diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 53bd7da60..4c1e29de6 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,9 +4,8 @@ #pragma once -#include <algorithm> #include <array> -#include <string> +#include <iterator> #if !defined(ARCHITECTURE_x86_64) #include <cstdlib> // for exit @@ -49,16 +48,6 @@ __declspec(dllimport) void __stdcall DebugBreak(void); #endif // _MSC_VER ndef -// Generic function to get last error message. -// Call directly after the command or use the error num. -// This function might change the error code. -// Defined in misc.cpp. -[[nodiscard]] std::string GetLastErrorMsg(); - -// Like GetLastErrorMsg(), but passing an explicit error code. -// Defined in misc.cpp. -[[nodiscard]] std::string NativeErrorToString(int e); - #define DECLARE_ENUM_FLAG_OPERATORS(type) \ [[nodiscard]] constexpr type operator|(type a, type b) noexcept { \ using T = std::underlying_type_t<type>; \ @@ -72,6 +61,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void); using T = std::underlying_type_t<type>; \ return static_cast<type>(static_cast<T>(a) ^ static_cast<T>(b)); \ } \ + [[nodiscard]] constexpr type operator<<(type a, type b) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<type>(static_cast<T>(a) << static_cast<T>(b)); \ + } \ + [[nodiscard]] constexpr type operator>>(type a, type b) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<type>(static_cast<T>(a) >> static_cast<T>(b)); \ + } \ constexpr type& operator|=(type& a, type b) noexcept { \ a = a | b; \ return a; \ @@ -84,6 +81,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void); a = a ^ b; \ return a; \ } \ + constexpr type& operator<<=(type& a, type b) noexcept { \ + a = a << b; \ + return a; \ + } \ + constexpr type& operator>>=(type& a, type b) noexcept { \ + a = a >> b; \ + return a; \ + } \ [[nodiscard]] constexpr type operator~(type key) noexcept { \ using T = std::underlying_type_t<type>; \ return static_cast<type>(~static_cast<T>(key)); \ diff --git a/src/common/misc.cpp b/src/common/error.cpp index 495385b9e..d4455e310 100644 --- a/src/common/misc.cpp +++ b/src/common/error.cpp @@ -10,7 +10,9 @@ #include <cstring> #endif -#include "common/common_funcs.h" +#include "common/error.h" + +namespace Common { std::string NativeErrorToString(int e) { #ifdef _WIN32 @@ -50,3 +52,5 @@ std::string GetLastErrorMsg() { return NativeErrorToString(errno); #endif } + +} // namespace Common diff --git a/src/common/error.h b/src/common/error.h new file mode 100644 index 000000000..e084d4b0f --- /dev/null +++ b/src/common/error.h @@ -0,0 +1,21 @@ +// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +namespace Common { + +// Generic function to get last error message. +// Call directly after the command or use the error num. +// This function might change the error code. +// Defined in error.cpp. +[[nodiscard]] std::string GetLastErrorMsg(); + +// Like GetLastErrorMsg(), but passing an explicit error code. +// Defined in error.cpp. +[[nodiscard]] std::string NativeErrorToString(int e); + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index fd3b639cd..0d2df80a8 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -54,7 +54,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); - log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); + log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); @@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); - values.use_nvdec_emulation.SetGlobal(true); + values.nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); values.shader_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ec4d381e8..b7195670b 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -48,6 +48,12 @@ enum class FullscreenMode : u32 { Exclusive = 1, }; +enum class NvdecEmulation : u32 { + Off = 0, + CPU = 1, + GPU = 2, +}; + /** The BasicSetting class is a simple resource manager. It defines a label and default value * alongside the actual value of the setting for simpler and less-error prone use with frontend * configurations. Setting a default value and label is required, though subclasses may deviate from @@ -466,7 +472,7 @@ struct Values { RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal, GPUAccuracy::Extreme, "gpu_accuracy"}; Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; - Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"}; + Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; Setting<bool> accelerate_astc{true, "accelerate_astc"}; Setting<bool> use_vsync{true, "use_vsync"}; BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"}; diff --git a/src/common/thread.cpp b/src/common/thread.cpp index d2c1ac60d..946a1114d 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -2,7 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/common_funcs.h" +#include <string> + +#include "common/error.h" #include "common/logging/log.h" #include "common/thread.h" #ifdef __APPLE__ @@ -21,8 +23,6 @@ #include <unistd.h> #endif -#include <string> - #ifdef __FreeBSD__ #define cpu_set_t cpuset_t #endif diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 87d47e2e5..7140d0db8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -263,6 +263,8 @@ add_library(core STATIC hle/service/acc/acc_u0.h hle/service/acc/acc_u1.cpp hle/service/acc/acc_u1.h + hle/service/acc/async_context.cpp + hle/service/acc/async_context.h hle/service/acc/errors.h hle/service/acc/profile_manager.cpp hle/service/acc/profile_manager.h diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 7e195346b..77efcabf0 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -21,34 +21,25 @@ namespace Core { CpuManager::CpuManager(System& system_) : system{system_} {} CpuManager::~CpuManager() = default; -void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) { - cpu_manager.RunThread(core); +void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, + std::size_t core) { + cpu_manager.RunThread(stop_token, core); } void CpuManager::Initialize() { running_mode = true; if (is_multicore) { for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - core_data[core].host_thread = - std::make_unique<std::thread>(ThreadStart, std::ref(*this), core); + core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core); } } else { - core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0); + core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0); } } void CpuManager::Shutdown() { running_mode = false; Pause(false); - if (is_multicore) { - for (auto& data : core_data) { - data.host_thread->join(); - data.host_thread.reset(); - } - } else { - core_data[0].host_thread->join(); - core_data[0].host_thread.reset(); - } } std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() { @@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) { } } -void CpuManager::RunThread(std::size_t core) { +void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) { /// Initialization system.RegisterCoreThread(core); std::string name; @@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) { return; } + if (stop_token.stop_requested()) { + break; + } + auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); data.is_running = true; Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext()); diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index 140263b09..9d92d4af0 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -78,9 +78,9 @@ private: void SingleCoreRunSuspendThread(); void SingleCorePause(bool paused); - static void ThreadStart(CpuManager& cpu_manager, std::size_t core); + static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core); - void RunThread(std::size_t core); + void RunThread(std::stop_token stop_token, std::size_t core); struct CoreData { std::shared_ptr<Common::Fiber> host_context; @@ -89,7 +89,7 @@ private: std::atomic<bool> is_running; std::atomic<bool> is_paused; std::atomic<bool> initialized; - std::unique_ptr<std::thread> host_thread; + std::jthread host_thread; }; std::atomic<bool> running_mode{}; diff --git a/src/core/file_sys/kernel_executable.h b/src/core/file_sys/kernel_executable.h index 044c554d3..79ca82f8b 100644 --- a/src/core/file_sys/kernel_executable.h +++ b/src/core/file_sys/kernel_executable.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <string> #include <vector> #include "common/common_funcs.h" diff --git a/src/core/hle/api_version.h b/src/core/hle/api_version.h index 43d5670a9..626e30753 100644 --- a/src/core/hle/api_version.h +++ b/src/core/hle/api_version.h @@ -28,13 +28,20 @@ constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0"; // Atmosphere version constants. -constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0; -constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19; -constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5; +constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 1; +constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 0; +constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 0; + +constexpr u32 AtmosphereTargetFirmwareWithRevision(u8 major, u8 minor, u8 micro, u8 rev) { + return u32{major} << 24 | u32{minor} << 16 | u32{micro} << 8 | u32{rev}; +} + +constexpr u32 AtmosphereTargetFirmware(u8 major, u8 minor, u8 micro) { + return AtmosphereTargetFirmwareWithRevision(major, minor, micro, 0); +} constexpr u32 GetTargetFirmware() { - return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 | - u32{HOS_VERSION_MICRO} << 8 | 0U; + return AtmosphereTargetFirmware(HOS_VERSION_MAJOR, HOS_VERSION_MINOR, HOS_VERSION_MICRO); } } // namespace HLE::ApiVersion diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 3a6db0b1c..901d43da9 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <functional> #include <memory> #include <string> #include <unordered_map> diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 882fc1492..6d9ec0a8a 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -23,6 +23,7 @@ #include "core/hle/service/acc/acc_su.h" #include "core/hle/service/acc/acc_u0.h" #include "core/hle/service/acc/acc_u1.h" +#include "core/hle/service/acc/async_context.h" #include "core/hle/service/acc/errors.h" #include "core/hle/service/acc/profile_manager.h" #include "core/hle/service/glue/arp.h" @@ -454,22 +455,6 @@ public: : IProfileCommon{system_, "IProfileEditor", true, user_id_, profile_manager_} {} }; -class IAsyncContext final : public ServiceFramework<IAsyncContext> { -public: - explicit IAsyncContext(Core::System& system_) : ServiceFramework{system_, "IAsyncContext"} { - // clang-format off - static const FunctionInfo functions[] = { - {0, nullptr, "GetSystemEvent"}, - {1, nullptr, "Cancel"}, - {2, nullptr, "HasDone"}, - {3, nullptr, "GetResult"}, - }; - // clang-format on - - RegisterHandlers(functions); - } -}; - class ISessionObject final : public ServiceFramework<ISessionObject> { public: explicit ISessionObject(Core::System& system_, Common::UUID) @@ -504,16 +489,44 @@ public: } }; +class EnsureTokenIdCacheAsyncInterface final : public IAsyncContext { +public: + explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext{system_} { + MarkComplete(); + } + ~EnsureTokenIdCacheAsyncInterface() = default; + + void LoadIdTokenCache(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } + +protected: + bool IsComplete() const override { + return true; + } + + void Cancel() override {} + + ResultCode GetResult() const override { + return ResultSuccess; + } +}; + class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { public: explicit IManagerForApplication(Core::System& system_, Common::UUID user_id_) - : ServiceFramework{system_, "IManagerForApplication"}, user_id{user_id_} { + : ServiceFramework{system_, "IManagerForApplication"}, + ensure_token_id{std::make_shared<EnsureTokenIdCacheAsyncInterface>(system)}, + user_id{user_id_} { // clang-format off static const FunctionInfo functions[] = { {0, &IManagerForApplication::CheckAvailability, "CheckAvailability"}, {1, &IManagerForApplication::GetAccountId, "GetAccountId"}, - {2, nullptr, "EnsureIdTokenCacheAsync"}, - {3, nullptr, "LoadIdTokenCache"}, + {2, &IManagerForApplication::EnsureIdTokenCacheAsync, "EnsureIdTokenCacheAsync"}, + {3, &IManagerForApplication::LoadIdTokenCache, "LoadIdTokenCache"}, {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"}, {150, nullptr, "CreateAuthorizationRequest"}, {160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"}, @@ -540,6 +553,20 @@ private: rb.PushRaw<u64>(user_id.GetNintendoID()); } + void EnsureIdTokenCacheAsync(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface(ensure_token_id); + } + + void LoadIdTokenCache(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + ensure_token_id->LoadIdTokenCache(ctx); + } + void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_ACC, "(STUBBED) called"); @@ -562,6 +589,7 @@ private: rb.Push(ResultSuccess); } + std::shared_ptr<EnsureTokenIdCacheAsyncInterface> ensure_token_id{}; Common::UUID user_id{Common::INVALID_UUID}; }; diff --git a/src/core/hle/service/acc/async_context.cpp b/src/core/hle/service/acc/async_context.cpp new file mode 100644 index 000000000..459323132 --- /dev/null +++ b/src/core/hle/service/acc/async_context.cpp @@ -0,0 +1,68 @@ +// Copyright 2021 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/hle/ipc_helpers.h" +#include "core/hle/service/acc/async_context.h" + +namespace Service::Account { +IAsyncContext::IAsyncContext(Core::System& system_) + : ServiceFramework{system_, "IAsyncContext"}, compeletion_event{system_.Kernel()} { + + Kernel::KAutoObject::Create(std::addressof(compeletion_event)); + compeletion_event.Initialize("IAsyncContext:CompletionEvent"); + + // clang-format off + static const FunctionInfo functions[] = { + {0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"}, + {1, &IAsyncContext::Cancel, "Cancel"}, + {2, &IAsyncContext::HasDone, "HasDone"}, + {3, &IAsyncContext::GetResult, "GetResult"}, + }; + // clang-format on + + RegisterHandlers(functions); +} + +void IAsyncContext::GetSystemEvent(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_ACC, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(ResultSuccess); + rb.PushCopyObjects(compeletion_event.GetReadableEvent()); +} + +void IAsyncContext::Cancel(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_ACC, "called"); + + Cancel(); + MarkComplete(); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + +void IAsyncContext::HasDone(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_ACC, "called"); + + is_complete.store(IsComplete()); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push(is_complete.load()); +} + +void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_ACC, "called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(GetResult()); +} + +void IAsyncContext::MarkComplete() { + is_complete.store(true); + compeletion_event.GetWritableEvent().Signal(); +} + +} // namespace Service::Account diff --git a/src/core/hle/service/acc/async_context.h b/src/core/hle/service/acc/async_context.h new file mode 100644 index 000000000..c694b4946 --- /dev/null +++ b/src/core/hle/service/acc/async_context.h @@ -0,0 +1,37 @@ +// Copyright 2021 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include "core/hle/kernel/k_event.h" +#include "core/hle/service/service.h" + +namespace Core { +class System; +} + +namespace Service::Account { + +class IAsyncContext : public ServiceFramework<IAsyncContext> { +public: + explicit IAsyncContext(Core::System& system_); + + void GetSystemEvent(Kernel::HLERequestContext& ctx); + void Cancel(Kernel::HLERequestContext& ctx); + void HasDone(Kernel::HLERequestContext& ctx); + void GetResult(Kernel::HLERequestContext& ctx); + +protected: + virtual bool IsComplete() const = 0; + virtual void Cancel() = 0; + virtual ResultCode GetResult() const = 0; + + void MarkComplete(); + + std::atomic<bool> is_complete{false}; + Kernel::KEvent compeletion_event; +}; + +} // namespace Service::Account diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index a538f82e3..c3ac73131 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1270,7 +1270,8 @@ void ILibraryAppletCreator::CreateHandleStorage(Kernel::HLERequestContext& ctx) IApplicationFunctions::IApplicationFunctions(Core::System& system_) : ServiceFramework{system_, "IApplicationFunctions"}, gpu_error_detected_event{system.Kernel()}, friend_invitation_storage_channel_event{system.Kernel()}, - health_warning_disappeared_system_event{system.Kernel()} { + notification_storage_channel_event{system.Kernel()}, health_warning_disappeared_system_event{ + system.Kernel()} { // clang-format off static const FunctionInfo functions[] = { {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"}, @@ -1322,7 +1323,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_) {131, nullptr, "SetDelayTimeToAbortOnGpuError"}, {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"}, {141, &IApplicationFunctions::TryPopFromFriendInvitationStorageChannel, "TryPopFromFriendInvitationStorageChannel"}, - {150, nullptr, "GetNotificationStorageChannelEvent"}, + {150, &IApplicationFunctions::GetNotificationStorageChannelEvent, "GetNotificationStorageChannelEvent"}, {151, nullptr, "TryPopFromNotificationStorageChannel"}, {160, &IApplicationFunctions::GetHealthWarningDisappearedSystemEvent, "GetHealthWarningDisappearedSystemEvent"}, {170, nullptr, "SetHdcpAuthenticationActivated"}, @@ -1340,11 +1341,14 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_) Kernel::KAutoObject::Create(std::addressof(gpu_error_detected_event)); Kernel::KAutoObject::Create(std::addressof(friend_invitation_storage_channel_event)); + Kernel::KAutoObject::Create(std::addressof(notification_storage_channel_event)); Kernel::KAutoObject::Create(std::addressof(health_warning_disappeared_system_event)); gpu_error_detected_event.Initialize("IApplicationFunctions:GpuErrorDetectedSystemEvent"); friend_invitation_storage_channel_event.Initialize( "IApplicationFunctions:FriendInvitationStorageChannelEvent"); + notification_storage_channel_event.Initialize( + "IApplicationFunctions:NotificationStorageChannelEvent"); health_warning_disappeared_system_event.Initialize( "IApplicationFunctions:HealthWarningDisappearedSystemEvent"); } @@ -1762,6 +1766,14 @@ void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel( rb.Push(ERR_NO_DATA_IN_CHANNEL); } +void IApplicationFunctions::GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_AM, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(ResultSuccess); + rb.PushCopyObjects(notification_storage_channel_event.GetReadableEvent()); +} + void IApplicationFunctions::GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 184030a8e..c13aa5787 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -295,6 +295,7 @@ private: void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx); void GetFriendInvitationStorageChannelEvent(Kernel::HLERequestContext& ctx); void TryPopFromFriendInvitationStorageChannel(Kernel::HLERequestContext& ctx); + void GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx); void GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx); bool launch_popped_application_specific = false; @@ -302,6 +303,7 @@ private: s32 previous_program_index{-1}; Kernel::KEvent gpu_error_detected_event; Kernel::KEvent friend_invitation_storage_channel_event; + Kernel::KEvent notification_storage_channel_event; Kernel::KEvent health_warning_disappeared_system_event; }; diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index 4a9b13e45..c8d65f328 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -97,14 +97,24 @@ ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) cons ResultCode VfsDirectoryServiceWrapper::CreateDirectory(const std::string& path_) const { std::string path(Common::FS::SanitizePath(path_)); - auto dir = GetDirectoryRelativeWrapped(backing, Common::FS::GetParentPath(path)); - if (dir == nullptr || Common::FS::GetFilename(Common::FS::GetParentPath(path)).empty()) { - dir = backing; - } - auto new_dir = dir->CreateSubdirectory(Common::FS::GetFilename(path)); - if (new_dir == nullptr) { - // TODO(DarkLordZach): Find a better error code for this - return ResultUnknown; + + // NOTE: This is inaccurate behavior. CreateDirectory is not recursive. + // CreateDirectory should return PathNotFound if the parent directory does not exist. + // This is here temporarily in order to have UMM "work" in the meantime. + // TODO (Morph): Remove this when a hardware test verifies the correct behavior. + const auto components = Common::FS::SplitPathComponents(path); + std::string relative_path; + for (const auto& component : components) { + // Skip empty path components + if (component.empty()) { + continue; + } + relative_path = Common::FS::SanitizePath(relative_path + '/' + component); + auto new_dir = backing->CreateSubdirectory(relative_path); + if (new_dir == nullptr) { + // TODO(DarkLordZach): Find a better error code for this + return ResultUnknown; + } } return ResultSuccess; } diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h index ef2becefd..8e9b40c0a 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.h +++ b/src/core/hle/service/hid/controllers/touchscreen.h @@ -15,6 +15,20 @@ namespace Service::HID { class Controller_Touchscreen final : public ControllerBase { public: + enum class TouchScreenModeForNx : u8 { + UseSystemSetting, + Finger, + Heat2, + }; + + struct TouchScreenConfigurationForNx { + TouchScreenModeForNx mode; + INSERT_PADDING_BYTES_NOINIT(0x7); + INSERT_PADDING_BYTES_NOINIT(0xF); // Reserved + }; + static_assert(sizeof(TouchScreenConfigurationForNx) == 0x17, + "TouchScreenConfigurationForNx is an invalid size"); + explicit Controller_Touchscreen(Core::System& system_); ~Controller_Touchscreen() override; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index b8b80570d..a1707a72a 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -331,7 +331,7 @@ Hid::Hid(Core::System& system_) {529, nullptr, "SetDisallowedPalmaConnection"}, {1000, &Hid::SetNpadCommunicationMode, "SetNpadCommunicationMode"}, {1001, &Hid::GetNpadCommunicationMode, "GetNpadCommunicationMode"}, - {1002, nullptr, "SetTouchScreenConfiguration"}, + {1002, &Hid::SetTouchScreenConfiguration, "SetTouchScreenConfiguration"}, {1003, nullptr, "IsFirmwareUpdateNeededForNotification"}, {2000, nullptr, "ActivateDigitizer"}, }; @@ -1631,6 +1631,18 @@ void Hid::GetNpadCommunicationMode(Kernel::HLERequestContext& ctx) { .GetNpadCommunicationMode()); } +void Hid::SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto touchscreen_mode{rp.PopRaw<Controller_Touchscreen::TouchScreenConfigurationForNx>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, touchscreen_mode={}, applet_resource_user_id={}", + touchscreen_mode.mode, applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + class HidDbg final : public ServiceFramework<HidDbg> { public: explicit HidDbg(Core::System& system_) : ServiceFramework{system_, "hid:dbg"} { diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 9c5c7f252..b1fe75e94 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -159,6 +159,7 @@ private: void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); void SetNpadCommunicationMode(Kernel::HLERequestContext& ctx); void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx); + void SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx); enum class VibrationDeviceType : u32 { Unknown = 0, diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index ce6065db2..a33e47d0b 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, const Common::Rectangle<int>& crop_rect) { - VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); + const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", addr, offset, width, height, stride, format); - using PixelFormat = Tegra::FramebufferConfig::PixelFormat; - const Tegra::FramebufferConfig framebuffer{ - addr, offset, width, height, stride, static_cast<PixelFormat>(format), - transform, crop_rect}; + const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); + const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, + stride, pixel_format, transform, crop_rect}; system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 59ddf6298..b4c3a6099 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -9,17 +9,20 @@ #include "core/core.h" #include "core/hle/kernel/k_writable_event.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/service/kernel_helpers.h" #include "core/hle/service/nvflinger/buffer_queue.h" namespace Service::NVFlinger { -BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_) - : id(id_), layer_id(layer_id_), buffer_wait_event{kernel} { - Kernel::KAutoObject::Create(std::addressof(buffer_wait_event)); - buffer_wait_event.Initialize("BufferQueue:WaitEvent"); +BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_, + KernelHelpers::ServiceContext& service_context_) + : id(id_), layer_id(layer_id_), service_context{service_context_} { + buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent"); } -BufferQueue::~BufferQueue() = default; +BufferQueue::~BufferQueue() { + service_context.CloseEvent(buffer_wait_event); +} void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) { ASSERT(slot < buffer_slots); @@ -41,7 +44,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) .multi_fence = {}, }; - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); } std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, @@ -119,7 +122,7 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult } free_buffers_condition.notify_one(); - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); } std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { @@ -154,7 +157,7 @@ void BufferQueue::ReleaseBuffer(u32 slot) { } free_buffers_condition.notify_one(); - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); } void BufferQueue::Connect() { @@ -169,7 +172,7 @@ void BufferQueue::Disconnect() { std::unique_lock lock{queue_sequence_mutex}; queue_sequence.clear(); } - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); is_connect = false; free_buffers_condition.notify_one(); } @@ -189,11 +192,11 @@ u32 BufferQueue::Query(QueryType type) { } Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() { - return buffer_wait_event.GetWritableEvent(); + return buffer_wait_event->GetWritableEvent(); } Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() { - return buffer_wait_event.GetReadableEvent(); + return buffer_wait_event->GetReadableEvent(); } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 61e337ac5..78de3f354 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -24,6 +24,10 @@ class KReadableEvent; class KWritableEvent; } // namespace Kernel +namespace Service::KernelHelpers { +class ServiceContext; +} // namespace Service::KernelHelpers + namespace Service::NVFlinger { constexpr u32 buffer_slots = 0x40; @@ -38,7 +42,9 @@ struct IGBPBuffer { u32_le index; INSERT_PADDING_WORDS(3); u32_le gpu_buffer_id; - INSERT_PADDING_WORDS(17); + INSERT_PADDING_WORDS(6); + u32_le external_format; + INSERT_PADDING_WORDS(10); u32_le nvmap_handle; u32_le offset; INSERT_PADDING_WORDS(60); @@ -54,7 +60,8 @@ public: NativeWindowFormat = 2, }; - explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_); + explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_, + KernelHelpers::ServiceContext& service_context_); ~BufferQueue(); enum class BufferTransformFlags : u32 { @@ -130,12 +137,14 @@ private: std::list<u32> free_buffers; std::array<Buffer, buffer_slots> buffers; std::list<u32> queue_sequence; - Kernel::KEvent buffer_wait_event; + Kernel::KEvent* buffer_wait_event{}; std::mutex free_buffers_mutex; std::condition_variable free_buffers_condition; std::mutex queue_sequence_mutex; + + KernelHelpers::ServiceContext& service_context; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 941748970..3ead813b0 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -61,12 +61,13 @@ void NVFlinger::SplitVSync() { } } -NVFlinger::NVFlinger(Core::System& system_) : system(system_) { - displays.emplace_back(0, "Default", system); - displays.emplace_back(1, "External", system); - displays.emplace_back(2, "Edid", system); - displays.emplace_back(3, "Internal", system); - displays.emplace_back(4, "Null", system); +NVFlinger::NVFlinger(Core::System& system_) + : system(system_), service_context(system_, "nvflinger") { + displays.emplace_back(0, "Default", service_context, system); + displays.emplace_back(1, "External", service_context, system); + displays.emplace_back(2, "Edid", service_context, system); + displays.emplace_back(3, "Internal", service_context, system); + displays.emplace_back(4, "Null", service_context, system); guard = std::make_shared<std::mutex>(); // Schedule the screen composition events @@ -146,7 +147,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) { const u32 buffer_queue_id = next_buffer_queue_id++; buffer_queues.emplace_back( - std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id)); + std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id, service_context)); display.CreateLayer(layer_id, *buffer_queues.back()); } @@ -297,7 +298,7 @@ void NVFlinger::Compose() { auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0"); ASSERT(nvdisp); - nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, + nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format, igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->get().transform, buffer->get().crop_rect); diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index d80fd07ef..6d84cafb4 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -15,6 +15,7 @@ #include <vector> #include "common/common_types.h" +#include "core/hle/service/kernel_helpers.h" namespace Common { class Event; @@ -135,6 +136,8 @@ private: std::unique_ptr<std::thread> vsync_thread; std::unique_ptr<Common::Event> wait_event; std::atomic<bool> is_running{}; + + KernelHelpers::ServiceContext service_context; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index 0dd342dbf..b7705c02a 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -12,18 +12,21 @@ #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_writable_event.h" +#include "core/hle/service/kernel_helpers.h" #include "core/hle/service/vi/display/vi_display.h" #include "core/hle/service/vi/layer/vi_layer.h" namespace Service::VI { -Display::Display(u64 id, std::string name_, Core::System& system) - : display_id{id}, name{std::move(name_)}, vsync_event{system.Kernel()} { - Kernel::KAutoObject::Create(std::addressof(vsync_event)); - vsync_event.Initialize(fmt::format("Display VSync Event {}", id)); +Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, + Core::System& system_) + : display_id{id}, name{std::move(name_)}, service_context{service_context_} { + vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id)); } -Display::~Display() = default; +Display::~Display() { + service_context.CloseEvent(vsync_event); +} Layer& Display::GetLayer(std::size_t index) { return *layers.at(index); @@ -34,11 +37,11 @@ const Layer& Display::GetLayer(std::size_t index) const { } Kernel::KReadableEvent& Display::GetVSyncEvent() { - return vsync_event.GetReadableEvent(); + return vsync_event->GetReadableEvent(); } void Display::SignalVSyncEvent() { - vsync_event.GetWritableEvent().Signal(); + vsync_event->GetWritableEvent().Signal(); } void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) { diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index 166f2a4cc..0979fc421 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -18,6 +18,9 @@ class KEvent; namespace Service::NVFlinger { class BufferQueue; } +namespace Service::KernelHelpers { +class ServiceContext; +} // namespace Service::KernelHelpers namespace Service::VI { @@ -31,10 +34,13 @@ class Display { public: /// Constructs a display with a given unique ID and name. /// - /// @param id The unique ID for this display. + /// @param id The unique ID for this display. + /// @param service_context_ The ServiceContext for the owning service. /// @param name_ The name for this display. + /// @param system_ The global system instance. /// - Display(u64 id, std::string name_, Core::System& system); + Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, + Core::System& system_); ~Display(); /// Gets the unique ID assigned to this display. @@ -98,9 +104,10 @@ public: private: u64 display_id; std::string name; + KernelHelpers::ServiceContext& service_context; std::vector<std::shared_ptr<Layer>> layers; - Kernel::KEvent vsync_event; + Kernel::KEvent* vsync_event{}; }; } // namespace Service::VI diff --git a/src/core/network/network.cpp b/src/core/network/network.cpp index 4732d4485..72eea52f0 100644 --- a/src/core/network/network.cpp +++ b/src/core/network/network.cpp @@ -7,7 +7,8 @@ #include <limits> #include <utility> #include <vector> -#include "common/common_funcs.h" + +#include "common/error.h" #ifdef _WIN32 #include <winsock2.h> @@ -223,7 +224,7 @@ Errno GetAndLogLastError() { if (err == Errno::AGAIN) { return err; } - LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e)); + LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); return err; } diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 5a8cfd301..1f1607998 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { return "Unknown"; } +static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) { + switch (backend) { + case Settings::NvdecEmulation::Off: + return "Off"; + case Settings::NvdecEmulation::CPU: + return "CPU"; + case Settings::NvdecEmulation::GPU: + return "GPU"; + } + return "Unknown"; +} + u64 GetTelemetryId() { u64 telemetry_id{}; const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; @@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); - AddField(field_type, "Renderer_UseNvdecEmulation", - Settings::values.use_nvdec_emulation.GetValue()); + AddField(field_type, "Renderer_NvdecEmulation", + TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_ShaderBackend", diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index a982dd8a2..cd285e2c8 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -11,6 +11,8 @@ namespace Shader::Backend::GLSL { namespace { +constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"}; + void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; if (!in_bounds) { @@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); SetInBoundsFlag(ctx, inst); } + +std::string_view BallotIndex(EmitContext& ctx) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ".x"; + } + return "[gl_SubGroupInvocationARB>>5]"; +} + +std::string GetMask(EmitContext& ctx, std::string_view mask) { + const auto ballot_index{BallotIndex(ctx)}; + return fmt::format("uint(uvec2({}){})", mask, ballot_index); +} } // Anonymous namespace void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); + ctx.AddU32("{}={}&31u;", inst, THREAD_ID); } void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - } else { - const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; - const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; - ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + return; } + const auto ballot_index{BallotIndex(ctx)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; + const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); } void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU1("{}=anyInvocationARB({});", inst, pred); - } else { - const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; - const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; - ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + return; } + const auto ballot_index{BallotIndex(ctx)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; + const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); } void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - } else { - const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; - const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; - const auto value{fmt::format("({}^{})", ballot, active_mask)}; - ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + return; } + const auto ballot_index{BallotIndex(ctx)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; + const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); } void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); - } else { - ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); - } + const auto ballot_index{BallotIndex(ctx)}; + ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index); } void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB")); } void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB")); } void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB")); } void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB")); } void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); + ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB")); } void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { + std::string_view index, std::string_view clamp, std::string_view seg_mask) { if (ctx.profile.support_gl_warp_intrinsics) { - UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask); return; } - const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; - const auto thread_id{"gl_SubGroupInvocationARB"}; - const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; - const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto not_seg_mask{fmt::format("(~{})", seg_mask)}; + const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)}; + const auto max_thread_id{ + ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)}; - const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; + const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)}; const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); @@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, } void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask) { + std::string_view clamp, std::string_view seg_mask) { if (ctx.profile.support_gl_warp_intrinsics) { - UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask); return; } - const auto thread_id{"gl_SubGroupInvocationARB"}; - const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; - const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; + const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)}; ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { + std::string_view index, std::string_view clamp, std::string_view seg_mask) { if (ctx.profile.support_gl_warp_intrinsics) { - UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask); return; } - const auto thread_id{"gl_SubGroupInvocationARB"}; - const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; - const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; + const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); @@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { + std::string_view seg_mask) { if (ctx.profile.support_gl_warp_intrinsics) { - UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask); return; } - const auto thread_id{"gl_SubGroupInvocationARB"}; - const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; - const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; + const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; + const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; + const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; + + const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; + const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,8 @@ namespace Shader::Backend::SPIRV { namespace { +constexpr size_t NUM_FIXEDFNCTEXTURE = 10; + enum class Operation { Increment, Decrement, @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { return pointer_type; } } + +size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, + size_t start_offset) { + for (size_t location = start_offset; location < used_locations.size(); ++location) { + if (!used_locations.test(location)) { + return location; + } + } + throw RuntimeError("Unable to get an unused location for legacy attribute"); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; if (!runtime_info.previous_stage_stores.Generic(index)) { @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { if (input_type == AttributeType::Disabled) { continue; } + used_locations.set(index); const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { break; } } + size_t previous_unused_location = 0; + if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_fixed_fnc_textures[index] = id; + } + } if (stage == Stage::TessellationEval) { for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); + used_locations.set(index); + } + } + size_t previous_unused_location = 0; + if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); + output_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, location); + output_fixed_fnc_textures[index] = id; } } switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -268,10 +268,14 @@ public: Id write_global_func_u32x4{}; Id input_position{}; + Id input_front_color{}; + std::array<Id, 10> input_fixed_fnc_textures{}; std::array<Id, 32> input_generics{}; Id output_point_size{}; Id output_position{}; + Id output_front_color{}; + std::array<Id, 10> output_fixed_fnc_textures{}; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; Id output_tess_level_outer{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 9e54a17ee..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } } +bool IsFixedFncTexture(IR::Attribute attribute) { + return attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q; +} + +u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; +} + +u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return static_cast<u32>(attribute) % 4u; +} + template <typename... Args> Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { if (ctx.stage == Stage::TessellationControl) { @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const u32 element{FixedFncTextureAttributeElement(attr)}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], + element_id); + } switch (attr) { case IR::Attribute::PointSize: return ctx.output_point_size; @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const Id attr_id{ctx.input_fixed_fnc_textures[index]}; + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } switch (attr) { case IR::Attribute::PrimitiveId: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionW: return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, + ctx.Const(element))); + } case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 78b1e1ba7..cef52c56e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,8 +7,13 @@ namespace Shader::Backend::SPIRV { namespace { +Id GetThreadId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); +} + Id WarpExtract(EmitContext& ctx, Id value) { - const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } @@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { return ctx.OpSelect(ctx.U32[1], in_range, ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); } + +Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { + const Id thirty_two{ctx.Const(32u)}; + const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; + const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; + return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); +} } // Anonymous namespace Id EmitLaneId(EmitContext& ctx) { - const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id id{GetThreadId(ctx)}; if (!ctx.profile.warp_size_potentially_larger_than_guest) { return id; } @@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + const Id thirty_two{ctx.Const(32u)}; + const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; + const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; + const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; + index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); + clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); + } const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; @@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + clamp = GetUpperClamp(ctx, thread_id, clamp); + } const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; @@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + clamp = GetUpperClamp(ctx, thread_id, clamp); + } const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; @@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + clamp = GetUpperClamp(ctx, thread_id, clamp); + } const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2f6cdd216..269db21a5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -231,6 +231,7 @@ endif() target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) +target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f798a0053..61966cbfe 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -5,6 +5,7 @@ #include <fstream> #include <vector> #include "common/assert.h" +#include "common/settings.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/command_classes/codecs/vp9.h" @@ -16,108 +17,146 @@ extern "C" { } namespace Tegra { -#if defined(LIBVA_FOUND) -// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license namespace { -constexpr std::array<const char*, 2> VAAPI_DRIVERS = { - "i915", - "amdgpu", -}; +constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; +constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; + +void AVPacketDeleter(AVPacket* ptr) { + av_packet_free(&ptr); +} -AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) { +using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>; + +AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == AV_PIX_FMT_VAAPI) { - return AV_PIX_FMT_VAAPI; + if (*p == av_codec_ctx->pix_fmt) { + return av_codec_ctx->pix_fmt; } } LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); - return *pix_fmts; + av_buffer_unref(&av_codec_ctx->hw_device_ctx); + av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; + return PREFERRED_CPU_FMT; +} +} // namespace + +void AVFrameDeleter(AVFrame* ptr) { + av_frame_free(&ptr); } -bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) { +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), + vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} + +Codec::~Codec() { + if (!initialized) { + return; + } + // Free libav memory + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); +} + +bool Codec::CreateGpuAvDevice() { +#if defined(LIBVA_FOUND) + static constexpr std::array<const char*, 3> VAAPI_DRIVERS = { + "i915", + "iHD", + "amdgpu", + }; AVDictionary* hwdevice_options = nullptr; av_dict_set(&hwdevice_options, "connection_type", "drm", 0); for (const auto& driver : VAAPI_DRIVERS) { av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI, + const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, nullptr, hwdevice_options, 0); if (hwdevice_error >= 0) { LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); av_dict_free(&hwdevice_options); + av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; return true; } LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); } LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); av_dict_free(&hwdevice_options); - return false; -} -} // namespace #endif - -void AVFrameDeleter(AVFrame* ptr) { - av_frame_free(&ptr); + static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; + static constexpr std::array GPU_DECODER_TYPES{ + AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, +#else + AV_HWDEVICE_TYPE_VDPAU, +#endif + }; + for (const auto& type : GPU_DECODER_TYPES) { + const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + if (hwdevice_res < 0) { + LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", + av_hwdevice_get_type_name(type), hwdevice_res); + continue; + } + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); + if (!config) { + LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", + av_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { + av_codec_ctx->pix_fmt = config->pix_fmt; + LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + return true; + } + } + } + return false; } -Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) - : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), - vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} - -Codec::~Codec() { - if (!initialized) { - return; - } - // Free libav memory - avcodec_send_packet(av_codec_ctx, nullptr); - AVFrame* av_frame = av_frame_alloc(); - avcodec_receive_frame(av_codec_ctx, av_frame); - avcodec_flush_buffers(av_codec_ctx); - av_frame_free(&av_frame); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); +void Codec::InitializeAvCodecContext() { + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); } -void Codec::InitializeHwdec() { - // Prioritize integrated GPU to mitigate bandwidth bottlenecks -#if defined(LIBVA_FOUND) - if (CreateVaapiHwdevice(&av_hw_device)) { - const auto hw_device_ctx = av_buffer_ref(av_hw_device); - ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); - av_codec_ctx->hw_device_ctx = hw_device_ctx; - av_codec_ctx->get_format = GetHwFormat; +void Codec::InitializeGpuDecoder() { + if (!CreateGpuAvDevice()) { + av_buffer_unref(&av_gpu_decoder); return; } -#endif - // TODO more GPU accelerated decoders + auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); + ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); + av_codec_ctx->hw_device_ctx = hw_device_ctx; + av_codec_ctx->get_format = GetGpuFormat; } void Codec::Initialize() { - AVCodecID codec; - switch (current_codec) { - case NvdecCommon::VideoCodec::H264: - codec = AV_CODEC_ID_H264; - break; - case NvdecCommon::VideoCodec::Vp9: - codec = AV_CODEC_ID_VP9; - break; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + const AVCodecID codec = [&] { + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case NvdecCommon::VideoCodec::Vp9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + return AV_CODEC_ID_NONE; + } + }(); + av_codec = avcodec_find_decoder(codec); + + InitializeAvCodecContext(); + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) { + InitializeGpuDecoder(); + } + if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); return; } - av_codec = avcodec_find_decoder(codec); - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - InitializeHwdec(); if (!av_codec_ctx->hw_device_ctx) { LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); } - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); - return; - } initialized = true; } @@ -133,6 +172,9 @@ void Codec::Decode() { if (is_first_frame) { Initialize(); } + if (!initialized) { + return; + } bool vp9_hidden_frame = false; std::vector<u8> frame_data; if (current_codec == NvdecCommon::VideoCodec::H264) { @@ -141,50 +183,48 @@ void Codec::Decode() { frame_data = vp9_decoder->ComposeFrameHeader(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); } - AVPacket packet{}; - av_init_packet(&packet); - packet.data = frame_data.data(); - packet.size = static_cast<s32>(frame_data.size()); - if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret); + AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; + if (!packet) { + LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); + return; + } + packet->data = frame_data.data(); + packet->size = static_cast<s32>(frame_data.size()); + if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { + LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); return; } // Only receive/store visible frames if (vp9_hidden_frame) { return; } - AVFrame* hw_frame = av_frame_alloc(); - AVFrame* sw_frame = hw_frame; - ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed"); - if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) { + AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; + AVFramePtr final_frame{nullptr, AVFrameDeleter}; + ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); + if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); - av_frame_free(&hw_frame); return; } - if (!hw_frame->width || !hw_frame->height) { + if (initial_frame->width == 0 || initial_frame->height == 0) { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); - av_frame_free(&hw_frame); return; } -#if defined(LIBVA_FOUND) - // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license - if (hw_frame->format == AV_PIX_FMT_VAAPI) { - sw_frame = av_frame_alloc(); - ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed"); + if (av_codec_ctx->hw_device_ctx) { + final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // because Intel drivers crash unless using AV_PIX_FMT_NV12 - sw_frame->format = AV_PIX_FMT_NV12; - const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0); - ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret); - av_frame_free(&hw_frame); + final_frame->format = PREFERRED_GPU_FMT; + const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); + ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); + } else { + final_frame = std::move(initial_frame); } -#endif - if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) { - UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format); - av_frame_free(&sw_frame); + if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) { + UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter}); + av_frames.push(std::move(final_frame)); if (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 71936203f..f9a80886f 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -5,6 +5,7 @@ #pragma once #include <memory> +#include <string_view> #include <queue> #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -50,18 +51,23 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; private: - void InitializeHwdec(); + void InitializeAvCodecContext(); + + void InitializeGpuDecoder(); + + bool CreateGpuAvDevice(); bool initialized{}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; AVCodec* av_codec{nullptr}; - AVBufferRef* av_hw_device{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; + AVBufferRef* av_gpu_decoder{nullptr}; GPU& gpu; const NvdecCommon::NvdecRegisters& state; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 5fb6d45ee..51ee14c13 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); - writer.WriteUe(16); + // TODO (ameerj): Where do we get this number, it seems to be particular for each stream + writer.WriteUe(6); // Max number of reference frames writer.WriteBit(false); writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); writer.WriteUe(pic_height - 1); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa43523a..7f4ca6282 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,10 +475,10 @@ public: // These values are used by Nouveau and some games. AddGL = 0x8006, - SubtractGL = 0x8007, - ReverseSubtractGL = 0x8008, - MinGL = 0x800a, - MaxGL = 0x800b + MinGL = 0x8007, + MaxGL = 0x8008, + SubtractGL = 0x800a, + ReverseSubtractGL = 0x800b }; enum class Factor : u32 { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index c60ed6453..dce00e829 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c9b0d6db..9ff0a28cd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen.Recreate(); } const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); - scheduler.Flush(render_semaphore); + const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); + scheduler.Flush(render_semaphore, present_semaphore); scheduler.WaitWorker(); swapchain.Present(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index cb0580182..888bc7392 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -358,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() { void VKBlitScreen::CreateRenderPass() { const VkAttachmentDescription color_attachment{ .flags = 0, - .format = swapchain.GetImageFormat(), + .format = swapchain.GetImageViewFormat(), .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 8e77e4796..d87da2a34 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <mutex> #include <span> #include <vector> @@ -18,7 +19,6 @@ namespace Vulkan { // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines constexpr size_t SETS_GROW_RATE = 16; constexpr s32 SCORE_THRESHOLD = 3; -constexpr u32 SETS_PER_POOL = 64; struct DescriptorBank { DescriptorBankInfo info; @@ -58,11 +58,12 @@ static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) { static void AllocatePool(const Device& device, DescriptorBank& bank) { std::array<VkDescriptorPoolSize, 6> pool_sizes; size_t pool_cursor{}; + const u32 sets_per_pool = device.GetSetsPerPool(); const auto add = [&](VkDescriptorType type, u32 count) { if (count > 0) { pool_sizes[pool_cursor++] = { .type = type, - .descriptorCount = count * SETS_PER_POOL, + .descriptorCount = count * sets_per_pool, }; } }; @@ -77,7 +78,7 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = SETS_PER_POOL, + .maxSets = sets_per_pool, .poolSizeCount = static_cast<u32>(pool_cursor), .pPoolSizes = std::data(pool_sizes), })); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3ac18ea54..3bcd6d6cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() { }; const u32 color_attachment = regs.clear_buffers.RT; - const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask; - const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; - if (use_color && is_color_rt) { + if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { VkClearValue clear_value; std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); @@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() { return; } VkImageAspectFlags aspect_flags = 0; - if (use_depth) { + if (use_depth && framebuffer->HasAspectDepthBit()) { aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; } - if (use_stencil) { + if (use_stencil && framebuffer->HasAspectStencilBit()) { aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } + if (aspect_flags == 0) { + return; + } scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { VkClearAttachment attachment; @@ -764,12 +765,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass; const Maxwell::ComparisonOp compare = regs.stencil_front_func_func; if (regs.stencil_two_side_enable) { - scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) { - cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), - MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), - MaxwellToVK::ComparisonOp(compare)); - }); - } else { + // Separate stencil op per face const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail; const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail; const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass; @@ -784,6 +780,13 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { MaxwellToVK::StencilOp(back_zfail), MaxwellToVK::ComparisonOp(back_compare)); }); + } else { + // Front face defines the stencil op of both faces + scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), + MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), + MaxwellToVK::ComparisonOp(compare)); + }); } } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4840962de..1d438787a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(VkSemaphore semaphore) { - SubmitExecution(semaphore); +void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + SubmitExecution(signal_semaphore, wait_semaphore); AllocateNewContext(); } -void VKScheduler::Finish(VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { const u64 presubmit_tick = CurrentTick(); - SubmitExecution(semaphore); + SubmitExecution(signal_semaphore, wait_semaphore); WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); @@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() { }); } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { EndPendingOperations(); InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - - const u32 num_signal_semaphores = semaphore ? 2U : 1U; - - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + + const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + }; const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), .signalSemaphoreCount = num_signal_semaphores, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cf39a2363..759ed5a48 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,10 +34,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. - void Flush(VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -191,7 +191,7 @@ private: void AllocateWorkerCommandBuffer(); - void SubmitExecution(VkSemaphore semaphore); + void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 5f78f6950..d90935f52 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -110,10 +110,6 @@ public: return Exchange(Dirty::DepthTestEnable, false); } - bool TouchDepthBoundsEnable() { - return Exchange(Dirty::DepthBoundsEnable, false); - } - bool TouchDepthWriteEnable() { return Exchange(Dirty::DepthWriteEnable, false); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index d990eefba..aadf03cb0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -20,16 +20,15 @@ namespace Vulkan { namespace { -VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { +VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) { if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { VkSurfaceFormatKHR format; format.format = VK_FORMAT_B8G8R8A8_UNORM; format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; return format; } - const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { - const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; - return format.format == request_format && + const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) { + return format.format == VK_FORMAT_B8G8R8A8_UNORM && format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; }); return found != formats.end() ? *found : formats[0]; @@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() { } void VKSwapchain::Present(VkSemaphore render_semaphore) { - const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreCount = render_semaphore ? 2U : 1U, - .pWaitSemaphores = semaphores.data(), + .waitSemaphoreCount = render_semaphore ? 1U : 0U, + .pWaitSemaphores = &render_semaphore, .swapchainCount = 1, .pSwapchains = swapchain.address(), .pImageIndices = &image_index, @@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; - const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; + const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; u32 requested_image_count{capabilities.minImageCount + 1}; @@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } + static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB}; + VkImageFormatListCreateInfo format_list{ + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR, + .pNext = nullptr, + .viewFormatCount = static_cast<u32>(view_formats.size()), + .pViewFormats = view_formats.data(), + }; + if (device.IsKhrSwapchainMutableFormatEnabled()) { + format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list); + swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR; + } // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); - image_format = surface_format.format; + image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; } void VKSwapchain::CreateSemaphores() { @@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() { .flags = 0, .image = {}, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image_format, + .format = image_view_format, .components = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 35c2cdc14..5bce41e21 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -68,8 +68,12 @@ public: return *image_views[index]; } - VkFormat GetImageFormat() const { - return image_format; + VkFormat GetImageViewFormat() const { + return image_view_format; + } + + VkSemaphore CurrentPresentSemaphore() const { + return *present_semaphores[frame_index]; } private: @@ -96,7 +100,7 @@ private: u32 image_index{}; u32 frame_index{}; - VkFormat image_format{}; + VkFormat image_view_format{}; VkExtent2D extent{}; bool current_srgb{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8f4df7122..ff979a7ac 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); images[num_images] = depth_buffer->ImageHandle(); - image_ranges[num_images] = MakeSubresourceRange(depth_buffer); + const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer); + image_ranges[num_images] = subresource_range; samples = depth_buffer->Samples(); ++num_images; + has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; + has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0; } else { renderpass_key.depth_format = PixelFormat::Invalid; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 5fe6b7ba3..6d5a68bfe 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -232,6 +232,18 @@ public: return image_ranges; } + [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { + return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + } + + [[nodiscard]] bool HasAspectDepthBit() const noexcept { + return has_depth; + } + + [[nodiscard]] bool HasAspectStencilBit() const noexcept { + return has_stencil; + } + private: vk::Framebuffer framebuffer; VkRenderPass renderpass{}; @@ -241,6 +253,8 @@ private: u32 num_images = 0; std::array<VkImage, 9> images{}; std::array<VkImageSubresourceRange, 9> image_ranges{}; + bool has_depth{}; + bool has_stencil{}; }; struct TextureCacheParams { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 8a4581c19..81a878bb2 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <filesystem> #include <fstream> #include <memory> diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 6180b8c0e..74cd3c9d8 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -4,6 +4,7 @@ #pragma once +#include <algorithm> #include <array> #include <bit> #include <concepts> diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 3b575db4d..cae543a51 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); + const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c7f0d26ce..2caf98c7c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -611,6 +611,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + sets_per_pool = 64; + if (driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE) { + // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2. + sets_per_pool = 96; + } } Device::~Device() = default; @@ -851,6 +857,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { bool has_khr_shader_float16_int8{}; bool has_khr_workgroup_memory_explicit_layout{}; bool has_khr_pipeline_executable_properties{}; + bool has_khr_image_format_list{}; + bool has_khr_swapchain_mutable_format{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -900,6 +908,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); + test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, + false); test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, @@ -1078,6 +1089,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { khr_pipeline_executable_properties = true; } } + if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { + extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); + extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); + khr_swapchain_mutable_format = true; + } if (khr_push_descriptor) { VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 234d74129..bc180a32a 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -224,6 +224,11 @@ public: return khr_pipeline_executable_properties; } + /// Returns true if VK_KHR_swapchain_mutable_format is enabled. + bool IsKhrSwapchainMutableFormatEnabled() const { + return khr_swapchain_mutable_format; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -318,6 +323,10 @@ public: return device_access_memory; } + u32 GetSetsPerPool() const { + return sets_per_pool; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -371,6 +380,7 @@ private: VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 max_push_descriptors{}; ///< Maximum number of push descriptors + u32 sets_per_pool{}; ///< Sets per Description Pool bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetic. bool is_int8_supported{}; ///< Support for int8 arithmetic. @@ -390,6 +400,7 @@ private: bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool khr_pipeline_executable_properties{}; ///< Support for executable properties. + bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 85d292bcc..8744d8e5d 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -812,7 +812,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_disk_shader_cache); ReadGlobalSetting(Settings::values.gpu_accuracy); ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - ReadGlobalSetting(Settings::values.use_nvdec_emulation); + ReadGlobalSetting(Settings::values.nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); ReadGlobalSetting(Settings::values.shader_backend); @@ -1349,7 +1349,10 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()), Settings::values.gpu_accuracy.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - WriteGlobalSetting(Settings::values.use_nvdec_emulation); + WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()), + static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)), + static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()), + Settings::values.nvdec_emulation.UsingGlobal()); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 9555f4498..4733227b6 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -182,5 +182,6 @@ private: Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); +Q_DECLARE_METATYPE(Settings::NvdecEmulation); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 37e896258..c594164be 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() { ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); - ui->use_nvdec_emulation->setEnabled(runtime_lock); + ui->nvdec_emulation_widget->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); - ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); ui->fullscreen_mode_combobox->setCurrentIndex( static_cast<int>(Settings::values.fullscreen_mode.GetValue())); + ui->nvdec_emulation->setCurrentIndex( + static_cast<int>(Settings::values.nvdec_emulation.GetValue())); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation, + &Settings::values.nvdec_emulation); + ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget, + !Settings::values.nvdec_emulation.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, &Settings::values.fullscreen_mode); ConfigurationShared::SetHighlight(ui->fullscreen_mode_label, @@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, ui->use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, - ui->use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, accelerate_astc); @@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.nvdec_emulation.UsingGlobal()) { + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } if (Settings::values.shader_backend.UsingGlobal()) { Settings::values.shader_backend.SetValue(shader_backend); } @@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() { } } + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + } else { + Settings::values.nvdec_emulation.SetGlobal(false); + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } + if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.bg_red.SetGlobal(true); Settings::values.bg_green.SetGlobal(true); @@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { ConfigurationShared::USE_GLOBAL_OFFSET); } +Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const { + if (Settings::IsConfiguringGlobal()) { + return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex()); + } + + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + return Settings::values.nvdec_emulation.GetValue(); + } + Settings::values.nvdec_emulation.SetGlobal(false); + return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() - + ConfigurationShared::USE_GLOBAL_OFFSET); +} + void ConfigureGraphics::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal()); @@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); - ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); + ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredTristate( ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); - ConfigurationShared::SetColoredTristate( - ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, accelerate_astc); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, @@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() { static_cast<int>(Settings::values.fullscreen_mode.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); + ConfigurationShared::InsertGlobalItem( + ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true))); } diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c866b911b..7d7ac329d 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -43,6 +43,7 @@ private: void SetupPerGameUI(); Settings::RendererBackend GetCurrentGraphicsBackend() const; + Settings::NvdecEmulation GetCurrentNvdecEmulation() const; std::unique_ptr<Ui::ConfigureGraphics> ui; QColor bg_color; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c..1a12cfa4d 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -156,7 +156,7 @@ <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> - <string>Use disk shader cache</string> + <string>Use disk pipeline cache</string> </property> </widget> </item> @@ -168,13 +168,6 @@ </widget> </item> <item> - <widget class="QCheckBox" name="use_nvdec_emulation"> - <property name="text"> - <string>Use NVDEC emulation</string> - </property> - </widget> - </item> - <item> <widget class="QCheckBox" name="accelerate_astc"> <property name="text"> <string>Accelerate ASTC texture decoding</string> @@ -182,6 +175,50 @@ </widget> </item> <item> + <widget class="QWidget" name="nvdec_emulation_widget" native="true"> + <layout class="QHBoxLayout" name="nvdec_emulation_layout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="nvdec_emulation_label"> + <property name="text"> + <string>NVDEC emulation:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="nvdec_emulation"> + <item> + <property name="text"> + <string>Disabled</string> + </property> + </item> + <item> + <property name="text"> + <string>CPU Decoding</string> + </property> + </item> + <item> + <property name="text"> + <string>GPU Decoding</string> + </property> + </item> + </widget> + </item> + </layout> + </widget> + </item> + <item> <widget class="QWidget" name="fullscreen_mode_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 5891f8299..b91abc2f0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@ <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (hack)</string> + <string>Use asynchronous shader building (Hack)</string> </property> </widget> </item> @@ -92,7 +92,7 @@ <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> </property> <property name="text"> - <string>Use Fast GPU Time (hack)</string> + <string>Use Fast GPU Time (Hack)</string> </property> </widget> </item> diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index e97804220..f9d949e75 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location")); QAction* open_transferable_shader_cache = - context_menu.addAction(tr("Open Transferable Shader Cache")); + context_menu.addAction(tr("Open Transferable Pipeline Cache")); context_menu.addSeparator(); QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); - QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); - QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); remove_menu->addSeparator(); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index e36774cc6..77d53e7bc 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -3174,12 +3174,11 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv } bool GMainWindow::ConfirmClose() { - if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) + if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) { return true; - - QMessageBox::StandardButton answer = - QMessageBox::question(this, tr("yuzu"), tr("Are you sure you want to close yuzu?"), - QMessageBox::Yes | QMessageBox::No, QMessageBox::No); + } + const auto text = tr("Are you sure you want to close yuzu?"); + const auto answer = QMessageBox::question(this, tr("yuzu"), text); return answer != QMessageBox::No; } @@ -3261,14 +3260,13 @@ bool GMainWindow::ConfirmChangeGame() { } bool GMainWindow::ConfirmForceLockedExit() { - if (emu_thread == nullptr) + if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) { return true; + } + const auto text = tr("The currently running application has requested yuzu to not exit.\n\n" + "Would you like to bypass this and exit anyway?"); - const auto answer = - QMessageBox::question(this, tr("yuzu"), - tr("The currently running application has requested yuzu to not " - "exit.\n\nWould you like to bypass this and exit anyway?"), - QMessageBox::Yes | QMessageBox::No, QMessageBox::No); + const auto answer = QMessageBox::question(this, tr("yuzu"), text); return answer != QMessageBox::No; } diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 757dd1ea0..891f7be6f 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -465,7 +465,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.disable_fps_limit); ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); - ReadSetting("Renderer", Settings::values.use_nvdec_emulation); + ReadSetting("Renderer", Settings::values.nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index e02eceb99..72f3213fb 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -261,9 +261,9 @@ shader_backend = # 0 (default): Off, 1: On use_asynchronous_shaders = -# Enable NVDEC emulation. -# 0: Off, 1 (default): On -use_nvdec_emulation = +# NVDEC emulation. +# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding +nvdec_emulation = # Accelerate ASTC texture decoding. # 0: Off, 1 (default): On |