diff options
Diffstat (limited to 'src')
56 files changed, 411 insertions, 461 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3575a3cb3..0ac3d254e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,13 +58,11 @@ if (MSVC) # Warnings /W3 - /we4018 # 'expression': signed/unsigned mismatch + /WX + /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled - /we4101 # 'identifier': unreferenced local variable /we4189 # 'identifier': local variable is initialized but not referenced /we4265 # 'class': class has virtual functions, but destructor is not virtual - /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data - /we4305 # 'context': truncation from 'type1' to 'type2' /we4388 # 'expression': signed/unsigned mismatch /we4389 # 'operator': signed/unsigned mismatch /we4456 # Declaration of 'identifier' hides previous local declaration @@ -75,10 +73,13 @@ if (MSVC) /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'? /we4555 # Expression has no effect; expected expression with side-effect - /we4715 # 'function': not all control paths return a value - /we4834 # Discarding return value of function with 'nodiscard' attribute + /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior. 
/we5038 # data member 'member1' will be initialized after data member 'member2' + /we5233 # explicit lambda capture 'identifier' is not used /we5245 # 'function': unreferenced function with internal linkage has been removed + + /wd4100 # 'identifier': unreferenced formal parameter + /wd4324 # 'struct_name': structure was padded due to __declspec(align()) ) if (USE_CCACHE) @@ -99,24 +100,18 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) else() add_compile_options( - -Wall - -Werror=array-bounds - -Werror=implicit-fallthrough + -Werror=all + -Werror=extra -Werror=missing-declarations - -Werror=missing-field-initializers - -Werror=reorder -Werror=shadow - -Werror=sign-compare - -Werror=switch - -Werror=uninitialized - -Werror=unused-function - -Werror=unused-result - -Werror=unused-variable - -Wextra - -Wmissing-declarations + -Werror=unused + -Wno-attributes -Wno-invalid-offsetof -Wno-unused-parameter + + $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init> + $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field> ) if (ARCHITECTURE_x86_64) diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 144f1bab2..0a1f3bf18 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -206,20 +206,11 @@ if (MSVC) /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data - /we4456 # Declaration of 'identifier' hides previous local declaration - /we4457 # Declaration of 'identifier' hides function parameter - /we4458 # Declaration of 'identifier' hides class member - /we4459 # Declaration of 'identifier' hides global declaration + /we4800 # Implicit conversion from 'type' to bool. 
Possible information loss ) else() target_compile_options(audio_core PRIVATE -Werror=conversion - -Werror=ignored-qualifiers - -Werror=shadow - -Werror=unused-variable - - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Wno-sign-conversion ) diff --git a/src/audio_core/renderer/behavior/info_updater.cpp b/src/audio_core/renderer/behavior/info_updater.cpp index c0a307b89..574cf0982 100644 --- a/src/audio_core/renderer/behavior/info_updater.cpp +++ b/src/audio_core/renderer/behavior/info_updater.cpp @@ -91,7 +91,7 @@ Result InfoUpdater::UpdateVoices(VoiceContext& voice_context, voice_info.Initialize(); for (u32 channel = 0; channel < in_param.channel_count; channel++) { - std::memset(voice_states[channel], 0, sizeof(VoiceState)); + *voice_states[channel] = {}; } } diff --git a/src/audio_core/renderer/command/effect/biquad_filter.cpp b/src/audio_core/renderer/command/effect/biquad_filter.cpp index 1baae74fd..edb30ce72 100644 --- a/src/audio_core/renderer/command/effect/biquad_filter.cpp +++ b/src/audio_core/renderer/command/effect/biquad_filter.cpp @@ -94,7 +94,7 @@ void BiquadFilterCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor void BiquadFilterCommand::Process(const ADSP::CommandListProcessor& processor) { auto state_{reinterpret_cast<VoiceState::BiquadFilterState*>(state)}; if (needs_init) { - std::memset(state_, 0, sizeof(VoiceState::BiquadFilterState)); + *state_ = {}; } auto input_buffer{ diff --git a/src/audio_core/renderer/command/effect/multi_tap_biquad_filter.cpp b/src/audio_core/renderer/command/effect/multi_tap_biquad_filter.cpp index b3c3ba4ba..48a7cba8a 100644 --- a/src/audio_core/renderer/command/effect/multi_tap_biquad_filter.cpp +++ b/src/audio_core/renderer/command/effect/multi_tap_biquad_filter.cpp @@ -30,7 +30,7 @@ void MultiTapBiquadFilterCommand::Process(const ADSP::CommandListProcessor& proc for (u32 i = 0; i < filter_tap_count; i++) { auto 
state{reinterpret_cast<VoiceState::BiquadFilterState*>(states[i])}; if (needs_init[i]) { - std::memset(state, 0, sizeof(VoiceState::BiquadFilterState)); + *state = {}; } ApplyBiquadFilterFloat(output_buffer, input_buffer, biquads[i].b, biquads[i].a, *state, diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 46cf75fde..c0555f840 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -156,12 +156,13 @@ if (MSVC) ) target_compile_options(common PRIVATE /W4 - /WX + + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data + /we4800 # Implicit conversion from 'type' to bool. Possible information loss ) else() target_compile_options(common PRIVATE - -Werror - $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation> ) endif() diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 7e1df62b1..e4e58ea45 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -141,10 +141,6 @@ public: constexpr BitField(BitField&&) noexcept = default; constexpr BitField& operator=(BitField&&) noexcept = default; - [[nodiscard]] constexpr operator T() const { - return Value(); - } - constexpr void Assign(const T& value) { #ifdef _MSC_VER storage = static_cast<StorageType>((storage & ~mask) | FormatValue(value)); @@ -162,6 +158,17 @@ public: return ExtractValue(storage); } + template <typename ConvertedToType> + [[nodiscard]] constexpr ConvertedToType As() const { + static_assert(!std::is_same_v<T, ConvertedToType>, + "Unnecessary cast. 
Use Value() instead."); + return static_cast<ConvertedToType>(Value()); + } + + [[nodiscard]] constexpr operator T() const { + return Value(); + } + [[nodiscard]] constexpr explicit operator bool() const { return Value() != 0; } diff --git a/src/common/bounded_threadsafe_queue.h b/src/common/bounded_threadsafe_queue.h index 7e465549b..21217801e 100644 --- a/src/common/bounded_threadsafe_queue.h +++ b/src/common/bounded_threadsafe_queue.h @@ -21,11 +21,6 @@ constexpr size_t hardware_interference_size = std::hardware_destructive_interfer constexpr size_t hardware_interference_size = 64; #endif -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324) -#endif - template <typename T, size_t capacity = 0x400> class MPSCQueue { public: @@ -160,8 +155,4 @@ private: static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible"); }; -#ifdef _MSC_VER -#pragma warning(pop) -#endif - } // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 055bea641..113e663b5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -774,19 +774,15 @@ if (MSVC) /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data + /we4800 # Implicit conversion from 'type' to bool. 
Possible information loss ) else() target_compile_options(core PRIVATE -Werror=conversion - -Werror=ignored-qualifiers - $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> + -Wno-sign-conversion $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation> - - -Wno-sign-conversion ) endif() diff --git a/src/core/core.cpp b/src/core/core.cpp index 7fb8bc019..40a610435 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -384,6 +384,7 @@ struct System::Impl { kernel.ShutdownCores(); cpu_manager.Shutdown(); debugger.reset(); + services->KillNVNFlinger(); kernel.CloseServices(); services.reset(); service_manager.reset(); diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index f23d9373b..5d02865f4 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp @@ -232,8 +232,8 @@ const std::vector<std::shared_ptr<NCA>>& XCI::GetNCAs() const { std::shared_ptr<NCA> XCI::GetNCAByType(NCAContentType type) const { const auto program_id = secure_partition->GetProgramTitleID(); - const auto iter = std::find_if( - ncas.begin(), ncas.end(), [this, type, program_id](const std::shared_ptr<NCA>& nca) { + const auto iter = + std::find_if(ncas.begin(), ncas.end(), [type, program_id](const std::shared_ptr<NCA>& nca) { return nca->GetType() == type && nca->GetTitleId() == program_id; }); return iter == ncas.end() ? 
nullptr : *iter; diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index 08d489eab..f00479bd3 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp @@ -127,7 +127,7 @@ void ProgramMetadata::LoadManual(bool is_64_bit, ProgramAddressSpaceType address } bool ProgramMetadata::Is64BitProgram() const { - return npdm_header.has_64_bit_instructions; + return npdm_header.has_64_bit_instructions.As<bool>(); } ProgramAddressSpaceType ProgramMetadata::GetAddressSpaceType() const { diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index 06c2081a9..b17b00f2c 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -1135,27 +1135,27 @@ bool EmulatedController::IsControllerSupported(bool use_temporary_value) const { const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type; switch (type) { case NpadStyleIndex::ProController: - return supported_style_tag.fullkey; + return supported_style_tag.fullkey.As<bool>(); case NpadStyleIndex::Handheld: - return supported_style_tag.handheld; + return supported_style_tag.handheld.As<bool>(); case NpadStyleIndex::JoyconDual: - return supported_style_tag.joycon_dual; + return supported_style_tag.joycon_dual.As<bool>(); case NpadStyleIndex::JoyconLeft: - return supported_style_tag.joycon_left; + return supported_style_tag.joycon_left.As<bool>(); case NpadStyleIndex::JoyconRight: - return supported_style_tag.joycon_right; + return supported_style_tag.joycon_right.As<bool>(); case NpadStyleIndex::GameCube: - return supported_style_tag.gamecube; + return supported_style_tag.gamecube.As<bool>(); case NpadStyleIndex::Pokeball: - return supported_style_tag.palma; + return supported_style_tag.palma.As<bool>(); case NpadStyleIndex::NES: - return supported_style_tag.lark; + return supported_style_tag.lark.As<bool>(); case NpadStyleIndex::SNES: - return 
supported_style_tag.lucia; + return supported_style_tag.lucia.As<bool>(); case NpadStyleIndex::N64: - return supported_style_tag.lagoon; + return supported_style_tag.lagoon.As<bool>(); case NpadStyleIndex::SegaGenesis: - return supported_style_tag.lager; + return supported_style_tag.lager.As<bool>(); default: return false; } diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index aa27be767..18fde8bd6 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h @@ -406,7 +406,7 @@ inline s32 RequestParser::Pop() { } // Ignore the -Wclass-memaccess warning on memcpy for non-trivially default constructible objects. -#if defined(__GNUC__) +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wclass-memaccess" #endif @@ -417,7 +417,7 @@ void RequestParser::PopRaw(T& value) { std::memcpy(&value, cmdbuf + index, sizeof(T)); index += (sizeof(T) + 3) / 4; // round up to word length } -#if defined(__GNUC__) +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) #pragma GCC diagnostic pop #endif diff --git a/src/core/hle/kernel/k_session_request.h b/src/core/hle/kernel/k_session_request.h index fcf521597..e5558bc2c 100644 --- a/src/core/hle/kernel/k_session_request.h +++ b/src/core/hle/kernel/k_session_request.h @@ -3,6 +3,8 @@ #pragma once +#include <array> + #include "core/hle/kernel/k_auto_object.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_memory_block.h" @@ -52,8 +54,7 @@ public: }; public: - explicit SessionMappings(KernelCore& kernel_) - : kernel(kernel_), m_mappings(nullptr), m_num_send(), m_num_recv(), m_num_exch() {} + explicit SessionMappings(KernelCore& kernel_) : kernel(kernel_) {} void Initialize() {} void Finalize(); @@ -149,17 +150,15 @@ public: private: KernelCore& kernel; - Mapping m_static_mappings[NumStaticMappings]; - Mapping* m_mappings; - u8 m_num_send; - u8 m_num_recv; - u8 m_num_exch; + 
std::array<Mapping, NumStaticMappings> m_static_mappings; + Mapping* m_mappings{}; + u8 m_num_send{}; + u8 m_num_recv{}; + u8 m_num_exch{}; }; public: - explicit KSessionRequest(KernelCore& kernel_) - : KAutoObject(kernel_), m_mappings(kernel_), m_thread(nullptr), m_server(nullptr), - m_event(nullptr) {} + explicit KSessionRequest(KernelCore& kernel_) : KAutoObject(kernel_), m_mappings(kernel_) {} static KSessionRequest* Create(KernelCore& kernel) { KSessionRequest* req = KSessionRequest::Allocate(kernel); @@ -281,7 +280,7 @@ public: private: // NOTE: This is public and virtual in Nintendo's kernel. - void Finalize() { + void Finalize() override { m_mappings.Finalize(); if (m_thread) { @@ -297,11 +296,11 @@ private: private: SessionMappings m_mappings; - KThread* m_thread; - KProcess* m_server; - KEvent* m_event; - uintptr_t m_address; - size_t m_size; + KThread* m_thread{}; + KProcess* m_server{}; + KEvent* m_event{}; + uintptr_t m_address{}; + size_t m_size{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index eed2dc9f3..fdc774e30 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -48,8 +48,8 @@ namespace Kernel { struct KernelCore::Impl { explicit Impl(Core::System& system_, KernelCore& kernel_) - : time_manager{system_}, - service_threads_manager{1, "ServiceThreadsManager"}, system{system_} {} + : time_manager{system_}, service_threads_manager{1, "ServiceThreadsManager"}, + service_thread_barrier{2}, system{system_} {} void SetMulticore(bool is_multi) { is_multicore = is_multi; @@ -737,7 +737,12 @@ struct KernelCore::Impl { } void ClearServiceThreads() { - service_threads_manager.QueueWork([this]() { service_threads.clear(); }); + service_threads_manager.QueueWork([this] { + service_threads.clear(); + default_service_thread.reset(); + service_thread_barrier.Sync(); + }); + service_thread_barrier.Sync(); } std::mutex server_objects_lock; @@ -802,6 +807,7 @@ struct 
KernelCore::Impl { std::unordered_set<std::shared_ptr<ServiceThread>> service_threads; std::weak_ptr<ServiceThread> default_service_thread; Common::ThreadWorker service_threads_manager; + Common::Barrier service_thread_barrier; std::array<KThread*, Core::Hardware::NUM_CPU_CORES> shutdown_threads; std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index b07ae3f02..4aca5b27d 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -751,8 +751,8 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { } system.GetReporter().SaveSvcBreakReport( - static_cast<u32>(break_reason.break_type.Value()), break_reason.signal_debugger, info1, - info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt); + static_cast<u32>(break_reason.break_type.Value()), break_reason.signal_debugger.As<bool>(), + info1, info2, has_dumped_buffer ? 
std::make_optional(debug_buffer) : std::nullopt); if (!break_reason.signal_debugger) { LOG_CRITICAL( diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index e78a57657..12c6a5b1a 100644 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h @@ -164,7 +164,7 @@ protected: u32_le size; u32_le library_version; u32_le theme_color; - u8 play_startup_sound; + bool play_startup_sound; u64_le system_tick; }; static_assert(sizeof(CommonArguments) == 0x20, "CommonArguments has incorrect size."); diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index b85831de1..2f871de31 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -745,8 +745,9 @@ void Controller_NPad::SetSupportedNpadIdTypes(u8* data, std::size_t length) { } void Controller_NPad::GetSupportedNpadIdTypes(u32* data, std::size_t max_length) { - ASSERT(max_length < supported_npad_id_types.size()); - std::memcpy(data, supported_npad_id_types.data(), supported_npad_id_types.size()); + const auto copy_amount = supported_npad_id_types.size() * sizeof(u32); + ASSERT(max_length <= copy_amount); + std::memcpy(data, supported_npad_id_types.data(), copy_amount); } std::size_t Controller_NPad::GetSupportedNpadIdTypesSize() const { @@ -1501,25 +1502,25 @@ bool Controller_NPad::IsControllerSupported(Core::HID::NpadStyleIndex controller Core::HID::NpadStyleTag style = GetSupportedStyleSet(); switch (controller) { case Core::HID::NpadStyleIndex::ProController: - return style.fullkey; + return style.fullkey.As<bool>(); case Core::HID::NpadStyleIndex::JoyconDual: - return style.joycon_dual; + return style.joycon_dual.As<bool>(); case Core::HID::NpadStyleIndex::JoyconLeft: - return style.joycon_left; + return style.joycon_left.As<bool>(); case Core::HID::NpadStyleIndex::JoyconRight: - return style.joycon_right; + return 
style.joycon_right.As<bool>(); case Core::HID::NpadStyleIndex::GameCube: - return style.gamecube; + return style.gamecube.As<bool>(); case Core::HID::NpadStyleIndex::Pokeball: - return style.palma; + return style.palma.As<bool>(); case Core::HID::NpadStyleIndex::NES: - return style.lark; + return style.lark.As<bool>(); case Core::HID::NpadStyleIndex::SNES: - return style.lucia; + return style.lucia.As<bool>(); case Core::HID::NpadStyleIndex::N64: - return style.lagoon; + return style.lagoon.As<bool>(); case Core::HID::NpadStyleIndex::SegaGenesis: - return style.lager; + return style.lager.As<bool>(); default: return false; } diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 9d9924395..9f4c7c99a 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -53,7 +53,7 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger } Module::Module(Core::System& system) - : service_context{system, "nvdrv"}, events_interface{*this}, container{system.Host1x()} { + : container{system.Host1x()}, service_context{system, "nvdrv"}, events_interface{*this} { builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) { std::shared_ptr<Devices::nvdevice> device = std::make_shared<Devices::nvhost_as_gpu>(system, *this, container); diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 146d046a9..f3c81bd88 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -97,6 +97,9 @@ private: friend class EventInterface; friend class Service::NVFlinger::NVFlinger; + /// Manages syncpoints on the host + NvCore::Container container; + /// Id to use for the next open file descriptor. 
DeviceFD next_fd = 1; @@ -108,9 +111,6 @@ private: EventInterface events_interface; - /// Manages syncpoints on the host - NvCore::Container container; - std::unordered_map<std::string, std::function<FilesContainerType::iterator(DeviceFD)>> builders; }; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index aa14d2cbc..dad93b38e 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -102,15 +102,19 @@ NVFlinger::~NVFlinger() { system.CoreTiming().UnscheduleEvent(single_composition_event, {}); } + ShutdownLayers(); + + if (nvdrv) { + nvdrv->Close(disp_fd); + } +} + +void NVFlinger::ShutdownLayers() { for (auto& display : displays) { for (size_t layer = 0; layer < display.GetNumLayers(); ++layer) { display.GetLayer(layer).Core().NotifyShutdown(); } } - - if (nvdrv) { - nvdrv->Close(disp_fd); - } } void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 99509bc5b..b8191c595 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -48,6 +48,8 @@ public: explicit NVFlinger(Core::System& system_, HosBinderDriverServer& hos_binder_driver_server_); ~NVFlinger(); + void ShutdownLayers(); + /// Sets the NVDrv module instance to use to send buffers to the GPU. 
void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance); diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index dadaf897f..5db6588e4 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -303,4 +303,8 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system Services::~Services() = default; +void Services::KillNVNFlinger() { + nv_flinger->ShutdownLayers(); +} + } // namespace Service diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index 5bf197c51..ec9deeee4 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h @@ -238,6 +238,8 @@ public: explicit Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system); ~Services(); + void KillNVNFlinger(); + private: std::unique_ptr<NVFlinger::HosBinderDriverServer> hos_binder_driver_server; std::unique_ptr<NVFlinger::NVFlinger> nv_flinger; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9637cb5b1..3ca80c8ff 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -233,18 +233,17 @@ struct Memory::Impl { current_vaddr, src_addr, size); std::memset(dest_buffer, 0, copy_amount); }, - [&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) { + [&](const std::size_t copy_amount, const u8* const src_ptr) { std::memcpy(dest_buffer, src_ptr, copy_amount); }, - [&system = system, &dest_buffer](const VAddr current_vaddr, - const std::size_t copy_amount, - const u8* const host_ptr) { + [&](const VAddr current_vaddr, const std::size_t copy_amount, + const u8* const host_ptr) { if constexpr (!UNSAFE) { system.GPU().FlushRegion(current_vaddr, copy_amount); } std::memcpy(dest_buffer, host_ptr, copy_amount); }, - [&dest_buffer](const std::size_t copy_amount) { + [&](const std::size_t copy_amount) { dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }); } @@ -267,17 +266,16 @@ struct Memory::Impl { "Unmapped WriteBlock @ 0x{:016X} (start 
address = 0x{:016X}, size = {})", current_vaddr, dest_addr, size); }, - [&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) { + [&](const std::size_t copy_amount, u8* const dest_ptr) { std::memcpy(dest_ptr, src_buffer, copy_amount); }, - [&system = system, &src_buffer](const VAddr current_vaddr, - const std::size_t copy_amount, u8* const host_ptr) { + [&](const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) { if constexpr (!UNSAFE) { system.GPU().InvalidateRegion(current_vaddr, copy_amount); } std::memcpy(host_ptr, src_buffer, copy_amount); }, - [&src_buffer](const std::size_t copy_amount) { + [&](const std::size_t copy_amount) { src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; }); } @@ -301,8 +299,7 @@ struct Memory::Impl { [](const std::size_t copy_amount, u8* const dest_ptr) { std::memset(dest_ptr, 0, copy_amount); }, - [&system = system](const VAddr current_vaddr, const std::size_t copy_amount, - u8* const host_ptr) { + [&](const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) { system.GPU().InvalidateRegion(current_vaddr, copy_amount); std::memset(host_ptr, 0, copy_amount); }, @@ -313,22 +310,20 @@ struct Memory::Impl { const std::size_t size) { WalkBlock( process, dest_addr, size, - [this, &process, &dest_addr, &src_addr, size](const std::size_t copy_amount, - const VAddr current_vaddr) { + [&](const std::size_t copy_amount, const VAddr current_vaddr) { LOG_ERROR(HW_Memory, "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", current_vaddr, src_addr, size); ZeroBlock(process, dest_addr, copy_amount); }, - [this, &process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) { + [&](const std::size_t copy_amount, const u8* const src_ptr) { WriteBlockImpl<false>(process, dest_addr, src_ptr, copy_amount); }, - [this, &system = system, &process, &dest_addr]( - const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) { + [&](const 
VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) { system.GPU().FlushRegion(current_vaddr, copy_amount); WriteBlockImpl<false>(process, dest_addr, host_ptr, copy_amount); }, - [&dest_addr, &src_addr](const std::size_t copy_amount) { + [&](const std::size_t copy_amount) { dest_addr += static_cast<VAddr>(copy_amount); src_addr += static_cast<VAddr>(copy_amount); }); @@ -575,7 +570,7 @@ struct Memory::Impl { [vaddr]() { LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, vaddr); }, - [&system = system, vaddr]() { system.GPU().FlushRegion(vaddr, sizeof(T)); }); + [&]() { system.GPU().FlushRegion(vaddr, sizeof(T)); }); if (ptr) { std::memcpy(&result, ptr, sizeof(T)); } @@ -599,7 +594,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, vaddr, static_cast<u64>(data)); }, - [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); + [&]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); if (ptr) { std::memcpy(ptr, &data, sizeof(T)); } @@ -613,7 +608,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, vaddr, static_cast<u64>(data)); }, - [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); + [&]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); if (ptr) { const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -628,7 +623,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", vaddr, static_cast<u64>(data[1]), static_cast<u64>(data[0])); }, - [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); }); + [&]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); }); if (ptr) { const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); return 
Common::AtomicCompareAndSwap(volatile_pointer, data, expected); diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index 2cf9eb97f..cc6f0ffc0 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -39,21 +39,14 @@ add_library(input_common STATIC if (MSVC) target_compile_options(input_common PRIVATE /W4 - /WX /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data - /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data - /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data + /we4800 # Implicit conversion from 'type' to bool. Possible information loss ) else() target_compile_options(input_common PRIVATE - -Werror -Werror=conversion - -Werror=ignored-qualifiers - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> - -Werror=unused-variable ) endif() diff --git a/src/input_common/drivers/sdl_driver.cpp b/src/input_common/drivers/sdl_driver.cpp index ddbe8a896..45ce588f0 100644 --- a/src/input_common/drivers/sdl_driver.cpp +++ b/src/input_common/drivers/sdl_driver.cpp @@ -40,8 +40,8 @@ public: void EnableMotion() { if (sdl_controller) { SDL_GameController* controller = sdl_controller.get(); - has_accel = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL); - has_gyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO); + has_accel = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; + has_gyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; if (has_accel) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); } diff --git a/src/input_common/input_poller.cpp b/src/input_common/input_poller.cpp index fff9731ce..4ac182147 100644 --- a/src/input_common/input_poller.cpp +++ 
b/src/input_common/input_poller.cpp @@ -801,8 +801,8 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateButtonDevice( const auto button_id = params.Get("button", 0); const auto keyboard_key = params.Get("code", 0); - const auto toggle = params.Get("toggle", false); - const auto inverted = params.Get("inverted", false); + const auto toggle = params.Get("toggle", false) != 0; + const auto inverted = params.Get("inverted", false) != 0; input_engine->PreSetController(identifier); input_engine->PreSetButton(identifier, button_id); input_engine->PreSetButton(identifier, keyboard_key); @@ -824,8 +824,8 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateHatButtonDevice( const auto button_id = params.Get("hat", 0); const auto direction = input_engine->GetHatButtonId(params.Get("direction", "")); - const auto toggle = params.Get("toggle", false); - const auto inverted = params.Get("inverted", false); + const auto toggle = params.Get("toggle", false) != 0; + const auto inverted = params.Get("inverted", false) != 0; input_engine->PreSetController(identifier); input_engine->PreSetHatButton(identifier, button_id); @@ -883,7 +883,7 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateAnalogDevice( .threshold = std::clamp(params.Get("threshold", 0.5f), 0.0f, 1.0f), .offset = std::clamp(params.Get("offset", 0.0f), -1.0f, 1.0f), .inverted = params.Get("invert", "+") == "-", - .toggle = static_cast<bool>(params.Get("toggle", false)), + .toggle = params.Get("toggle", false) != 0, }; input_engine->PreSetController(identifier); input_engine->PreSetAxis(identifier, axis); @@ -899,8 +899,8 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateTriggerDevice( }; const auto button = params.Get("button", 0); - const auto toggle = params.Get("toggle", false); - const auto inverted = params.Get("inverted", false); + const auto toggle = params.Get("toggle", false) != 0; + const auto inverted = params.Get("inverted", false) != 0; const 
auto axis = params.Get("axis", 0); const Common::Input::AnalogProperties properties = { @@ -930,8 +930,8 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateTouchDevice( }; const auto button = params.Get("button", 0); - const auto toggle = params.Get("toggle", false); - const auto inverted = params.Get("inverted", false); + const auto toggle = params.Get("toggle", false) != 0; + const auto inverted = params.Get("inverted", false) != 0; const auto axis_x = params.Get("axis_x", 0); const Common::Input::AnalogProperties properties_x = { diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index af8e51fe8..bcdd60db9 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -241,24 +241,14 @@ target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) if (MSVC) target_compile_options(shader_recompiler PRIVATE /W4 - /WX - /we4018 # 'expression' : signed/unsigned mismatch - /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) - /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch + + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data - /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data - /we4305 # 'context' : truncation from 'type1' to 'type2' /we4800 # Implicit conversion from 'type' to bool. Possible information loss - /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior. 
) else() target_compile_options(shader_recompiler PRIVATE - -Werror -Werror=conversion - -Werror=ignored-qualifiers - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> - -Werror=unused-variable # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. # And this in turns limits the size of a std::array. diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 7094d8e42..1f4ffdd62 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -5,10 +5,6 @@ #include "shader_recompiler/backend/glasm/glasm_emit_context.h" #include "shader_recompiler/frontend/ir/value.h" -#ifdef _MSC_VER -#pragma warning(disable : 4100) -#endif - namespace Shader::Backend::GLASM { #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b03a8ba1e..9f1ed95a4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -7,10 +7,6 @@ #include "shader_recompiler/backend/glsl/glsl_emit_context.h" #include "shader_recompiler/frontend/ir/value.h" -#ifdef _MSC_VER -#pragma warning(disable : 4100) -#endif - namespace Shader::Backend::GLSL { void EmitGetRegister(EmitContext& ctx) { diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 77efb4f57..b58741d4d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -137,28 +137,35 @@ bool IsLegacyAttribute(IR::Attribute 
attribute) { } std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings( - const VaryingState& state, std::queue<IR::Attribute> ununsed_generics) { + const VaryingState& state, std::queue<IR::Attribute> unused_generics, + const std::map<IR::Attribute, IR::Attribute>& previous_stage_mapping) { std::map<IR::Attribute, IR::Attribute> mapping; + auto update_mapping = [&mapping, &unused_generics, previous_stage_mapping](IR::Attribute attr, + size_t count) { + if (previous_stage_mapping.find(attr) != previous_stage_mapping.end()) { + for (size_t i = 0; i < count; ++i) { + mapping.insert({attr + i, previous_stage_mapping.at(attr + i)}); + } + } else { + for (size_t i = 0; i < count; ++i) { + mapping.insert({attr + i, unused_generics.front() + i}); + } + unused_generics.pop(); + } + }; for (size_t index = 0; index < 4; ++index) { auto attr = IR::Attribute::ColorFrontDiffuseR + index * 4; if (state.AnyComponent(attr)) { - for (size_t i = 0; i < 4; ++i) { - mapping.insert({attr + i, ununsed_generics.front() + i}); - } - ununsed_generics.pop(); + update_mapping(attr, 4); } } if (state[IR::Attribute::FogCoordinate]) { - mapping.insert({IR::Attribute::FogCoordinate, ununsed_generics.front()}); - ununsed_generics.pop(); + update_mapping(IR::Attribute::FogCoordinate, 1); } for (size_t index = 0; index < IR::NUM_FIXEDFNCTEXTURE; ++index) { auto attr = IR::Attribute::FixedFncTexture0S + index * 4; if (state.AnyComponent(attr)) { - for (size_t i = 0; i < 4; ++i) { - mapping.insert({attr + i, ununsed_generics.front() + i}); - } - ununsed_generics.pop(); + update_mapping(attr, 4); } } return mapping; @@ -265,21 +272,22 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& runtime_info) { auto& stores = program.info.stores; if (stores.Legacy()) { - std::queue<IR::Attribute> ununsed_output_generics{}; + std::queue<IR::Attribute> unused_output_generics{}; for (size_t 
index = 0; index < IR::NUM_GENERICS; ++index) { if (!stores.Generic(index)) { - ununsed_output_generics.push(IR::Attribute::Generic0X + index * 4); + unused_output_generics.push(IR::Attribute::Generic0X + index * 4); } } - auto mappings = GenerateLegacyToGenericMappings(stores, ununsed_output_generics); + program.info.legacy_stores_mapping = + GenerateLegacyToGenericMappings(stores, unused_output_generics, {}); for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { switch (inst.GetOpcode()) { case IR::Opcode::SetAttribute: { const auto attr = inst.Arg(0).Attribute(); if (IsLegacyAttribute(attr)) { - stores.Set(mappings[attr], true); - inst.SetArg(0, Shader::IR::Value(mappings[attr])); + stores.Set(program.info.legacy_stores_mapping[attr], true); + inst.SetArg(0, Shader::IR::Value(program.info.legacy_stores_mapping[attr])); } break; } @@ -292,15 +300,16 @@ void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& run auto& loads = program.info.loads; if (loads.Legacy()) { - std::queue<IR::Attribute> ununsed_input_generics{}; + std::queue<IR::Attribute> unused_input_generics{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; if (!runtime_info.previous_stage_stores.Generic(index) || !loads.Generic(index) || input_type == AttributeType::Disabled) { - ununsed_input_generics.push(IR::Attribute::Generic0X + index * 4); + unused_input_generics.push(IR::Attribute::Generic0X + index * 4); } } - auto mappings = GenerateLegacyToGenericMappings(loads, ununsed_input_generics); + auto mappings = GenerateLegacyToGenericMappings( + loads, unused_input_generics, runtime_info.previous_stage_legacy_stores_mapping); for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { switch (inst.GetOpcode()) { diff --git a/src/shader_recompiler/runtime_info.h 
b/src/shader_recompiler/runtime_info.h index dcb5ab158..549b81ef7 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -4,6 +4,7 @@ #pragma once #include <array> +#include <map> #include <optional> #include <vector> @@ -60,6 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array<AttributeType, 32> generic_input_types{}; VaryingState previous_stage_stores; + std::map<IR::Attribute, IR::Attribute> previous_stage_legacy_stores_mapping; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index cc596da4f..81097bf1a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -5,6 +5,7 @@ #include <array> #include <bitset> +#include <map> #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" @@ -127,6 +128,8 @@ struct Info { VaryingState stores; VaryingState passthrough; + std::map<IR::Attribute, IR::Attribute> legacy_stores_mapping; + bool loads_indexed_attributes{}; std::array<bool, 8> stores_frag_color{}; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cb8b46edf..106991969 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -279,14 +279,8 @@ if (MSVC) else() target_compile_options(video_core PRIVATE -Werror=conversion - -Wno-error=sign-conversion - -Werror=pessimizing-move - -Werror=redundant-move - -Werror=type-limits - $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> - $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> + -Wno-sign-conversion ) endif() diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 89a9d1f5a..5208bea75 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -117,10 +117,18 @@ void Maxwell3D::InitializeRegisterDefaults() { 
shadow_state = regs; - mme_inline[MAXWELL3D_REG_INDEX(draw.end)] = true; - mme_inline[MAXWELL3D_REG_INDEX(draw.begin)] = true; - mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; - mme_inline[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true; + draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true; + draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer32_first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer16_first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer8_first)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true; + draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true; + draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true; } void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -208,25 +216,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(3); case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): return ProcessCBBind(4); - case MAXWELL3D_REG_INDEX(draw.end): - return DrawArrays(); - case MAXWELL3D_REG_INDEX(index_buffer32_first): - regs.index_buffer.count = regs.index_buffer32_first.count; - regs.index_buffer.first = regs.index_buffer32_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - return DrawArrays(); - case MAXWELL3D_REG_INDEX(index_buffer16_first): - regs.index_buffer.count = regs.index_buffer16_first.count; - regs.index_buffer.first = regs.index_buffer16_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - return DrawArrays(); - case MAXWELL3D_REG_INDEX(index_buffer8_first): - regs.index_buffer.count = regs.index_buffer8_first.count; - 
regs.index_buffer.first = regs.index_buffer8_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - // a macro calls this one over and over, should it increase instancing? - // Used by Hades and likely other Vulkan games. - return DrawArrays(); case MAXWELL3D_REG_INDEX(topology_override): use_topology_override = true; return; @@ -261,14 +250,13 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) // Execute the current macro. macro_engine->Execute(macro_positions[entry], parameters); - if (mme_draw.current_mode != MMEDrawMode::Undefined) { - FlushMMEInlineDraw(); - } + + ProcessDeferredDraw(); } void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - // It is an error to write to a register other than the current macro's ARG register before it - // has finished execution. + // It is an error to write to a register other than the current macro's ARG register before + // it has finished execution. if (executing_macro != 0) { ASSERT(method == executing_macro + 1); } @@ -283,9 +271,33 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - const u32 argument = ProcessShadowRam(method, method_argument); - ProcessDirtyRegisters(method, argument); - ProcessMethodCall(method, argument, method_argument, is_last_call); + if (draw_command[method]) { + regs.reg_array[method] = method_argument; + deferred_draw_method.push_back(method); + auto u32_to_u8 = [&](const u32 argument) { + inline_index_draw_indexes.push_back(static_cast<u8>(argument & 0x000000ff)); + inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x0000ff00) >> 8)); + inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x00ff0000) >> 16)); + inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0xff000000) >> 24)); + }; + if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) 
{ + u32_to_u8(method_argument); + } else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) { + u32_to_u8(regs.inline_index_2x16.even); + u32_to_u8(regs.inline_index_2x16.odd); + } else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) { + u32_to_u8(regs.inline_index_4x8.index0); + u32_to_u8(regs.inline_index_4x8.index1); + u32_to_u8(regs.inline_index_4x8.index2); + u32_to_u8(regs.inline_index_4x8.index3); + } + } else { + ProcessDeferredDraw(); + + const u32 argument = ProcessShadowRam(method, method_argument); + ProcessDirtyRegisters(method, argument); + ProcessMethodCall(method, argument, method_argument, is_last_call); + } } void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -326,55 +338,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, } } -void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { - if (mme_draw.current_mode == MMEDrawMode::Undefined) { - if (mme_draw.gl_begin_consume) { - mme_draw.current_mode = expected_mode; - mme_draw.current_count = count; - mme_draw.instance_count = 1; - mme_draw.gl_begin_consume = false; - mme_draw.gl_end_count = 0; - } - return; - } else { - if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count && - mme_draw.instance_mode && mme_draw.gl_begin_consume) { - mme_draw.instance_count++; - mme_draw.gl_begin_consume = false; - return; - } else { - FlushMMEInlineDraw(); - } - } - // Tail call in case it needs to retry. - StepInstance(expected_mode, count); -} - -void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { - if (mme_inline[method]) { - regs.reg_array[method] = method_argument; - if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || - method == MAXWELL3D_REG_INDEX(index_buffer.count)) { - const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) - ? 
MMEDrawMode::Array - : MMEDrawMode::Indexed; - StepInstance(expected_mode, method_argument); - } else if (method == MAXWELL3D_REG_INDEX(draw.begin)) { - mme_draw.instance_mode = - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged); - mme_draw.gl_begin_consume = true; - } else { - mme_draw.gl_end_count++; - } - } else { - if (mme_draw.current_mode != MMEDrawMode::Undefined) { - FlushMMEInlineDraw(); - } - CallMethod(method, method_argument, true); - } -} - void Maxwell3D::ProcessTopologyOverride() { using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; @@ -404,41 +367,6 @@ void Maxwell3D::ProcessTopologyOverride() { } } -void Maxwell3D::FlushMMEInlineDraw() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); - - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); - - ProcessTopologyOverride(); - - const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, true); - } - - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if - // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - - // it's possible that it is incorrect and that there is some other register used to specify the - // drawing mode. 
- if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } - mme_draw.current_mode = MMEDrawMode::Undefined; - mme_draw.current_count = 0; - mme_draw.instance_count = 0; - mme_draw.instance_mode = false; - mme_draw.gl_begin_consume = false; - mme_draw.gl_end_count = 0; -} - void Maxwell3D::ProcessMacroUpload(u32 data) { macro_engine->AddCode(regs.load_mme.instruction_ptr++, data); } @@ -473,9 +401,7 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.report_semaphore.query.operation) { case Regs::ReportSemaphore::Operation::Release: - if (regs.report_semaphore.query.release == - Regs::ReportSemaphore::Release::AfterAllPreceedingWrites || - regs.report_semaphore.query.short_query != 0) { + if (regs.report_semaphore.query.short_query != 0) { const GPUVAddr sequence_address{regs.report_semaphore.Address()}; const u32 payload = regs.report_semaphore.payload; std::function<void()> operation([this, sequence_address, payload] { @@ -489,11 +415,10 @@ void Maxwell3D::ProcessQueryGet() { }; const GPUVAddr sequence_address{regs.report_semaphore.Address()}; const u32 payload = regs.report_semaphore.payload; - std::function<void()> operation([this, sequence_address, payload] { + [this, sequence_address, payload] { memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); memory_manager.Write<u64>(sequence_address, payload); - }); - rasterizer->SyncOperation(std::move(operation)); + }(); } break; case Regs::ReportSemaphore::Operation::Acquire: @@ -569,47 +494,11 @@ void Maxwell3D::ProcessCounterReset() { void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); - const auto condition = regs.sync_info.condition.Value(); - [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value(); - if (condition == Regs::SyncInfo::Condition::RopWritesDone) { - rasterizer->SignalSyncPoint(sync_point); - } -} - -void Maxwell3D::DrawArrays() { - LOG_TRACE(HW_GPU, "called, topology={}, 
count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); - - ProcessTopologyOverride(); - - if (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) { - // Increment the current instance *before* drawing. - state.current_instance++; - } else if (regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged) { - // Reset the current instance to 0. - state.current_instance = 0; - } - - const bool is_indexed{regs.index_buffer.count && !regs.vertex_buffer.count}; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, false); - } - - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if - // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - - // it's possible that it is incorrect and that there is some other register used to specify the - // drawing mode. 
- if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; + const u32 cache_flush = regs.sync_info.clean_l2.Value(); + if (cache_flush != 0) { + rasterizer->InvalidateGPUCache(); } + rasterizer->SignalSyncPoint(sync_point); } std::optional<u64> Maxwell3D::GetQueryResult() { @@ -694,4 +583,95 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } +void Maxwell3D::ProcessDeferredDraw() { + if (deferred_draw_method.empty()) { + return; + } + + enum class DrawMode { + Undefined, + General, + Instance, + }; + DrawMode draw_mode{DrawMode::Undefined}; + u32 instance_count = 1; + + auto first_method = deferred_draw_method[0]; + if (MAXWELL3D_REG_INDEX(draw.begin) == first_method) { + // The minimum number of methods for drawing must be greater than or equal to + // 3[draw.begin->vertex(index)count->draw.end] to avoid errors in index mode drawing + if (deferred_draw_method.size() < 3) { + return; + } + draw_mode = (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) + ? 
DrawMode::Instance + : DrawMode::General; + } else if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method || + MAXWELL3D_REG_INDEX(index_buffer16_first) == first_method || + MAXWELL3D_REG_INDEX(index_buffer8_first) == first_method) { + draw_mode = DrawMode::General; + } + + // Drawing will only begin with draw.begin or index_buffer method, other methods directly + // clear + if (draw_mode == DrawMode::Undefined) { + deferred_draw_method.clear(); + return; + } + + if (draw_mode == DrawMode::Instance) { + ASSERT_MSG(deferred_draw_method.size() % 4 == 0, "Instance mode method size error"); + instance_count = static_cast<u32>(deferred_draw_method.size()) / 4; + } else { + if (MAXWELL3D_REG_INDEX(index_buffer32_first) == first_method) { + regs.index_buffer.count = regs.index_buffer32_first.count; + regs.index_buffer.first = regs.index_buffer32_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else if (MAXWELL3D_REG_INDEX(index_buffer16_first) == first_method) { + regs.index_buffer.count = regs.index_buffer16_first.count; + regs.index_buffer.first = regs.index_buffer16_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else if (MAXWELL3D_REG_INDEX(index_buffer8_first) == first_method) { + regs.index_buffer.count = regs.index_buffer8_first.count; + regs.index_buffer.first = regs.index_buffer8_first.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + } else { + auto second_method = deferred_draw_method[1]; + if (MAXWELL3D_REG_INDEX(draw_inline_index) == second_method || + MAXWELL3D_REG_INDEX(inline_index_2x16.even) == second_method || + MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == second_method) { + regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4); + regs.index_buffer.format = Regs::IndexFormat::UnsignedInt; + } + } + } + + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), + regs.vertex_buffer.count); + + ASSERT_MSG(!(regs.index_buffer.count && 
regs.vertex_buffer.count), "Both indexed and direct?"); + + // Both instance configuration registers can not be set at the same time. + ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || + regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, + "Illegal combination of instancing parameters"); + + ProcessTopologyOverride(); + + const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count; + if (ShouldExecute()) { + rasterizer->Draw(is_indexed, instance_count); + } + + if (is_indexed) { + regs.index_buffer.count = 0; + } else { + regs.vertex_buffer.count = 0; + } + + deferred_draw_method.clear(); + inline_index_draw_indexes.clear(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 75e3b868d..bd23ebc12 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1739,14 +1739,11 @@ public: Footprint_1x1_Virtual = 2, }; - struct InlineIndex4x8Align { + struct InlineIndex4x8 { union { BitField<0, 30, u32> count; BitField<30, 2, u32> start; }; - }; - - struct InlineIndex4x8Index { union { BitField<0, 8, u32> index0; BitField<8, 8, u32> index1; @@ -2836,8 +2833,7 @@ public: u32 depth_write_enabled; ///< 0x12E8 u32 alpha_test_enabled; ///< 0x12EC INSERT_PADDING_BYTES_NOINIT(0x10); - InlineIndex4x8Align inline_index_4x8_align; ///< 0x1300 - InlineIndex4x8Index inline_index_4x8_index; ///< 0x1304 + InlineIndex4x8 inline_index_4x8; ///< 0x1300 D3DCullMode d3d_cull_mode; ///< 0x1308 ComparisonOp depth_test_func; ///< 0x130C f32 alpha_test_ref; ///< 0x1310 @@ -3048,8 +3044,6 @@ public: }; std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; - - u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. 
}; State state{}; @@ -3064,11 +3058,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - /// Write the value to the register identified by method. - void CallMethodFromMME(u32 method, u32 method_argument); - - void FlushMMEInlineDraw(); - bool ShouldExecute() const { return execute_on; } @@ -3081,21 +3070,6 @@ public: return *rasterizer; } - enum class MMEDrawMode : u32 { - Undefined, - Array, - Indexed, - }; - - struct MMEDrawState { - MMEDrawMode current_mode{MMEDrawMode::Undefined}; - u32 current_count{}; - u32 instance_count{}; - bool instance_mode{}; - bool gl_begin_consume{}; - u32 gl_end_count{}; - } mme_draw; - struct DirtyState { using Flags = std::bitset<std::numeric_limits<u8>::max()>; using Table = std::array<u8, Regs::NUM_REGS>; @@ -3105,6 +3079,8 @@ public: Tables tables{}; } dirty; + std::vector<u8> inline_index_draw_indexes; + private: void InitializeRegisterDefaults(); @@ -3164,14 +3140,10 @@ private: /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); - /// Handles a write to the VERTEX_END_GL register, triggering a draw. - void DrawArrays(); - /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) void ProcessTopologyOverride(); - // Handles a instance drawcall from MME - void StepInstance(MMEDrawMode expected_mode, u32 count); + void ProcessDeferredDraw(); /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional<u64> GetQueryResult(); @@ -3184,8 +3156,6 @@ private: /// Start offsets of each macro in macro_memory std::array<u32, 0x80> macro_positions{}; - std::array<bool, Regs::NUM_REGS> mme_inline{}; - /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; /// Parameters that have been submitted to the macro call so far. 
@@ -3198,6 +3168,9 @@ private: bool execute_on{true}; bool use_topology_override{false}; + + std::array<bool, Regs::NUM_REGS> draw_command{}; + std::vector<u32> deferred_draw_method; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -3402,8 +3375,7 @@ ASSERT_REG_POSITION(alpha_to_coverage_dither, 0x12E0); ASSERT_REG_POSITION(blend_per_target_enabled, 0x12E4); ASSERT_REG_POSITION(depth_write_enabled, 0x12E8); ASSERT_REG_POSITION(alpha_test_enabled, 0x12EC); -ASSERT_REG_POSITION(inline_index_4x8_align, 0x1300); -ASSERT_REG_POSITION(inline_index_4x8_index, 0x1304); +ASSERT_REG_POSITION(inline_index_4x8, 0x1300); ASSERT_REG_POSITION(d3d_cull_mode, 0x1308); ASSERT_REG_POSITION(depth_test_func, 0x130C); ASSERT_REG_POSITION(alpha_test_ref, 0x1310); diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index cca890792..3977bb0fb 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -75,11 +75,10 @@ void Puller::ProcessSemaphoreTriggerMethod() { if (op == GpuSemaphoreOperation::WriteLong) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; - std::function<void()> operation([this, sequence_address, payload] { + [this, sequence_address, payload] { memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); memory_manager.Write<u64>(sequence_address, payload); - }); - rasterizer->SignalFence(std::move(operation)); + }(); } else { do { const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 8a8adbb42..f896591bf 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -22,35 +22,29 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.draw.topology.Assign( 
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff)); maxwell3d.regs.global_base_instance_index = parameters[5]; - maxwell3d.mme_draw.instance_count = instance_count; maxwell3d.regs.global_base_vertex_index = parameters[3]; maxwell3d.regs.index_buffer.count = parameters[1]; maxwell3d.regs.index_buffer.first = parameters[4]; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } maxwell3d.regs.index_buffer.count = 0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { - const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); maxwell3d.regs.vertex_buffer.first = parameters[3]; maxwell3d.regs.vertex_buffer.count = parameters[1]; maxwell3d.regs.global_base_instance_index = parameters[4]; maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); - maxwell3d.mme_draw.instance_count = count; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(false, true); + maxwell3d.Rasterizer().Draw(false, instance_count); } maxwell3d.regs.vertex_buffer.count = 0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { @@ -63,24 +57,21 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.regs.global_base_vertex_index = element_base; maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.CallMethodFromMME(0x8e3, 
0x640); - maxwell3d.CallMethodFromMME(0x8e4, element_base); - maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, element_base, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.index_buffer.count = 0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, 0x0); - maxwell3d.CallMethodFromMME(0x8e5, 0x0); - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); } // Multidraw Indirect @@ -91,11 +82,9 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_buffer.count = 0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, 0x0); - maxwell3d.CallMethodFromMME(0x8e5, 0x0); - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; }); const u32 start_indirect = parameters[0]; @@ -127,15 +116,13 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_buffer.count = num_vertices; 
maxwell3d.regs.global_base_vertex_index = base_vertex; maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, base_vertex); - maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } } diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index f670b1bca..c0d32c112 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -335,7 +335,7 @@ void MacroInterpreterImpl::SetMethodAddress(u32 address) { } void MacroInterpreterImpl::Send(u32 value) { - maxwell3d.CallMethodFromMME(method_address.address, value); + maxwell3d.CallMethod(method_address.address, value, true); // Increment the method address by the method increment. 
method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index a302a9603..25c1ce798 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -346,7 +346,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { } void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); + maxwell3d->CallMethod(method_address.address, value, true); } void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d07b21bd6..384350dbd 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -133,7 +133,7 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value template <MemoryManager::EntryType entry_type> GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, PTEKind kind) { - u64 remaining_size{size}; + [[maybe_unused]] u64 remaining_size{size}; if constexpr (entry_type == EntryType::Mapped) { page_table.ReserveRange(gpu_addr, size); } @@ -159,7 +159,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp template <MemoryManager::EntryType entry_type> GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, PTEKind kind) { - u64 remaining_size{size}; + [[maybe_unused]] u64 remaining_size{size}; for (u64 offset{}; offset < size; offset += big_page_size) { const GPUVAddr current_gpu_addr = gpu_addr + offset; [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d2d40884c..1cbfef090 100644 --- 
a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -40,7 +40,7 @@ public: virtual ~RasterizerInterface() = default; /// Dispatches a draw invocation - virtual void Draw(bool is_indexed, bool is_instanced) = 0; + virtual void Draw(bool is_indexed, u32 instance_count) = 0; /// Clear the current framebuffer virtual void Clear() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e5c09a969..1590b21de 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -205,7 +205,7 @@ void RasterizerOpenGL::Clear() { ++num_queued_commands; } -void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { +void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -222,14 +222,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); + BindInlineIndexBuffer(); + SyncState(); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology); BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast<GLuint>(maxwell3d->regs.global_base_instance_index); - const GLsizei num_instances = - static_cast<GLsizei>(is_instanced ? 
maxwell3d->mme_draw.instance_count : 1); + const GLsizei num_instances = static_cast<GLsizei>(instance_count); if (is_indexed) { const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.global_base_vertex_index); const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.index_buffer.count); @@ -1129,6 +1130,16 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerOpenGL::BindInlineIndexBuffer() { + if (maxwell3d->inline_index_draw_indexes.empty()) { + return; + } + const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); + auto buffer = Buffer(buffer_cache_runtime, *this, 0, data_count); + buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes); + buffer_cache_runtime.BindIndexBuffer(buffer, 0, data_count); +} + AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 45131b785..793e0d608 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -68,7 +68,7 @@ public: StateTracker& state_tracker_); ~RasterizerOpenGL() override; - void Draw(bool is_indexed, bool is_instanced) override; + void Draw(bool is_indexed, u32 instance_count) override; void Clear() override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -199,6 +199,8 @@ private: /// End a transform feedback void EndTransformFeedback(); + void BindInlineIndexBuffer(); + Tegra::GPU& gpu; const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 609f0a772..e94cfdb1a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -63,6 +63,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, Shader::RuntimeInfo info; if (previous_program) { info.previous_stage_stores = previous_program->info.stores; + info.previous_stage_legacy_stores_mapping = previous_program->info.legacy_stores_mapping; } else { // Mark all stores as available for vertex shaders info.previous_stage_stores.mask.set(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 20f1d6584..13d5a1f67 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -134,6 +134,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program Shader::RuntimeInfo info; if (previous_program) { info.previous_stage_stores = previous_program->info.stores; + info.previous_stage_legacy_stores_mapping = previous_program->info.legacy_stores_mapping; if (previous_program->is_geometry_passthrough) { info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 47dfb45a1..9f05a7a18 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -127,11 +127,10 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 return scissor; } -DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, - bool is_indexed) { +DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) { DrawParams params{ .base_instance = regs.global_base_instance_index, - .num_instances = is_instanced ? num_instances : 1, + .num_instances = num_instances, .base_vertex = is_indexed ? regs.global_base_vertex_index : regs.vertex_buffer.first, .num_vertices = is_indexed ? 
regs.index_buffer.count : regs.vertex_buffer.count, .first_index = is_indexed ? regs.index_buffer.first : 0, @@ -177,7 +176,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; -void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { +void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { MICROPROFILE_SCOPE(Vulkan_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -194,13 +193,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); + BindInlineIndexBuffer(); + BeginTransformFeedback(); UpdateDynamicStates(); const auto& regs{maxwell3d->regs}; - const u32 num_instances{maxwell3d->mme_draw.instance_count}; - const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + const u32 num_instances{instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, @@ -1009,4 +1010,17 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerVulkan::BindInlineIndexBuffer() { + if (maxwell3d->inline_index_draw_indexes.empty()) { + return; + } + const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); + auto buffer = buffer_cache_runtime.UploadStagingBuffer(data_count); + std::memcpy(buffer.mapped_span.data(), maxwell3d->inline_index_draw_indexes.data(), data_count); + buffer_cache_runtime.BindIndexBuffer( + maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format, + maxwell3d->regs.index_buffer.first, maxwell3d->regs.index_buffer.count, buffer.buffer, + static_cast<u32>(buffer.offset), data_count); +} + } // namespace Vulkan diff --git 
a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4cde3c983..e2fdc7611 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -64,7 +64,7 @@ public: StateTracker& state_tracker_, Scheduler& scheduler_); ~RasterizerVulkan() override; - void Draw(bool is_indexed, bool is_instanced) override; + void Draw(bool is_indexed, u32 instance_count) override; void Clear() override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -141,6 +141,8 @@ private: void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + void BindInlineIndexBuffer(); + Tegra::GPU& gpu; ScreenInfo& screen_info; diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index ad935d386..08aa8ca33 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -150,6 +150,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::D24_UNORM_S8_UINT; case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): return PixelFormat::D32_FLOAT_S8_UINT; + case Hash(TextureFormat::R32_B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR): + return PixelFormat::D32_FLOAT_S8_UINT; case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): return PixelFormat::BC1_RGBA_UNORM; case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0e0fd410f..8ef75fe73 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -442,7 +442,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { template <class P> void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { std::vector<ImageId> images; - ForEachImageInRegion(cpu_addr, size, 
[this, &images](ImageId image_id, ImageBase& image) { + ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; } @@ -1502,9 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { image.flags &= ~ImageFlagBits::BadOverlap; lru_cache.Free(image.lru_index); const auto& clear_page_table = - [this, image_id](u64 page, - std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>& - selected_page_table) { + [image_id](u64 page, + std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>& + selected_page_table) { const auto page_it = selected_page_table.find(page); if (page_it == selected_page_table.end()) { ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS); diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 15b9d4182..69a32819a 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1661,8 +1661,8 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, for (u32 z = 0; z < depth; ++z) { const u32 depth_offset = z * height * width * 4; for (u32 y_index = 0; y_index < rows; ++y_index) { - auto decompress_stride = [data, width, height, depth, block_width, block_height, output, - rows, cols, z, depth_offset, y_index] { + auto decompress_stride = [data, width, height, block_width, block_height, output, rows, + cols, z, depth_offset, y_index] { const u32 y = y_index * block_height; for (u32 x_index = 0; x_index < cols; ++x_index) { const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 52d067a2d..fd1a4b987 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -21,7 +21,7 @@ constexpr u32 pdep(u32 value) { u32 m = mask; for (u32 bit = 1; m; bit += bit) { if (value & bit) - result |= m & -m; + 
result |= m & (~m + 1); m &= m - 1; } return result; diff --git a/src/yuzu/multiplayer/state.cpp b/src/yuzu/multiplayer/state.cpp index ae2738ad4..285bb150d 100644 --- a/src/yuzu/multiplayer/state.cpp +++ b/src/yuzu/multiplayer/state.cpp @@ -268,7 +268,7 @@ bool MultiplayerState::OnCloseRoom() { return true; } // Save ban list - UISettings::values.multiplayer_ban_list = std::move(room->GetBanList()); + UISettings::values.multiplayer_ban_list = room->GetBanList(); room->Destroy(); announce_multiplayer_session->Stop(); diff --git a/src/yuzu/startup_checks.cpp b/src/yuzu/startup_checks.cpp index fc2693f9d..6a91212e2 100644 --- a/src/yuzu/startup_checks.cpp +++ b/src/yuzu/startup_checks.cpp @@ -49,7 +49,7 @@ bool CheckEnvVars(bool* is_child) { *is_child = true; return false; } else if (!SetEnvironmentVariableA(IS_CHILD_ENV_VAR, ENV_VAR_ENABLED_TEXT)) { - std::fprintf(stderr, "SetEnvironmentVariableA failed to set %s with error %d\n", + std::fprintf(stderr, "SetEnvironmentVariableA failed to set %s with error %lu\n", IS_CHILD_ENV_VAR, GetLastError()); return true; } @@ -62,7 +62,7 @@ bool StartupChecks(const char* arg0, bool* has_broken_vulkan, bool perform_vulka // Set the startup variable for child processes const bool env_var_set = SetEnvironmentVariableA(STARTUP_CHECK_ENV_VAR, ENV_VAR_ENABLED_TEXT); if (!env_var_set) { - std::fprintf(stderr, "SetEnvironmentVariableA failed to set %s with error %d\n", + std::fprintf(stderr, "SetEnvironmentVariableA failed to set %s with error %lu\n", STARTUP_CHECK_ENV_VAR, GetLastError()); return false; } @@ -81,22 +81,22 @@ bool StartupChecks(const char* arg0, bool* has_broken_vulkan, bool perform_vulka DWORD exit_code = STILL_ACTIVE; const int err = GetExitCodeProcess(process_info.hProcess, &exit_code); if (err == 0) { - std::fprintf(stderr, "GetExitCodeProcess failed with error %d\n", GetLastError()); + std::fprintf(stderr, "GetExitCodeProcess failed with error %lu\n", GetLastError()); } // Vulkan is broken if the child crashed 
(return value is not zero) *has_broken_vulkan = (exit_code != 0); if (CloseHandle(process_info.hProcess) == 0) { - std::fprintf(stderr, "CloseHandle failed with error %d\n", GetLastError()); + std::fprintf(stderr, "CloseHandle failed with error %lu\n", GetLastError()); } if (CloseHandle(process_info.hThread) == 0) { - std::fprintf(stderr, "CloseHandle failed with error %d\n", GetLastError()); + std::fprintf(stderr, "CloseHandle failed with error %lu\n", GetLastError()); } } if (!SetEnvironmentVariableA(STARTUP_CHECK_ENV_VAR, nullptr)) { - std::fprintf(stderr, "SetEnvironmentVariableA failed to clear %s with error %d\n", + std::fprintf(stderr, "SetEnvironmentVariableA failed to clear %s with error %lu\n", STARTUP_CHECK_ENV_VAR, GetLastError()); } @@ -135,7 +135,8 @@ bool SpawnChild(const char* arg0, PROCESS_INFORMATION* pi, int flags) { startup_info.cb = sizeof(startup_info); char p_name[255]; - std::strncpy(p_name, arg0, 255); + std::strncpy(p_name, arg0, 254); + p_name[254] = '\0'; const bool process_created = CreateProcessA(nullptr, // lpApplicationName p_name, // lpCommandLine @@ -149,7 +150,7 @@ bool SpawnChild(const char* arg0, PROCESS_INFORMATION* pi, int flags) { pi // lpProcessInformation ); if (!process_created) { - std::fprintf(stderr, "CreateProcessA failed with error %d\n", GetLastError()); + std::fprintf(stderr, "CreateProcessA failed with error %lu\n", GetLastError()); return false; } |