diff options
47 files changed, 519 insertions, 492 deletions
diff --git a/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h b/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h index 15ecfde5c..4e0b3d548 100644 --- a/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h +++ b/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h @@ -747,12 +747,12 @@ AF1 sharpness){ // Immediate constants for peak range. AF2 peakC=AF2(1.0,-1.0*4.0); // Limiters, these need to be high precision RCPs. - AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R); - AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G); - AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B); - AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y); - AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y); - AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y); + AF1 hitMinR=min(mn4R,eR)*ARcpF1(AF1_(4.0)*mx4R); + AF1 hitMinG=min(mn4G,eG)*ARcpF1(AF1_(4.0)*mx4G); + AF1 hitMinB=min(mn4B,eB)*ARcpF1(AF1_(4.0)*mx4B); + AF1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpF1(AF1_(4.0)*mn4R+peakC.y); + AF1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpF1(AF1_(4.0)*mn4G+peakC.y); + AF1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpF1(AF1_(4.0)*mn4B+peakC.y); AF1 lobeR=max(-hitMinR,hitMaxR); AF1 lobeG=max(-hitMinG,hitMaxG); AF1 lobeB=max(-hitMinB,hitMaxB); @@ -845,12 +845,12 @@ AF1 sharpness){ // Immediate constants for peak range. AH2 peakC=AH2(1.0,-1.0*4.0); // Limiters, these need to be high precision RCPs. - AH1 hitMinR=mn4R*ARcpH1(AH1_(4.0)*mx4R); - AH1 hitMinG=mn4G*ARcpH1(AH1_(4.0)*mx4G); - AH1 hitMinB=mn4B*ARcpH1(AH1_(4.0)*mx4B); - AH1 hitMaxR=(peakC.x-mx4R)*ARcpH1(AH1_(4.0)*mn4R+peakC.y); - AH1 hitMaxG=(peakC.x-mx4G)*ARcpH1(AH1_(4.0)*mn4G+peakC.y); - AH1 hitMaxB=(peakC.x-mx4B)*ARcpH1(AH1_(4.0)*mn4B+peakC.y); + AH1 hitMinR=min(mn4R,eR)*ARcpH1(AH1_(4.0)*mx4R); + AH1 hitMinG=min(mn4G,eG)*ARcpH1(AH1_(4.0)*mx4G); + AH1 hitMinB=min(mn4B,eB)*ARcpH1(AH1_(4.0)*mx4B); + AH1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH1(AH1_(4.0)*mn4R+peakC.y); + AH1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH1(AH1_(4.0)*mn4G+peakC.y); + AH1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH1(AH1_(4.0)*mn4B+peakC.y); AH1 lobeR=max(-hitMinR,hitMaxR); AH1 lobeG=max(-hitMinG,hitMaxG); AH1 lobeB=max(-hitMinB,hitMaxB); @@ -963,12 +963,12 @@ AF1 sharpness){ // Immediate constants for peak range. AH2 peakC=AH2(1.0,-1.0*4.0); // Limiters, these need to be high precision RCPs. - AH2 hitMinR=mn4R*ARcpH2(AH2_(4.0)*mx4R); - AH2 hitMinG=mn4G*ARcpH2(AH2_(4.0)*mx4G); - AH2 hitMinB=mn4B*ARcpH2(AH2_(4.0)*mx4B); - AH2 hitMaxR=(peakC.x-mx4R)*ARcpH2(AH2_(4.0)*mn4R+peakC.y); - AH2 hitMaxG=(peakC.x-mx4G)*ARcpH2(AH2_(4.0)*mn4G+peakC.y); - AH2 hitMaxB=(peakC.x-mx4B)*ARcpH2(AH2_(4.0)*mn4B+peakC.y); + AH2 hitMinR=min(mn4R,eR)*ARcpH2(AH2_(4.0)*mx4R); + AH2 hitMinG=min(mn4G,eG)*ARcpH2(AH2_(4.0)*mx4G); + AH2 hitMinB=min(mn4B,eB)*ARcpH2(AH2_(4.0)*mx4B); + AH2 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH2(AH2_(4.0)*mn4R+peakC.y); + AH2 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH2(AH2_(4.0)*mn4G+peakC.y); + AH2 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH2(AH2_(4.0)*mn4B+peakC.y); AH2 lobeR=max(-hitMinR,hitMaxR); AH2 lobeG=max(-hitMinG,hitMaxG); AH2 lobeB=max(-hitMinB,hitMaxB); diff --git a/src/common/settings.h b/src/common/settings.h index d01c0448c..9bee6e10f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -554,6 +554,7 @@ struct Values { Setting<bool> use_docked_mode{true, "use_docked_mode"}; BasicSetting<bool> enable_raw_input{false, "enable_raw_input"}; + BasicSetting<bool> controller_navigation{true, "controller_navigation"}; Setting<bool> vibration_enabled{true, "vibration_enabled"}; Setting<bool> enable_accurate_vibrations{false, "enable_accurate_vibrations"}; diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 0ddf9b83e..87b3d63a4 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -37,12 +37,12 @@ constexpr Xbyak::Reg IndexToReg(size_t reg_index) { } } -inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { - std::bitset<32> bits; +constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { + size_t bits = 0; for (const Xbyak::Reg& reg : regs) { - bits[RegToIndex(reg)] = true; + bits |= size_t{1} << RegToIndex(reg); } - return bits; + return {bits}; } constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); @@ -57,7 +57,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; -const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -74,7 +74,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm5, }); -const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rsi, @@ -108,7 +108,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; -const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -137,7 +137,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm15, }); -const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rbp, diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index d12037b11..a7cdf45e6 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -749,6 +749,7 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback raw_status.gyro.y.value, raw_status.gyro.z.value, }); + emulated.SetGyroThreshold(raw_status.gyro.x.properties.threshold); emulated.UpdateRotation(raw_status.delta_timestamp); emulated.UpdateOrientation(raw_status.delta_timestamp); force_update_motion = raw_status.force_update; diff --git a/src/core/hid/motion_input.cpp b/src/core/hid/motion_input.cpp index 6e126be19..05042fd99 100644 --- a/src/core/hid/motion_input.cpp +++ b/src/core/hid/motion_input.cpp @@ -10,7 +10,7 @@ namespace Core::HID { MotionInput::MotionInput() { // Initialize PID constants with default values SetPID(0.3f, 0.005f, 0.0f); - SetGyroThreshold(0.00005f); + SetGyroThreshold(0.007f); } void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) { @@ -31,7 +31,7 @@ void MotionInput::SetGyroscope(const Common::Vec3f& gyroscope) { gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f); } - if (gyro.Length2() < gyro_threshold) { + if (gyro.Length() < gyro_threshold) { gyro = {}; } else { only_accelerometer = false; diff --git a/src/core/hle/kernel/k_affinity_mask.h b/src/core/hle/kernel/k_affinity_mask.h index b906895fc..cf704ce87 100644 --- a/src/core/hle/kernel/k_affinity_mask.h +++ b/src/core/hle/kernel/k_affinity_mask.h @@ -31,8 +31,6 @@ public: } constexpr void SetAffinity(s32 core, bool set) { - ASSERT(0 <= core && core < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); - if (set) { this->mask |= GetCoreBit(core); } else { diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index b650ea31d..2ebbc0819 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -276,22 +276,23 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state, KMemoryPermission perm) { - std::lock_guard lock{page_table_lock}; - const u64 size{num_pages * PageSize}; - if (!CanContain(addr, size, state)) { - return ResultInvalidCurrentMemory; - } + // Validate the mapping request. + R_UNLESS(this->CanContain(addr, size, state), ResultInvalidCurrentMemory); - if (IsRegionMapped(addr, size)) { - return ResultInvalidCurrentMemory; - } + // Lock the table. + std::lock_guard lock{page_table_lock}; + + // Verify that the destination memory is unmapped. + R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free, + KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::None, KMemoryAttribute::None)); KPageLinkedList page_linked_list; - CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool, - allocation_option)); - CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); + R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool, + allocation_option)); + R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); block_manager->Update(addr, num_pages, state, perm); @@ -395,39 +396,12 @@ ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size, return ResultSuccess; } -void KPageTable::MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end) { - auto node{page_linked_list.Nodes().begin()}; - PAddr map_addr{node->GetAddress()}; - std::size_t src_num_pages{node->GetNumPages()}; - - block_manager->IterateForRange(start, end, [&](const KMemoryInfo& info) { - if (info.state != KMemoryState::Free) { - return; - } - - std::size_t dst_num_pages{GetSizeInRange(info, start, end) / PageSize}; - VAddr dst_addr{GetAddressInRange(info, start)}; - - while (dst_num_pages) { - if (!src_num_pages) { - node = std::next(node); - map_addr = node->GetAddress(); - src_num_pages = node->GetNumPages(); - } - - const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)}; - Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map, - map_addr); - - dst_addr += num_pages * PageSize; - map_addr += num_pages * PageSize; - src_num_pages -= num_pages; - dst_num_pages -= num_pages; - } - }); -} ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { + // Lock the physical memory lock. + std::lock_guard phys_lk(map_physical_memory_lock); + + // Lock the table. std::lock_guard lock{page_table_lock}; std::size_t mapped_size{}; @@ -463,7 +437,35 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { // We succeeded, so commit the memory reservation. memory_reservation.Commit(); - MapPhysicalMemory(page_linked_list, addr, end_addr); + // Map the memory. + auto node{page_linked_list.Nodes().begin()}; + PAddr map_addr{node->GetAddress()}; + std::size_t src_num_pages{node->GetNumPages()}; + block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { + if (info.state != KMemoryState::Free) { + return; + } + + std::size_t dst_num_pages{GetSizeInRange(info, addr, end_addr) / PageSize}; + VAddr dst_addr{GetAddressInRange(info, addr)}; + + while (dst_num_pages) { + if (!src_num_pages) { + node = std::next(node); + map_addr = node->GetAddress(); + src_num_pages = node->GetNumPages(); + } + + const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)}; + Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map, + map_addr); + + dst_addr += num_pages * PageSize; + map_addr += num_pages * PageSize; + src_num_pages -= num_pages; + dst_num_pages -= num_pages; + } + }); mapped_physical_memory_size += remaining_size; @@ -503,23 +505,8 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { return ResultSuccess; } - CASCADE_CODE(UnmapMemory(addr, size)); - - auto process{system.Kernel().CurrentProcess()}; - process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size); - mapped_physical_memory_size -= mapped_size; - - return ResultSuccess; -} - -ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; - - const VAddr end_addr{addr + size}; - ResultCode result{ResultSuccess}; - KPageLinkedList page_linked_list; - // Unmap each region within the range + KPageLinkedList page_linked_list; block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { if (info.state == KMemoryState::Normal) { const std::size_t block_size{GetSizeInRange(info, addr, end_addr)}; @@ -535,7 +522,6 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { } } }); - if (result.IsError()) { return result; } @@ -546,10 +532,14 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { block_manager->Update(addr, num_pages, KMemoryState::Free); + auto process{system.Kernel().CurrentProcess()}; + process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size); + mapped_physical_memory_size -= mapped_size; + return ResultSuccess; } -ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; KMemoryState src_state{}; @@ -588,7 +578,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { return ResultSuccess; } -ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; KMemoryState src_state{}; @@ -652,24 +642,26 @@ ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_l return ResultSuccess; } -ResultCode KPageTable::MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, - KMemoryPermission perm) { - std::lock_guard lock{page_table_lock}; - +ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list, + KMemoryState state, KMemoryPermission perm) { + // Check that the map is in range. const std::size_t num_pages{page_linked_list.GetNumPages()}; const std::size_t size{num_pages * PageSize}; + R_UNLESS(this->CanContain(address, size, state), ResultInvalidCurrentMemory); - if (!CanContain(addr, size, state)) { - return ResultInvalidCurrentMemory; - } + // Lock the table. + std::lock_guard lock{page_table_lock}; - if (IsRegionMapped(addr, num_pages * PageSize)) { - return ResultInvalidCurrentMemory; - } + // Check the memory state. + R_TRY(this->CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free, + KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::None, KMemoryAttribute::None)); - CASCADE_CODE(MapPages(addr, page_linked_list, perm)); + // Map the pages. + R_TRY(MapPages(address, page_linked_list, perm)); - block_manager->Update(addr, num_pages, state, perm); + // Update the blocks. + block_manager->Update(address, num_pages, state, perm); return ResultSuccess; } @@ -693,21 +685,23 @@ ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state) { - std::lock_guard lock{page_table_lock}; - + // Check that the unmap is in range. const std::size_t num_pages{page_linked_list.GetNumPages()}; const std::size_t size{num_pages * PageSize}; + R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory); - if (!CanContain(addr, size, state)) { - return ResultInvalidCurrentMemory; - } + // Lock the table. + std::lock_guard lock{page_table_lock}; - if (IsRegionMapped(addr, num_pages * PageSize)) { - return ResultInvalidCurrentMemory; - } + // Check the memory state. + R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, state, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::All, + KMemoryAttribute::None)); - CASCADE_CODE(UnmapPages(addr, page_linked_list)); + // Perform the unmap. + R_TRY(UnmapPages(addr, page_linked_list)); + // Update the blocks. block_manager->Update(addr, num_pages, state, KMemoryPermission::None); return ResultSuccess; @@ -765,7 +759,6 @@ ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size, // Ensure cache coherency, if we're setting pages as executable. if (is_x) { - // Memory execution state is changing, invalidate CPU cache range system.InvalidateCpuInstructionCacheRange(addr, size); } @@ -793,12 +786,12 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo KMemoryState state{}; KMemoryAttribute attribute{}; - CASCADE_CODE(CheckMemoryState( - &state, nullptr, &attribute, nullptr, addr, size, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All, - KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, KMemoryAttribute::None, - KMemoryAttribute::IpcAndDeviceMapped)); + R_TRY(CheckMemoryState(&state, nullptr, &attribute, nullptr, addr, size, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryPermission::All, KMemoryPermission::UserReadWrite, + KMemoryAttribute::Mask, KMemoryAttribute::None, + KMemoryAttribute::IpcAndDeviceMapped)); block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked); @@ -810,12 +803,11 @@ ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) { KMemoryState state{}; - CASCADE_CODE( - CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, - KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask, - KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); + R_TRY(CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask, + KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); block_manager->Update(addr, size / PageSize, state, KMemoryPermission::UserReadWrite); return ResultSuccess; @@ -871,8 +863,9 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, u32 mask AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask)); // Determine the new attribute. - const auto new_attr = ((old_attr & static_cast<KMemoryAttribute>(~mask)) | - static_cast<KMemoryAttribute>(attr & mask)); + const KMemoryAttribute new_attr = + static_cast<KMemoryAttribute>(((old_attr & static_cast<KMemoryAttribute>(~mask)) | + static_cast<KMemoryAttribute>(attr & mask))); // Perform operation. this->Operate(addr, num_pages, old_perm, OperationType::ChangePermissionsAndRefresh); @@ -896,6 +889,9 @@ ResultCode KPageTable::SetMaxHeapSize(std::size_t size) { } ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { + // Lock the physical memory lock. + std::lock_guard phys_lk(map_physical_memory_lock); + // Try to perform a reduction in heap, instead of an extension. VAddr cur_address{}; std::size_t allocation_size{}; @@ -1025,12 +1021,12 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, } if (is_map_only) { - CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); + R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); } else { KPageLinkedList page_group; - CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, - memory_pool, allocation_option)); - CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); + R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool, + allocation_option)); + R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); } block_manager->Update(addr, needed_num_pages, state, perm); @@ -1186,7 +1182,7 @@ VAddr KPageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_page ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group, OperationType operation) { - std::lock_guard lock{page_table_lock}; + ASSERT(this->IsLockedByCurrentThread()); ASSERT(Common::IsAligned(addr, PageSize)); ASSERT(num_pages > 0); @@ -1211,7 +1207,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLin ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, OperationType operation, PAddr map_addr) { - std::lock_guard lock{page_table_lock}; + ASSERT(this->IsLockedByCurrentThread()); ASSERT(num_pages > 0); ASSERT(Common::IsAligned(addr, PageSize)); diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index f67986e91..60ae9b9e8 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -37,9 +37,8 @@ public: VAddr src_addr); ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); ResultCode UnmapPhysicalMemory(VAddr addr, std::size_t size); - ResultCode UnmapMemory(VAddr addr, std::size_t size); - ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size); - ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, KMemoryPermission perm); ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state); @@ -88,7 +87,6 @@ private: ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list, KMemoryPermission perm); ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list); - void MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end); bool IsRegionMapped(VAddr address, u64 size); bool IsRegionContiguous(VAddr addr, u64 size) const; void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list); @@ -148,6 +146,7 @@ private: } std::recursive_mutex page_table_lock; + std::mutex map_physical_memory_lock; std::unique_ptr<KMemoryBlockManager> block_manager; public: @@ -249,7 +248,9 @@ public: return !IsOutsideASLRRegion(address, size); } constexpr PAddr GetPhysicalAddr(VAddr addr) { - return page_table_impl.backing_addr[addr >> PageBits] + addr; + const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; + ASSERT(backing_addr); + return backing_addr + addr; } constexpr bool Contains(VAddr addr) const { return address_space_start <= addr && addr <= address_space_end - 1; diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h index 0b894c8cf..bd779739d 100644 --- a/src/core/hle/kernel/k_priority_queue.h +++ b/src/core/hle/kernel/k_priority_queue.h @@ -258,7 +258,7 @@ private: private: constexpr void ClearAffinityBit(u64& affinity, s32 core) { - affinity &= ~(u64(1) << core); + affinity &= ~(UINT64_C(1) << core); } constexpr s32 GetNextCore(u64& affinity) { diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 265ac6fa1..85c506979 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -146,6 +146,13 @@ ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::st // Open a reference to the resource limit. process->resource_limit->Open(); + // Clear remaining fields. + process->num_running_threads = 0; + process->is_signaled = false; + process->exception_thread = nullptr; + process->is_suspended = false; + process->schedule_count = 0; + return ResultSuccess; } @@ -157,20 +164,17 @@ KResourceLimit* KProcess::GetResourceLimit() const { return resource_limit; } -void KProcess::IncrementThreadCount() { - ASSERT(num_threads >= 0); - num_created_threads++; - - if (const auto count = ++num_threads; count > peak_num_threads) { - peak_num_threads = count; - } +void KProcess::IncrementRunningThreadCount() { + ASSERT(num_running_threads.load() >= 0); + ++num_running_threads; } -void KProcess::DecrementThreadCount() { - ASSERT(num_threads > 0); +void KProcess::DecrementRunningThreadCount() { + ASSERT(num_running_threads.load() > 0); - if (const auto count = --num_threads; count == 0) { - LOG_WARNING(Kernel, "Process termination is not fully implemented."); + if (const auto prev = num_running_threads--; prev == 1) { + // TODO(bunnei): Process termination to be implemented when multiprocess is supported. + UNIMPLEMENTED_MSG("KProcess termination is not implemennted!"); } } diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index c2a672021..38b446350 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -235,8 +235,8 @@ public: ++schedule_count; } - void IncrementThreadCount(); - void DecrementThreadCount(); + void IncrementRunningThreadCount(); + void DecrementRunningThreadCount(); void SetRunningThread(s32 core, KThread* thread, u64 idle_count) { running_threads[core] = thread; @@ -473,9 +473,7 @@ private: bool is_suspended{}; bool is_initialized{}; - std::atomic<s32> num_created_threads{}; - std::atomic<u16> num_threads{}; - u16 peak_num_threads{}; + std::atomic<u16> num_running_threads{}; std::array<KThread*, Core::Hardware::NUM_CPU_CORES> running_threads{}; std::array<u64, Core::Hardware::NUM_CPU_CORES> running_thread_idle_counts{}; diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index b32d4f285..c96520828 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) { } void KScheduler::Reload(KThread* thread) { - LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr"); + LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName()); - if (thread) { - ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); - - Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); - cpu_core.LoadContext(thread->GetContext32()); - cpu_core.LoadContext(thread->GetContext64()); - cpu_core.SetTlsAddress(thread->GetTLSAddress()); - cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); - cpu_core.ClearExclusiveState(); - } + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); + cpu_core.LoadContext(thread->GetContext32()); + cpu_core.LoadContext(thread->GetContext64()); + cpu_core.SetTlsAddress(thread->GetTLSAddress()); + cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); } void KScheduler::SwitchContextStep2() { // Load context of new thread - Reload(current_thread.load()); + Reload(GetCurrentThread()); RescheduleCurrentCore(); } @@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() { KThread* previous_thread = GetCurrentThread(); KThread* next_thread = state.highest_priority_thread; - state.needs_scheduling = false; + state.needs_scheduling.store(false); // We never want to schedule a null thread, so use the idle thread if we don't have a next. if (next_thread == nullptr) { next_thread = idle_thread; } + if (next_thread->GetCurrentCore() != core_id) { + next_thread->SetCurrentCore(core_id); + } + // We never want to schedule a dummy thread, as these are only used by host threads for locking. if (next_thread->GetThreadType() == ThreadType::Dummy) { ASSERT_MSG(false, "Dummy threads should never be scheduled!"); @@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() { return; } - if (next_thread->GetCurrentCore() != core_id) { - next_thread->SetCurrentCore(core_id); - } - - current_thread.store(next_thread); - + // Update the CPU time tracking variables. KProcess* const previous_process = system.Kernel().CurrentProcess(); - UpdateLastContextSwitchTime(previous_thread, previous_process); // Save context for previous thread @@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() { std::shared_ptr<Common::Fiber>* old_context; old_context = &previous_thread->GetHostContext(); + + // Set the new thread. + current_thread.store(next_thread); + guard.Unlock(); Common::Fiber::YieldTo(*old_context, *switch_fiber); @@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() { do { auto next_thread = current_thread.load(); if (next_thread != nullptr) { - next_thread->context_guard.Lock(); - if (next_thread->GetRawState() != ThreadState::Runnable) { + const auto locked = next_thread->context_guard.TryLock(); + if (state.needs_scheduling.load()) { next_thread->context_guard.Unlock(); break; } @@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() { next_thread->context_guard.Unlock(); break; } + if (!locked) { + continue; + } } auto thread = next_thread ? next_thread : idle_thread; Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext()); diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index f42abb8a1..de3ffe0c7 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -215,7 +215,6 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s parent = owner; parent->Open(); - parent->IncrementThreadCount(); } // Initialize thread context. @@ -327,11 +326,6 @@ void KThread::Finalize() { } } - // Decrement the parent process's thread count. - if (parent != nullptr) { - parent->DecrementThreadCount(); - } - // Perform inherited finalization. KSynchronizationObject::Finalize(); } @@ -1011,7 +1005,7 @@ ResultCode KThread::Run() { if (IsUserThread() && IsSuspended()) { this->UpdateState(); } - owner->IncrementThreadCount(); + owner->IncrementRunningThreadCount(); } // Set our state and finish. @@ -1026,10 +1020,11 @@ ResultCode KThread::Run() { void KThread::Exit() { ASSERT(this == GetCurrentThreadPointer(kernel)); - // Release the thread resource hint from parent. + // Release the thread resource hint, running thread count from parent. if (parent != nullptr) { parent->GetResourceLimit()->Release(Kernel::LimitableResource::Threads, 0, 1); resource_limit_release_hint = true; + parent->DecrementRunningThreadCount(); } // Perform termination. diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index c7f5140f4..40bb893ac 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -230,7 +230,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr return result; } - return page_table.Map(dst_addr, src_addr, size); + return page_table.MapMemory(dst_addr, src_addr, size); } static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { @@ -249,7 +249,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad return result; } - return page_table.Unmap(dst_addr, src_addr, size); + return page_table.UnmapMemory(dst_addr, src_addr, size); } static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { diff --git a/src/input_common/drivers/sdl_driver.cpp b/src/input_common/drivers/sdl_driver.cpp index ed6281772..577bf5c31 100644 --- a/src/input_common/drivers/sdl_driver.cpp +++ b/src/input_common/drivers/sdl_driver.cpp @@ -109,8 +109,9 @@ public: bool HasHDRumble() const { if (sdl_controller) { - return (SDL_GameControllerGetType(sdl_controller.get()) == - SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO); + const auto type = SDL_GameControllerGetType(sdl_controller.get()); + return (type == SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO) || + (type == SDL_CONTROLLER_TYPE_PS5); } return false; } diff --git a/src/input_common/drivers/udp_client.cpp b/src/input_common/drivers/udp_client.cpp index 9aaeb91be..d1cdb1ab2 100644 --- a/src/input_common/drivers/udp_client.cpp +++ b/src/input_common/drivers/udp_client.cpp @@ -339,7 +339,7 @@ void UDPClient::StartCommunication(std::size_t client, const std::string& host, } } -const PadIdentifier UDPClient::GetPadIdentifier(std::size_t pad_index) const { +PadIdentifier UDPClient::GetPadIdentifier(std::size_t pad_index) const { const std::size_t client = pad_index / PADS_PER_CLIENT; return { .guid = clients[client].uuid, @@ -348,9 +348,9 @@ const PadIdentifier UDPClient::GetPadIdentifier(std::size_t pad_index) const { }; } -const Common::UUID UDPClient::GetHostUUID(const std::string host) const { - const auto ip = boost::asio::ip::address_v4::from_string(host); - const auto hex_host = fmt::format("{:06x}", ip.to_ulong()); +Common::UUID UDPClient::GetHostUUID(const std::string& host) const { + const auto ip = boost::asio::ip::make_address_v4(host); + const auto hex_host = fmt::format("{:06x}", ip.to_uint()); return Common::UUID{hex_host}; } diff --git a/src/input_common/drivers/udp_client.h b/src/input_common/drivers/udp_client.h index 61a1fff37..30d7c2682 100644 --- a/src/input_common/drivers/udp_client.h +++ b/src/input_common/drivers/udp_client.h @@ -145,8 +145,8 @@ private: void OnPortInfo(Response::PortInfo); void OnPadData(Response::PadData, std::size_t client); void StartCommunication(std::size_t client, const std::string& host, u16 port); - const PadIdentifier GetPadIdentifier(std::size_t pad_index) const; - const Common::UUID GetHostUUID(const std::string host) const; + PadIdentifier GetPadIdentifier(std::size_t pad_index) const; + Common::UUID GetHostUUID(const std::string& host) const; Common::Input::ButtonNames GetUIButtonName(const Common::ParamPackage& params) const; diff --git a/src/input_common/input_engine.h b/src/input_common/input_engine.h index 390581c94..fe2faee5a 100644 --- a/src/input_common/input_engine.h +++ b/src/input_common/input_engine.h @@ -16,7 +16,7 @@ // Pad Identifier of data source struct PadIdentifier { - Common::UUID guid{}; + Common::UUID guid{Common::INVALID_UUID}; std::size_t port{}; std::size_t pad{}; @@ -89,7 +89,7 @@ struct UpdateCallback { // Triggered if data changed on the controller and the engine is on configuring mode struct MappingCallback { - std::function<void(MappingData)> on_data; + std::function<void(const MappingData&)> on_data; }; // Input Identifier of data source diff --git a/src/input_common/input_mapping.cpp b/src/input_common/input_mapping.cpp index 475257f42..a7a6ad8c2 100644 --- a/src/input_common/input_mapping.cpp +++ b/src/input_common/input_mapping.cpp @@ -2,14 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included -#include "common/common_types.h" #include "common/settings.h" #include "input_common/input_engine.h" #include "input_common/input_mapping.h" namespace InputCommon { -MappingFactory::MappingFactory() {} +MappingFactory::MappingFactory() = default; void MappingFactory::BeginMapping(Polling::InputType type) { is_enabled = true; @@ -19,7 +18,7 @@ void MappingFactory::BeginMapping(Polling::InputType type) { second_axis = -1; } -[[nodiscard]] const Common::ParamPackage MappingFactory::GetNextInput() { +Common::ParamPackage MappingFactory::GetNextInput() { Common::ParamPackage input; input_queue.Pop(input); return input; @@ -57,7 +56,7 @@ void MappingFactory::StopMapping() { void MappingFactory::RegisterButton(const MappingData& data) { Common::ParamPackage new_input; new_input.Set("engine", data.engine); - if (data.pad.guid != Common::UUID{}) { + if (data.pad.guid.IsValid()) { new_input.Set("guid", data.pad.guid.Format()); } new_input.Set("port", static_cast<int>(data.pad.port)); @@ -93,7 +92,7 @@ void MappingFactory::RegisterButton(const MappingData& data) { void MappingFactory::RegisterStick(const MappingData& data) { Common::ParamPackage new_input; new_input.Set("engine", data.engine); - if (data.pad.guid != Common::UUID{}) { + if (data.pad.guid.IsValid()) { new_input.Set("guid", data.pad.guid.Format()); } new_input.Set("port", static_cast<int>(data.pad.port)); @@ -138,7 +137,7 @@ void MappingFactory::RegisterStick(const MappingData& data) { void MappingFactory::RegisterMotion(const MappingData& data) { Common::ParamPackage new_input; new_input.Set("engine", data.engine); - if (data.pad.guid != Common::UUID{}) { + if (data.pad.guid.IsValid()) { new_input.Set("guid", data.pad.guid.Format()); } new_input.Set("port", static_cast<int>(data.pad.port)); diff --git a/src/input_common/input_mapping.h b/src/input_common/input_mapping.h index 93564b5f8..e0dfbc7ad 100644 --- a/src/input_common/input_mapping.h +++ b/src/input_common/input_mapping.h @@ -3,8 +3,14 @@ // Refer to the license.txt file included #pragma once + +#include "common/param_package.h" #include "common/threadsafe_queue.h" +namespace InputCommon::Polling { +enum class InputType; +} + namespace InputCommon { class InputEngine; struct MappingData; @@ -20,7 +26,7 @@ public: void BeginMapping(Polling::InputType type); /// Returns an input event with mapping information from the input_queue - [[nodiscard]] const Common::ParamPackage GetNextInput(); + [[nodiscard]] Common::ParamPackage GetNextInput(); /** * Registers mapping input data from the driver diff --git a/src/input_common/input_poller.cpp b/src/input_common/input_poller.cpp index 7b370335f..2f3c0735a 100644 --- a/src/input_common/input_poller.cpp +++ b/src/input_common/input_poller.cpp @@ -504,9 +504,10 @@ private: class InputFromMotion final : public Common::Input::InputDevice { public: - explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, + explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, float gyro_threshold_, InputEngine* input_engine_) - : identifier(identifier_), motion_sensor(motion_sensor_), input_engine(input_engine_) { + : identifier(identifier_), motion_sensor(motion_sensor_), gyro_threshold(gyro_threshold_), + input_engine(input_engine_) { UpdateCallback engine_callback{[this]() { OnChange(); }}; const InputIdentifier input_identifier{ .identifier = identifier, @@ -525,8 +526,9 @@ public: const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor); Common::Input::MotionStatus status{}; const Common::Input::AnalogProperties properties = { - .deadzone = 0.001f, + .deadzone = 0.0f, .range = 1.0f, + .threshold = gyro_threshold, .offset = 0.0f, }; status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties}; @@ -551,6 +553,7 @@ public: private: const PadIdentifier identifier; const int motion_sensor; + const float gyro_threshold; int callback_key; InputEngine* input_engine; }; @@ -873,9 +876,11 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateMotionDevice( if (params.Has("motion")) { const auto motion_sensor = params.Get("motion", 0); + const auto gyro_threshold = params.Get("threshold", 0.007f); input_engine->PreSetController(identifier); input_engine->PreSetMotion(identifier, motion_sensor); - return std::make_unique<InputFromMotion>(identifier, motion_sensor, input_engine.get()); + return std::make_unique<InputFromMotion>(identifier, motion_sensor, gyro_threshold, + input_engine.get()); } const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f); diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 940744c5f..a4d7ed645 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -27,7 +27,7 @@ namespace InputCommon { struct InputSubsystem::Impl { void Initialize() { mapping_factory = std::make_shared<MappingFactory>(); - MappingCallback mapping_callback{[this](MappingData data) { RegisterInput(data); }}; + MappingCallback mapping_callback{[this](const MappingData& data) { RegisterInput(data); }}; keyboard = std::make_shared<Keyboard>("keyboard"); keyboard->SetMappingCallback(mapping_callback); @@ -284,7 +284,7 @@ struct InputSubsystem::Impl { #endif } - void RegisterInput(MappingData data) { + void RegisterInput(const MappingData& data) { mapping_factory->RegisterInput(data); } @@ -394,7 +394,7 @@ void InputSubsystem::BeginMapping(Polling::InputType type) { impl->mapping_factory->BeginMapping(type); } -const Common::ParamPackage InputSubsystem::GetNextInput() const { +Common::ParamPackage InputSubsystem::GetNextInput() const { return impl->mapping_factory->GetNextInput(); } diff --git a/src/input_common/main.h b/src/input_common/main.h index c6f97f691..baf107e0f 100644 --- a/src/input_common/main.h +++ b/src/input_common/main.h @@ -126,7 +126,7 @@ public: void BeginMapping(Polling::InputType type); /// Returns an input event with mapping information. - [[nodiscard]] const Common::ParamPackage GetNextInput() const; + [[nodiscard]] Common::ParamPackage GetNextInput() const; /// Stop polling from all backends. void StopMapping() const; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 50918317f..08b3a81ce 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr } } +void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { + if (ctx.runtime_info.xfb_varyings.empty()) { + return; + } + ctx.AddCapability(spv::Capability::TransformFeedback); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb); +} + void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); @@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_sample_id) { ctx.AddCapability(spv::Capability::SampleRateShading); } - if (!ctx.runtime_info.xfb_varyings.empty()) { - ctx.AddCapability(spv::Capability::TransformFeedback); - } if (info.uses_derivatives) { ctx.AddCapability(spv::Capability::DerivativeControl); } @@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in SetupSignedNanCapabilities(profile, program, ctx, main); } SetupCapabilities(profile, program.info, ctx); + SetupTransformFeedbackCapabilities(ctx, main); PatchPhiNodes(program, ctx); return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 0d37b405c..46ba52a25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -74,7 +74,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); } - LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; @@ -267,7 +267,7 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const const auto [scope, semantics]{AtomicArgs(ctx)}; return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp index 78869601f..4851b0b8d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -57,16 +57,6 @@ void TranslatorVisitor::VMNMX(u64 insn) { if (vmnmx.sat != 0) { throw NotImplementedException("VMNMX SAT"); } - // Selectors were shown to default to 2 in unit tests - if (vmnmx.src_a_selector != 2) { - throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); - } - if (vmnmx.src_b_selector != 2) { - throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); - } - if (vmnmx.src_a_width != VideoWidth::Word) { - throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); - } const bool is_b_imm{vmnmx.is_src_b_reg == 0}; const IR::U32 src_a{GetReg8(insn)}; @@ -76,10 +66,14 @@ void TranslatorVisitor::VMNMX(u64 insn) { const VideoWidth a_width{vmnmx.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; + const u32 a_selector{static_cast<u32>(vmnmx.src_a_selector)}; + // Immediate values can't have a selector + const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmnmx.src_b_selector)}; + const bool src_a_signed{vmnmx.src_a_sign != 0}; const bool src_b_signed{vmnmx.src_b_sign != 0}; - const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; - const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; // First operation's sign is only dependent on operand b's sign const bool op_1_signed{src_b_signed}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h index cd535f20d..eac83da9d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.h +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -21,7 +21,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); -[[nodiscard]] void ConvertLegacyToGeneric(IR::Program& program, - const Shader::RuntimeInfo& runtime_info); +void ConvertLegacyToGeneric(IR::Program& program, const RuntimeInfo& runtime_info); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a78c469be..b6a20f904 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -688,7 +688,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::StorageAtomicAnd64: case IR::Opcode::StorageAtomicOr64: case IR::Opcode::StorageAtomicXor64: - info.used_storage_buffer_types |= IR::Type::U64; + info.used_storage_buffer_types |= IR::Type::U64 | IR::Type::U32x2; info.uses_int64_bit_atomics = true; break; case IR::Opcode::BindlessImageAtomicIAdd32: diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 43bed63ac..048dba4f3 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1474,6 +1474,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu // When this memory region has been joined a bunch of times, we assume it's being used // as a stream buffer. Increase the size to skip constantly recreating buffers. has_stream_leap = true; + begin -= PAGE_SIZE * 256; + cpu_addr = begin; end += PAGE_SIZE * 256; } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b18b8a02a..c38ebd670 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. - macro_engine->Execute(*this, macro_positions[entry], parameters); + macro_engine->Execute(macro_positions[entry], parameters); if (mme_draw.current_mode != MMEDrawMode::Undefined) { FlushMMEInlineDraw(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 3188b83ed..26b8ea233 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -12,9 +12,6 @@ #include "video_core/framebuffer_config.h" namespace Core { -namespace Frontend { -class EmuWindow; -} class System; } // namespace Core @@ -25,7 +22,6 @@ class ShaderNotify; namespace Tegra { class DmaPusher; -class CDmaPusher; struct CommandList; enum class RenderTargetFormat : u32 { @@ -88,15 +84,9 @@ enum class DepthFormat : u32 { D32_FLOAT_S8X24_UINT = 0x19, }; -struct CommandListHeader; -class DebugContext; - namespace Engines { -class Fermi2D; class Maxwell3D; -class MaxwellDMA; class KeplerCompute; -class KeplerMemory; } // namespace Engines enum class EngineID { @@ -190,12 +180,6 @@ public: /// Returns a const reference to the GPU DMA pusher. [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const; - /// Returns a reference to the GPU CDMA pusher. - [[nodiscard]] Tegra::CDmaPusher& CDmaPusher(); - - /// Returns a const reference to the GPU CDMA pusher. - [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; - /// Returns a reference to the underlying renderer. [[nodiscard]] VideoCore::RendererBase& Renderer(); diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index d7fabe605..0aeda4ce8 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,12 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> #include <optional> + #include <boost/container_hash/hash.hpp> + #include "common/assert.h" -#include "common/logging/log.h" #include "common/settings.h" -#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" @@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, - const std::vector<u32>& parameters) { +void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { const auto& cache_info = compiled_macro->second; @@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, cache_info.lle_program = Compile(code); } - auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); - if (hle_program.has_value()) { + if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { cache_info.has_hle_program = true; - cache_info.hle_program = std::move(hle_program.value()); + cache_info.hle_program = std::move(hle_program); cache_info.hle_program->Execute(parameters, method); } else { cache_info.lle_program->Execute(parameters, method); diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 31ee3440a..7aaa49286 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -119,7 +119,7 @@ public: void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); + void Execute(u32 method, const std::vector<u32>& parameters); protected: virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 70ac7c620..900ad23c9 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -5,12 +5,15 @@ #include <array> #include <vector> #include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/rasterizer_interface.h" namespace Tegra { - namespace { + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + // HLE'd functions void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); @@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.CallMethodFromMME(0x8e5, 0x0); maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -} // Anonymous namespace constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, @@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; +class HLEMacroImpl final : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) + : maxwell3d{maxwell3d_}, func{func_} {} + + void Execute(const std::vector<u32>& parameters, u32 method) override { + func(maxwell3d, parameters); + } + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; +} // Anonymous namespace + HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} HLEMacro::~HLEMacro() = default; -std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { +std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const { const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), [hash](const auto& pair) { return pair.first == hash; }); if (it == hle_funcs.end()) { - return std::nullopt; + return nullptr; } return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); } -HLEMacroImpl::~HLEMacroImpl() = default; - -HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) - : maxwell3d{maxwell3d_}, func{func_} {} - -void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { - func(maxwell3d, parameters); -} - } // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index cb3bd1600..b86ba84a1 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -5,10 +5,7 @@ #pragma once #include <memory> -#include <optional> -#include <vector> #include "common/common_types.h" -#include "video_core/macro/macro.h" namespace Tegra { @@ -16,29 +13,17 @@ namespace Engines { class Maxwell3D; } -using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); - class HLEMacro { public: explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); ~HLEMacro(); - std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -class HLEMacroImpl : public CachedMacro { -public: - explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); - ~HLEMacroImpl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; + // Allocates and returns a cached macro if the hash matches a known function. + // Returns nullptr otherwise. + [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; private: Engines::Maxwell3D& maxwell3d; - HLEFunction func; }; } // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 8da26fd59..fba755448 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -2,6 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <optional> + #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -11,16 +14,81 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +namespace { +class MacroInterpreterImpl final : public CachedMacro { +public: + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : maxwell3d{maxwell3d_}, code{code_} {} -std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); -} + void Execute(const std::vector<u32>& params, u32 method) override; + +private: + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Macro::Opcode GetOpcode() const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); -MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, - const std::vector<u32>& code_) - : maxwell3d{maxwell3d_}, code{code_} {} + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + /// Current program counter + u32 pc{}; + /// Program counter to execute at after the delay slot is executed. + std::optional<u32> delayed_pc; + + /// General purpose macro registers. + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; + + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::unique_ptr<u32[]> parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; + + bool carry_flag = false; + const std::vector<u32>& code; +}; void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { MICROPROFILE_SCOPE(MacroInterp); @@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() { ASSERT(next_parameter_index < num_parameters); return parameters[next_parameter_index++]; } +} // Anonymous namespace + +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} + +std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index d50c619ce..8a9648e46 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -3,10 +3,9 @@ // Refer to the license.txt file included. #pragma once -#include <array> -#include <optional> + #include <vector> -#include "common/bit_field.h" + #include "common/common_types.h" #include "video_core/macro/macro.h" @@ -26,77 +25,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroInterpreterImpl : public CachedMacro { -public: - explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - void Execute(const std::vector<u32>& params, u32 method) override; - -private: - /// Resets the execution engine state, zeroing registers, etc. - void Reset(); - - /** - * Executes a single macro instruction located at the current program counter. Returns whether - * the interpreter should keep running. - * - * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. - */ - bool Step(bool is_delay_slot); - - /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); - - /// Performs the result operation on the input result and stores it in the specified register - /// (if necessary). - void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); - - /// Evaluates the branch condition and returns whether the branch should be taken or not. - bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; - - /// Reads an opcode at the current program counter location. - Macro::Opcode GetOpcode() const; - - /// Returns the specified register's value. Register 0 is hardcoded to always return 0. - u32 GetRegister(u32 register_id) const; - - /// Sets the register to the input value. - void SetRegister(u32 register_id, u32 value); - - /// Sets the method address to use for the next Send instruction. - void SetMethodAddress(u32 address); - - /// Calls a GPU Engine method with the input parameter. - void Send(u32 value); - - /// Reads a GPU register located at the method address. - u32 Read(u32 method) const; - - /// Returns the next parameter in the parameter queue. - u32 FetchParameter(); - - Engines::Maxwell3D& maxwell3d; - - /// Current program counter - u32 pc; - /// Program counter to execute at after the delay slot is executed. - std::optional<u32> delayed_pc; - - /// General purpose macro registers. - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; - - /// Method address to use for the next Send instruction. - Macro::MethodAddress method_address = {}; - - /// Input parameters of the current macro. - std::unique_ptr<u32[]> parameters; - std::size_t num_parameters = 0; - std::size_t parameters_capacity = 0; - /// Index of the next parameter that will be fetched by the 'parm' instruction. - u32 next_parameter_index = 0; - - bool carry_flag = false; - const std::vector<u32>& code; -}; - } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c6b2b2109..47b28ad16 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -2,9 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <bitset> +#include <optional> + +#include <xbyak/xbyak.h> + #include "common/assert.h" +#include "common/bit_field.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/x64/xbyak_abi.h" #include "common/x64/xbyak_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro_interpreter.h" @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { +namespace { constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; -static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ +constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, @@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +// Arbitrarily chosen based on current booting games. +constexpr size_t MAX_CODE_SIZE = 0x10000; -std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +std::bitset<32> PersistentCallerSavedRegs() { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; } -MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) - : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { - Compile(); -} +class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { +public: + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { + Compile(); + } + + void Execute(const std::vector<u32>& parameters, u32 method) override; + + void Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); -MacroJITx64Impl::~MacroJITx64Impl() = default; + Macro::Opcode GetOpCode() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional<Macro::Opcode> next_opcode{}; + ProgramType program{nullptr}; + + std::array<Xbyak::Label, MAX_CODE_SIZE> labels; + std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + + const std::vector<u32>& code; + Engines::Maxwell3D& maxwell3d; +}; void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { MICROPROFILE_SCOPE(MacroJitExecute); @@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { +void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { maxwell3d->CallMethodFromMME(method_address.address, value); } -void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { +void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); @@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { L(dont_process); } -void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { +void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); const s32 jump_address = static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); @@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { L(end); } -void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { +void MacroJITx64Impl::Optimizer_ScanFlags() { optimizer.can_skip_carry = true; optimizer.has_delayed_pc = false; for (auto raw_op : code) { @@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { return true; } -Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { +Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { mov(eax, dword[PARAMETERS]); add(PARAMETERS, sizeof(u32)); return eax; @@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { ASSERT(pc < code.size()); return {code[pc]}; } +} // Anonymous namespace -std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { - return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; -} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index d03d480b4..773b037ae 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -4,12 +4,7 @@ #pragma once -#include <array> -#include <bitset> -#include <xbyak/xbyak.h> -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/x64/xbyak_abi.h" #include "video_core/macro/macro.h" namespace Tegra { @@ -18,9 +13,6 @@ namespace Engines { class Maxwell3D; } -/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games -constexpr size_t MAX_CODE_SIZE = 0x10000; - class MacroJITx64 final : public MacroEngine { public: explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); @@ -32,67 +24,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - ~MacroJITx64Impl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; - - void Compile_ALU(Macro::Opcode opcode); - void Compile_AddImmediate(Macro::Opcode opcode); - void Compile_ExtractInsert(Macro::Opcode opcode); - void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); - void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); - void Compile_Read(Macro::Opcode opcode); - void Compile_Branch(Macro::Opcode opcode); - -private: - void Optimizer_ScanFlags(); - - void Compile(); - bool Compile_NextInstruction(); - - Xbyak::Reg32 Compile_FetchParameter(); - Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); - void Compile_Send(Xbyak::Reg32 value); - - Macro::Opcode GetOpCode() const; - std::bitset<32> PersistentCallerSavedRegs() const; - - struct JITState { - Engines::Maxwell3D* maxwell3d{}; - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; - u32 carry_flag{}; - }; - static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*); - - struct OptimizerState { - bool can_skip_carry{}; - bool has_delayed_pc{}; - bool zero_reg_skip{}; - bool skip_dummy_addimmediate{}; - bool optimize_for_method_move{}; - bool enable_asserts{}; - }; - OptimizerState optimizer{}; - - std::optional<Macro::Opcode> next_opcode{}; - ProgramType program{nullptr}; - - std::array<Xbyak::Label, MAX_CODE_SIZE> labels; - std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; - Xbyak::Label end_of_code{}; - - bool is_delay_slot{}; - u32 pc{}; - std::optional<u32> delayed_pc; - - const std::vector<u32>& code; - Engines::Maxwell3D& maxwell3d; -}; - } // namespace Tegra diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 73629d229..b630090e8 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -214,7 +214,7 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im { VkImageMemoryBarrier fsr_write_barrier = base_barrier; - fsr_write_barrier.image = *images[image_index], + fsr_write_barrier.image = *images[image_index]; fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 7e8f92840..2c8c10c50 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -393,6 +393,8 @@ void Config::ReadControlValues() { ReadGlobalSetting(Settings::values.enable_accurate_vibrations); ReadGlobalSetting(Settings::values.motion_enabled); + ReadBasicSetting(Settings::values.controller_navigation); + qt_config->endGroup(); } @@ -1001,6 +1003,7 @@ void Config::SaveControlValues() { WriteBasicSetting(Settings::values.keyboard_enabled); WriteBasicSetting(Settings::values.emulate_analog_keyboard); WriteBasicSetting(Settings::values.mouse_panning_sensitivity); + WriteBasicSetting(Settings::values.controller_navigation); WriteBasicSetting(Settings::values.tas_enable); WriteBasicSetting(Settings::values.tas_loop); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 9241678e4..74f0e0b79 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -429,7 +429,7 @@ </item> <item> <property name="text"> - <string>AMD FidelityFX™️ Super Resolution [Vulkan Only]</string> + <string>AMD FidelityFX™️ Super Resolution (Vulkan Only)</string> </property> </item> </widget> diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp index 65c8e59ac..20fc2599d 100644 --- a/src/yuzu/configuration/configure_input_advanced.cpp +++ b/src/yuzu/configuration/configure_input_advanced.cpp @@ -131,6 +131,7 @@ void ConfigureInputAdvanced::ApplyConfiguration() { Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked(); Settings::values.enable_raw_input = ui->enable_raw_input->isChecked(); Settings::values.enable_udp_controller = ui->enable_udp_controller->isChecked(); + Settings::values.controller_navigation = ui->controller_navigation->isChecked(); } void ConfigureInputAdvanced::LoadConfiguration() { @@ -162,6 +163,7 @@ void ConfigureInputAdvanced::LoadConfiguration() { ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled); ui->enable_raw_input->setChecked(Settings::values.enable_raw_input.GetValue()); ui->enable_udp_controller->setChecked(Settings::values.enable_udp_controller.GetValue()); + ui->controller_navigation->setChecked(Settings::values.controller_navigation.GetValue()); UpdateUIEnabled(); } diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui index df0e4d602..66f2075f2 100644 --- a/src/yuzu/configuration/configure_input_advanced.ui +++ b/src/yuzu/configuration/configure_input_advanced.ui @@ -2655,6 +2655,19 @@ </widget> </item> <item row="4" column="0"> + <widget class="QCheckBox" name="controller_navigation"> + <property name="minimumSize"> + <size> + <width>0</width> + <height>23</height> + </size> + </property> + <property name="text"> + <string>Controller navigation</string> + </property> + </widget> + </item> + <item row="5" column="0"> <widget class="QCheckBox" name="mouse_panning"> <property name="minimumSize"> <size> @@ -2667,7 +2680,7 @@ </property> </widget> </item> - <item row="4" column="2"> + <item row="5" column="2"> <widget class="QSpinBox" name="mouse_panning_sensitivity"> <property name="toolTip"> <string>Mouse sensitivity</string> @@ -2689,14 +2702,14 @@ </property> </widget> </item> - <item row="5" column="0"> + <item row="6" column="0"> <widget class="QLabel" name="motion_touch"> <property name="text"> <string>Motion / Touch</string> </property> </widget> </item> - <item row="5" column="2"> + <item row="6" column="2"> <widget class="QPushButton" name="buttonMotionTouch"> <property name="text"> <string>Configure</string> diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index d2132b408..752504236 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -147,7 +147,7 @@ QString ConfigureInputPlayer::ButtonToText(const Common::ParamPackage& param) { // Retrieve the names from Qt if (param.Get("engine", "") == "keyboard") { const QString button_str = GetKeyName(param.Get("code", 0)); - return QObject::tr("%1%2").arg(toggle, button_str); + return QObject::tr("%1%2%3").arg(toggle, inverted, button_str); } if (common_button_name == Common::Input::ButtonNames::Invalid) { @@ -341,7 +341,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i emulated_controller->SetButtonParam(button_id, {}); button_map[button_id]->setText(tr("[not set]")); }); - if (param.Has("button") || param.Has("hat")) { + if (param.Has("code") || param.Has("button") || param.Has("hat")) { context_menu.addAction(tr("Toggle button"), [&] { const bool toggle_value = !param.Get("toggle", false); param.Set("toggle", toggle_value); @@ -349,8 +349,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i emulated_controller->SetButtonParam(button_id, param); }); context_menu.addAction(tr("Invert button"), [&] { - const bool toggle_value = !param.Get("inverted", false); - param.Set("inverted", toggle_value); + const bool invert_value = !param.Get("inverted", false); + param.Set("inverted", invert_value); button_map[button_id]->setText(ButtonToText(param)); emulated_controller->SetButtonParam(button_id, param); }); @@ -403,10 +403,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i connect(button, &QPushButton::customContextMenuRequested, [=, this](const QPoint& menu_location) { QMenu context_menu; + Common::ParamPackage param = emulated_controller->GetMotionParam(motion_id); context_menu.addAction(tr("Clear"), [&] { emulated_controller->SetMotionParam(motion_id, {}); motion_map[motion_id]->setText(tr("[not set]")); }); + if (param.Has("motion")) { + context_menu.addAction(tr("Set gyro threshold"), [&] { + const int gyro_threshold = + static_cast<int>(param.Get("threshold", 0.007f) * 1000.0f); + const int new_threshold = QInputDialog::getInt( + this, tr("Set threshold"), tr("Choose a value between 0% and 100%"), + gyro_threshold, 0, 100); + param.Set("threshold", new_threshold / 1000.0f); + emulated_controller->SetMotionParam(motion_id, param); + }); + } context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location)); }); } @@ -510,28 +522,37 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i analog_map_modifier_button[analog_id]->setContextMenuPolicy(Qt::CustomContextMenu); - connect(analog_map_modifier_button[analog_id], &QPushButton::customContextMenuRequested, - [=, this](const QPoint& menu_location) { - QMenu context_menu; - Common::ParamPackage param = emulated_controller->GetStickParam(analog_id); - context_menu.addAction(tr("Clear"), [&] { - param.Set("modifier", ""); - analog_map_modifier_button[analog_id]->setText(tr("[not set]")); - emulated_controller->SetStickParam(analog_id, param); - }); - context_menu.addAction(tr("Toggle button"), [&] { - Common::ParamPackage modifier_param = - Common::ParamPackage{param.Get("modifier", "")}; - const bool toggle_value = !modifier_param.Get("toggle", false); - modifier_param.Set("toggle", toggle_value); - param.Set("modifier", modifier_param.Serialize()); - analog_map_modifier_button[analog_id]->setText( - ButtonToText(modifier_param)); - emulated_controller->SetStickParam(analog_id, param); - }); - context_menu.exec( - analog_map_modifier_button[analog_id]->mapToGlobal(menu_location)); + connect( + analog_map_modifier_button[analog_id], &QPushButton::customContextMenuRequested, + [=, this](const QPoint& menu_location) { + QMenu context_menu; + Common::ParamPackage param = emulated_controller->GetStickParam(analog_id); + context_menu.addAction(tr("Clear"), [&] { + param.Set("modifier", ""); + analog_map_modifier_button[analog_id]->setText(tr("[not set]")); + emulated_controller->SetStickParam(analog_id, param); }); + context_menu.addAction(tr("Toggle button"), [&] { + Common::ParamPackage modifier_param = + Common::ParamPackage{param.Get("modifier", "")}; + const bool toggle_value = !modifier_param.Get("toggle", false); + modifier_param.Set("toggle", toggle_value); + param.Set("modifier", modifier_param.Serialize()); + analog_map_modifier_button[analog_id]->setText(ButtonToText(modifier_param)); + emulated_controller->SetStickParam(analog_id, param); + }); + context_menu.addAction(tr("Invert button"), [&] { + Common::ParamPackage modifier_param = + Common::ParamPackage{param.Get("modifier", "")}; + const bool invert_value = !modifier_param.Get("inverted", false); + modifier_param.Set("inverted", invert_value); + param.Set("modifier", modifier_param.Serialize()); + analog_map_modifier_button[analog_id]->setText(ButtonToText(modifier_param)); + emulated_controller->SetStickParam(analog_id, param); + }); + context_menu.exec( + analog_map_modifier_button[analog_id]->mapToGlobal(menu_location)); + }); connect(analog_map_range_spinbox[analog_id], qOverload<int>(&QSpinBox::valueChanged), [=, this] { diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp index d96497c4e..6ed9611c7 100644 --- a/src/yuzu/hotkeys.cpp +++ b/src/yuzu/hotkeys.cpp @@ -190,6 +190,9 @@ void ControllerShortcut::ControllerUpdateEvent(Core::HID::ControllerTriggerType if (type != Core::HID::ControllerTriggerType::Button) { return; } + if (!Settings::values.controller_navigation) { + return; + } if (button_sequence.npad.raw == Core::HID::NpadButton::None && button_sequence.capture.raw == 0 && button_sequence.home.raw == 0) { return; diff --git a/src/yuzu/util/controller_navigation.cpp b/src/yuzu/util/controller_navigation.cpp index 86fb28b9f..c2b13123d 100644 --- a/src/yuzu/util/controller_navigation.cpp +++ b/src/yuzu/util/controller_navigation.cpp @@ -40,6 +40,9 @@ void ControllerNavigation::TriggerButton(Settings::NativeButton::Values native_b void ControllerNavigation::ControllerUpdateEvent(Core::HID::ControllerTriggerType type) { std::lock_guard lock{mutex}; + if (!Settings::values.controller_navigation) { + return; + } if (type == Core::HID::ControllerTriggerType::Button) { ControllerUpdateButton(); return; |