diff options
Diffstat (limited to '')
83 files changed, 3415 insertions, 1586 deletions
diff --git a/src/common/bit_util.h b/src/common/bit_util.h index f50d3308a..f37538e06 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -57,4 +57,11 @@ requires std::is_integral_v<T> return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U))); } +template <size_t bit_index, typename T> +requires std::is_integral_v<T> +[[nodiscard]] constexpr bool Bit(const T value) { + static_assert(bit_index < BitSize<T>(), "bit_index must be smaller than size of T"); + return ((value >> bit_index) & T(1)) == T(1); +} + } // namespace Common diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 28949fe5e..c465cfc14 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -327,8 +327,8 @@ private: bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const { const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length}); if (it != placeholders.end() && it->lower() == virtual_offset + length) { - const bool is_root = it == placeholders.begin() && virtual_offset == 0; - return is_root || std::prev(it)->upper() == virtual_offset; + return it == placeholders.begin() ? virtual_offset == 0 + : std::prev(it)->upper() == virtual_offset; } return false; } diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h index 3173cc449..b296b639e 100644 --- a/src/common/intrusive_red_black_tree.h +++ b/src/common/intrusive_red_black_tree.h @@ -4,6 +4,8 @@ #pragma once +#include "common/alignment.h" +#include "common/common_funcs.h" #include "common/parent_of_member.h" #include "common/tree.h" @@ -15,32 +17,33 @@ class IntrusiveRedBlackTreeImpl; } +#pragma pack(push, 4) struct IntrusiveRedBlackTreeNode { + YUZU_NON_COPYABLE(IntrusiveRedBlackTreeNode); + public: - using EntryType = RBEntry<IntrusiveRedBlackTreeNode>; + using RBEntry = freebsd::RBEntry<IntrusiveRedBlackTreeNode>; - constexpr IntrusiveRedBlackTreeNode() = default; +private: + RBEntry m_entry; - void SetEntry(const EntryType& new_entry) { - entry = new_entry; - } +public: + explicit IntrusiveRedBlackTreeNode() = default; - [[nodiscard]] EntryType& GetEntry() { - return entry; + [[nodiscard]] constexpr RBEntry& GetRBEntry() { + return m_entry; } - - [[nodiscard]] const EntryType& GetEntry() const { - return entry; + [[nodiscard]] constexpr const RBEntry& GetRBEntry() const { + return m_entry; } -private: - EntryType entry{}; - - friend class impl::IntrusiveRedBlackTreeImpl; - - template <class, class, class> - friend class IntrusiveRedBlackTree; + constexpr void SetRBEntry(const RBEntry& entry) { + m_entry = entry; + } }; +static_assert(sizeof(IntrusiveRedBlackTreeNode) == + 3 * sizeof(void*) + std::max<size_t>(sizeof(freebsd::RBColor), 4)); +#pragma pack(pop) template <class T, class Traits, class Comparator> class IntrusiveRedBlackTree; @@ -48,12 +51,17 @@ class IntrusiveRedBlackTree; namespace impl { class IntrusiveRedBlackTreeImpl { + YUZU_NON_COPYABLE(IntrusiveRedBlackTreeImpl); + private: template <class, class, class> friend class ::Common::IntrusiveRedBlackTree; - using RootType = RBHead<IntrusiveRedBlackTreeNode>; - RootType root; +private: + using RootType = freebsd::RBHead<IntrusiveRedBlackTreeNode>; + +private: + RootType m_root; public: template <bool Const> @@ -81,149 +89,150 @@ public: IntrusiveRedBlackTreeImpl::reference>; private: - pointer node; + pointer m_node; public: - explicit Iterator(pointer n) : node(n) {} + constexpr explicit Iterator(pointer n) : m_node(n) {} - bool operator==(const Iterator& rhs) const { - return this->node == rhs.node; + constexpr bool operator==(const Iterator& rhs) const { + return m_node == rhs.m_node; } - bool operator!=(const Iterator& rhs) const { + constexpr bool operator!=(const Iterator& rhs) const { return !(*this == rhs); } - pointer operator->() const { - return this->node; + constexpr pointer operator->() const { + return m_node; } - reference operator*() const { - return *this->node; + constexpr reference operator*() const { + return *m_node; } - Iterator& operator++() { - this->node = GetNext(this->node); + constexpr Iterator& operator++() { + m_node = GetNext(m_node); return *this; } - Iterator& operator--() { - this->node = GetPrev(this->node); + constexpr Iterator& operator--() { + m_node = GetPrev(m_node); return *this; } - Iterator operator++(int) { + constexpr Iterator operator++(int) { const Iterator it{*this}; ++(*this); return it; } - Iterator operator--(int) { + constexpr Iterator operator--(int) { const Iterator it{*this}; --(*this); return it; } - operator Iterator<true>() const { - return Iterator<true>(this->node); + constexpr operator Iterator<true>() const { + return Iterator<true>(m_node); } }; private: - // Define accessors using RB_* functions. - bool EmptyImpl() const { - return root.IsEmpty(); + constexpr bool EmptyImpl() const { + return m_root.IsEmpty(); } - IntrusiveRedBlackTreeNode* GetMinImpl() const { - return RB_MIN(const_cast<RootType*>(&root)); + constexpr IntrusiveRedBlackTreeNode* GetMinImpl() const { + return freebsd::RB_MIN(const_cast<RootType&>(m_root)); } - IntrusiveRedBlackTreeNode* GetMaxImpl() const { - return RB_MAX(const_cast<RootType*>(&root)); + constexpr IntrusiveRedBlackTreeNode* GetMaxImpl() const { + return freebsd::RB_MAX(const_cast<RootType&>(m_root)); } - IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) { - return RB_REMOVE(&root, node); + constexpr IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) { + return freebsd::RB_REMOVE(m_root, node); } public: - static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) { - return RB_NEXT(node); + static constexpr IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) { + return freebsd::RB_NEXT(node); } - static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) { - return RB_PREV(node); + static constexpr IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) { + return freebsd::RB_PREV(node); } - static const IntrusiveRedBlackTreeNode* GetNext(const IntrusiveRedBlackTreeNode* node) { + static constexpr IntrusiveRedBlackTreeNode const* GetNext( + IntrusiveRedBlackTreeNode const* node) { return static_cast<const IntrusiveRedBlackTreeNode*>( GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node))); } - static const IntrusiveRedBlackTreeNode* GetPrev(const IntrusiveRedBlackTreeNode* node) { + static constexpr IntrusiveRedBlackTreeNode const* GetPrev( + IntrusiveRedBlackTreeNode const* node) { return static_cast<const IntrusiveRedBlackTreeNode*>( GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node))); } public: - constexpr IntrusiveRedBlackTreeImpl() {} + constexpr IntrusiveRedBlackTreeImpl() = default; // Iterator accessors. - iterator begin() { + constexpr iterator begin() { return iterator(this->GetMinImpl()); } - const_iterator begin() const { + constexpr const_iterator begin() const { return const_iterator(this->GetMinImpl()); } - iterator end() { + constexpr iterator end() { return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr)); } - const_iterator end() const { + constexpr const_iterator end() const { return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr)); } - const_iterator cbegin() const { + constexpr const_iterator cbegin() const { return this->begin(); } - const_iterator cend() const { + constexpr const_iterator cend() const { return this->end(); } - iterator iterator_to(reference ref) { - return iterator(&ref); + constexpr iterator iterator_to(reference ref) { + return iterator(std::addressof(ref)); } - const_iterator iterator_to(const_reference ref) const { - return const_iterator(&ref); + constexpr const_iterator iterator_to(const_reference ref) const { + return const_iterator(std::addressof(ref)); } // Content management. - bool empty() const { + constexpr bool empty() const { return this->EmptyImpl(); } - reference back() { + constexpr reference back() { return *this->GetMaxImpl(); } - const_reference back() const { + constexpr const_reference back() const { return *this->GetMaxImpl(); } - reference front() { + constexpr reference front() { return *this->GetMinImpl(); } - const_reference front() const { + constexpr const_reference front() const { return *this->GetMinImpl(); } - iterator erase(iterator it) { + constexpr iterator erase(iterator it) { auto cur = std::addressof(*it); auto next = GetNext(cur); this->RemoveImpl(cur); @@ -234,16 +243,16 @@ public: } // namespace impl template <typename T> -concept HasLightCompareType = requires { - { std::is_same<typename T::LightCompareType, void>::value } -> std::convertible_to<bool>; +concept HasRedBlackKeyType = requires { + { std::is_same<typename T::RedBlackKeyType, void>::value } -> std::convertible_to<bool>; }; namespace impl { template <typename T, typename Default> - consteval auto* GetLightCompareType() { - if constexpr (HasLightCompareType<T>) { - return static_cast<typename T::LightCompareType*>(nullptr); + consteval auto* GetRedBlackKeyType() { + if constexpr (HasRedBlackKeyType<T>) { + return static_cast<typename T::RedBlackKeyType*>(nullptr); } else { return static_cast<Default*>(nullptr); } @@ -252,16 +261,17 @@ namespace impl { } // namespace impl template <typename T, typename Default> -using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>; +using RedBlackKeyType = std::remove_pointer_t<decltype(impl::GetRedBlackKeyType<T, Default>())>; template <class T, class Traits, class Comparator> class IntrusiveRedBlackTree { + YUZU_NON_COPYABLE(IntrusiveRedBlackTree); public: using ImplType = impl::IntrusiveRedBlackTreeImpl; private: - ImplType impl{}; + ImplType m_impl; public: template <bool Const> @@ -277,9 +287,9 @@ public: using iterator = Iterator<false>; using const_iterator = Iterator<true>; - using light_value_type = LightCompareType<Comparator, value_type>; - using const_light_pointer = const light_value_type*; - using const_light_reference = const light_value_type&; + using key_type = RedBlackKeyType<Comparator, value_type>; + using const_key_pointer = const key_type*; + using const_key_reference = const key_type&; template <bool Const> class Iterator { @@ -298,183 +308,201 @@ public: IntrusiveRedBlackTree::reference>; private: - ImplIterator iterator; + ImplIterator m_impl; private: - explicit Iterator(ImplIterator it) : iterator(it) {} + constexpr explicit Iterator(ImplIterator it) : m_impl(it) {} - explicit Iterator(typename std::conditional<Const, ImplType::const_iterator, - ImplType::iterator>::type::pointer ptr) - : iterator(ptr) {} + constexpr explicit Iterator(typename ImplIterator::pointer p) : m_impl(p) {} - ImplIterator GetImplIterator() const { - return this->iterator; + constexpr ImplIterator GetImplIterator() const { + return m_impl; } public: - bool operator==(const Iterator& rhs) const { - return this->iterator == rhs.iterator; + constexpr bool operator==(const Iterator& rhs) const { + return m_impl == rhs.m_impl; } - bool operator!=(const Iterator& rhs) const { + constexpr bool operator!=(const Iterator& rhs) const { return !(*this == rhs); } - pointer operator->() const { - return Traits::GetParent(std::addressof(*this->iterator)); + constexpr pointer operator->() const { + return Traits::GetParent(std::addressof(*m_impl)); } - reference operator*() const { - return *Traits::GetParent(std::addressof(*this->iterator)); + constexpr reference operator*() const { + return *Traits::GetParent(std::addressof(*m_impl)); } - Iterator& operator++() { - ++this->iterator; + constexpr Iterator& operator++() { + ++m_impl; return *this; } - Iterator& operator--() { - --this->iterator; + constexpr Iterator& operator--() { + --m_impl; return *this; } - Iterator operator++(int) { + constexpr Iterator operator++(int) { const Iterator it{*this}; - ++this->iterator; + ++m_impl; return it; } - Iterator operator--(int) { + constexpr Iterator operator--(int) { const Iterator it{*this}; - --this->iterator; + --m_impl; return it; } - operator Iterator<true>() const { - return Iterator<true>(this->iterator); + constexpr operator Iterator<true>() const { + return Iterator<true>(m_impl); } }; private: - static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs, - const IntrusiveRedBlackTreeNode* rhs) { + static constexpr int CompareImpl(const IntrusiveRedBlackTreeNode* lhs, + const IntrusiveRedBlackTreeNode* rhs) { return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs)); } - static int LightCompareImpl(const void* elm, const IntrusiveRedBlackTreeNode* rhs) { - return Comparator::Compare(*static_cast<const_light_pointer>(elm), *Traits::GetParent(rhs)); + static constexpr int CompareKeyImpl(const_key_reference key, + const IntrusiveRedBlackTreeNode* rhs) { + return Comparator::Compare(key, *Traits::GetParent(rhs)); } // Define accessors using RB_* functions. - IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) { - return RB_INSERT(&impl.root, node, CompareImpl); + constexpr IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) { + return freebsd::RB_INSERT(m_impl.m_root, node, CompareImpl); } - IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const { - return RB_FIND(const_cast<ImplType::RootType*>(&impl.root), - const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); + constexpr IntrusiveRedBlackTreeNode* FindImpl(IntrusiveRedBlackTreeNode const* node) const { + return freebsd::RB_FIND(const_cast<ImplType::RootType&>(m_impl.m_root), + const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); } - IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const { - return RB_NFIND(const_cast<ImplType::RootType*>(&impl.root), - const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); + constexpr IntrusiveRedBlackTreeNode* NFindImpl(IntrusiveRedBlackTreeNode const* node) const { + return freebsd::RB_NFIND(const_cast<ImplType::RootType&>(m_impl.m_root), + const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); } - IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const { - return RB_FIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root), - static_cast<const void*>(lelm), LightCompareImpl); + constexpr IntrusiveRedBlackTreeNode* FindKeyImpl(const_key_reference key) const { + return freebsd::RB_FIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key, + CompareKeyImpl); } - IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const { - return RB_NFIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root), - static_cast<const void*>(lelm), LightCompareImpl); + constexpr IntrusiveRedBlackTreeNode* NFindKeyImpl(const_key_reference key) const { + return freebsd::RB_NFIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key, + CompareKeyImpl); + } + + constexpr IntrusiveRedBlackTreeNode* FindExistingImpl( + IntrusiveRedBlackTreeNode const* node) const { + return freebsd::RB_FIND_EXISTING(const_cast<ImplType::RootType&>(m_impl.m_root), + const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); + } + + constexpr IntrusiveRedBlackTreeNode* FindExistingKeyImpl(const_key_reference key) const { + return freebsd::RB_FIND_EXISTING_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key, + CompareKeyImpl); } public: constexpr IntrusiveRedBlackTree() = default; // Iterator accessors. - iterator begin() { - return iterator(this->impl.begin()); + constexpr iterator begin() { + return iterator(m_impl.begin()); } - const_iterator begin() const { - return const_iterator(this->impl.begin()); + constexpr const_iterator begin() const { + return const_iterator(m_impl.begin()); } - iterator end() { - return iterator(this->impl.end()); + constexpr iterator end() { + return iterator(m_impl.end()); } - const_iterator end() const { - return const_iterator(this->impl.end()); + constexpr const_iterator end() const { + return const_iterator(m_impl.end()); } - const_iterator cbegin() const { + constexpr const_iterator cbegin() const { return this->begin(); } - const_iterator cend() const { + constexpr const_iterator cend() const { return this->end(); } - iterator iterator_to(reference ref) { - return iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); + constexpr iterator iterator_to(reference ref) { + return iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); } - const_iterator iterator_to(const_reference ref) const { - return const_iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); + constexpr const_iterator iterator_to(const_reference ref) const { + return const_iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); } // Content management. - bool empty() const { - return this->impl.empty(); + constexpr bool empty() const { + return m_impl.empty(); } - reference back() { - return *Traits::GetParent(std::addressof(this->impl.back())); + constexpr reference back() { + return *Traits::GetParent(std::addressof(m_impl.back())); } - const_reference back() const { - return *Traits::GetParent(std::addressof(this->impl.back())); + constexpr const_reference back() const { + return *Traits::GetParent(std::addressof(m_impl.back())); } - reference front() { - return *Traits::GetParent(std::addressof(this->impl.front())); + constexpr reference front() { + return *Traits::GetParent(std::addressof(m_impl.front())); } - const_reference front() const { - return *Traits::GetParent(std::addressof(this->impl.front())); + constexpr const_reference front() const { + return *Traits::GetParent(std::addressof(m_impl.front())); } - iterator erase(iterator it) { - return iterator(this->impl.erase(it.GetImplIterator())); + constexpr iterator erase(iterator it) { + return iterator(m_impl.erase(it.GetImplIterator())); } - iterator insert(reference ref) { + constexpr iterator insert(reference ref) { ImplType::pointer node = Traits::GetNode(std::addressof(ref)); this->InsertImpl(node); return iterator(node); } - iterator find(const_reference ref) const { + constexpr iterator find(const_reference ref) const { return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref)))); } - iterator nfind(const_reference ref) const { + constexpr iterator nfind(const_reference ref) const { return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref)))); } - iterator find_light(const_light_reference ref) const { - return iterator(this->FindLightImpl(std::addressof(ref))); + constexpr iterator find_key(const_key_reference ref) const { + return iterator(this->FindKeyImpl(ref)); + } + + constexpr iterator nfind_key(const_key_reference ref) const { + return iterator(this->NFindKeyImpl(ref)); + } + + constexpr iterator find_existing(const_reference ref) const { + return iterator(this->FindExistingImpl(Traits::GetNode(std::addressof(ref)))); } - iterator nfind_light(const_light_reference ref) const { - return iterator(this->NFindLightImpl(std::addressof(ref))); + constexpr iterator find_existing_key(const_key_reference ref) const { + return iterator(this->FindExistingKeyImpl(ref)); } }; -template <auto T, class Derived = impl::GetParentType<T>> +template <auto T, class Derived = Common::impl::GetParentType<T>> class IntrusiveRedBlackTreeMemberTraits; template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived> @@ -498,19 +526,16 @@ private: return std::addressof(parent->*Member); } - static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { - return GetParentPointer<Member, Derived>(node); + static Derived* GetParent(IntrusiveRedBlackTreeNode* node) { + return Common::GetParentPointer<Member, Derived>(node); } - static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { - return GetParentPointer<Member, Derived>(node); + static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) { + return Common::GetParentPointer<Member, Derived>(node); } - -private: - static constexpr TypedStorage<Derived> DerivedStorage = {}; }; -template <auto T, class Derived = impl::GetParentType<T>> +template <auto T, class Derived = Common::impl::GetParentType<T>> class IntrusiveRedBlackTreeMemberTraitsDeferredAssert; template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived> @@ -521,11 +546,6 @@ public: IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>; using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl; - static constexpr bool IsValid() { - TypedStorage<Derived> DerivedStorage = {}; - return GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage); - } - private: template <class, class, class> friend class IntrusiveRedBlackTree; @@ -540,30 +560,36 @@ private: return std::addressof(parent->*Member); } - static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { - return GetParentPointer<Member, Derived>(node); + static Derived* GetParent(IntrusiveRedBlackTreeNode* node) { + return Common::GetParentPointer<Member, Derived>(node); } - static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { - return GetParentPointer<Member, Derived>(node); + static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) { + return Common::GetParentPointer<Member, Derived>(node); } }; template <class Derived> -class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode { +class alignas(void*) IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode { public: + using IntrusiveRedBlackTreeNode::IntrusiveRedBlackTreeNode; + constexpr Derived* GetPrev() { - return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this)); + return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>( + impl::IntrusiveRedBlackTreeImpl::GetPrev(this))); } constexpr const Derived* GetPrev() const { - return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this)); + return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>( + impl::IntrusiveRedBlackTreeImpl::GetPrev(this))); } constexpr Derived* GetNext() { - return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this)); + return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>( + impl::IntrusiveRedBlackTreeImpl::GetNext(this))); } constexpr const Derived* GetNext() const { - return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this)); + return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>( + impl::IntrusiveRedBlackTreeImpl::GetNext(this))); } }; @@ -581,19 +607,22 @@ private: friend class impl::IntrusiveRedBlackTreeImpl; static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) { - return static_cast<IntrusiveRedBlackTreeNode*>(parent); + return static_cast<IntrusiveRedBlackTreeNode*>( + static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(parent)); } static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) { - return static_cast<const IntrusiveRedBlackTreeNode*>(parent); + return static_cast<const IntrusiveRedBlackTreeNode*>( + static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(parent)); } static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { - return static_cast<Derived*>(node); + return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(node)); } - static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { - return static_cast<const Derived*>(node); + static constexpr Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) { + return static_cast<const Derived*>( + static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(node)); } }; diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index c51c05b28..4a2462ec4 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -218,19 +218,17 @@ private: Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_) : filter{filter_}, file_backend{file_backend_filename} {} - ~Impl() { - StopBackendThread(); - } + ~Impl() = default; void StartBackendThread() { - backend_thread = std::thread([this] { + backend_thread = std::jthread([this](std::stop_token stop_token) { Common::SetCurrentThreadName("yuzu:Log"); Entry entry; const auto write_logs = [this, &entry]() { ForEachBackend([&entry](Backend& backend) { backend.Write(entry); }); }; - while (!stop.stop_requested()) { - entry = message_queue.PopWait(stop.get_token()); + while (!stop_token.stop_requested()) { + entry = message_queue.PopWait(stop_token); if (entry.filename != nullptr) { write_logs(); } @@ -244,11 +242,6 @@ private: }); } - void StopBackendThread() { - stop.request_stop(); - backend_thread.join(); - } - Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr, const char* function, std::string&& message) const { using std::chrono::duration_cast; @@ -283,10 +276,9 @@ private: ColorConsoleBackend color_console_backend{}; FileBackend file_backend; - std::stop_source stop; - std::thread backend_thread; MPSCQueue<Entry, true> message_queue{}; std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; + std::jthread backend_thread; }; } // namespace diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 2810cec15..877e0faa4 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) { values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); values.cpuopt_unsafe_fastmem_check.SetGlobal(true); + values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true); // Renderer values.renderer_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index d06b23a14..a37d83fb3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -484,12 +484,15 @@ struct Values { BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"}; BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"}; BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"}; + BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"}; + BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"}; Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"}; Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"}; Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"}; Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"}; Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"}; + Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"}; // Renderer RangedSetting<RendererBackend> renderer_backend{ diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 6241d08b3..98c82cd17 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -55,22 +55,50 @@ void AppendBuildInfo(FieldCollection& fc) { void AppendCPUInfo(FieldCollection& fc) { #ifdef ARCHITECTURE_x86_64 - fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string); - fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2); + + const auto& caps = Common::GetCPUCaps(); + const auto add_field = [&fc](std::string_view field_name, const auto& field_value) { + fc.AddField(FieldType::UserSystem, field_name, field_value); + }; + add_field("CPU_Model", caps.cpu_string); + add_field("CPU_BrandString", caps.brand_string); + + add_field("CPU_Extension_x64_SSE", caps.sse); + add_field("CPU_Extension_x64_SSE2", caps.sse2); + add_field("CPU_Extension_x64_SSE3", caps.sse3); + add_field("CPU_Extension_x64_SSSE3", caps.ssse3); + add_field("CPU_Extension_x64_SSE41", caps.sse4_1); + add_field("CPU_Extension_x64_SSE42", caps.sse4_2); + + add_field("CPU_Extension_x64_AVX", caps.avx); + add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni); + add_field("CPU_Extension_x64_AVX2", caps.avx2); + + // Skylake-X/SP level AVX512, for compatibility with the previous telemetry field + add_field("CPU_Extension_x64_AVX512", + caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw); + + add_field("CPU_Extension_x64_AVX512F", caps.avx512f); + add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd); + add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl); + add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq); + add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw); + add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg); + add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi); + + add_field("CPU_Extension_x64_AES", caps.aes); + add_field("CPU_Extension_x64_BMI1", caps.bmi1); + add_field("CPU_Extension_x64_BMI2", caps.bmi2); + add_field("CPU_Extension_x64_F16C", caps.f16c); + add_field("CPU_Extension_x64_FMA", caps.fma); + add_field("CPU_Extension_x64_FMA4", caps.fma4); + add_field("CPU_Extension_x64_GFNI", caps.gfni); + add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); + add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); + add_field("CPU_Extension_x64_MOVBE", caps.movbe); + add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); + add_field("CPU_Extension_x64_POPCNT", caps.popcnt); + add_field("CPU_Extension_x64_SHA", caps.sha); #else fc.AddField(FieldType::UserSystem, "CPU_Model", "Other"); #endif diff --git a/src/common/telemetry.h b/src/common/telemetry.h index 4d632f7eb..3524c857e 100644 --- a/src/common/telemetry.h +++ b/src/common/telemetry.h @@ -8,6 +8,7 @@ #include <map> #include <memory> #include <string> +#include <string_view> #include "common/common_funcs.h" #include "common/common_types.h" @@ -55,8 +56,8 @@ class Field : public FieldInterface { public: YUZU_NON_COPYABLE(Field); - Field(FieldType type_, std::string name_, T value_) - : name(std::move(name_)), type(type_), value(std::move(value_)) {} + Field(FieldType type_, std::string_view name_, T value_) + : name(name_), type(type_), value(std::move(value_)) {} ~Field() override = default; @@ -123,7 +124,7 @@ public: * @param value Value for the field to add. */ template <typename T> - void AddField(FieldType type, const char* name, T value) { + void AddField(FieldType type, std::string_view name, T value) { return AddField(std::make_unique<Field<T>>(type, name, std::move(value))); } diff --git a/src/common/tree.h b/src/common/tree.h index 18faa4a48..28370e343 100644 --- a/src/common/tree.h +++ b/src/common/tree.h @@ -43,294 +43,265 @@ * The maximum height of a red-black tree is 2lg (n+1). */ -#include "common/assert.h" +namespace Common::freebsd { -namespace Common { +enum class RBColor { + RB_BLACK = 0, + RB_RED = 1, +}; + +#pragma pack(push, 4) template <typename T> -class RBHead { +class RBEntry { public: - [[nodiscard]] T* Root() { - return rbh_root; - } + constexpr RBEntry() = default; - [[nodiscard]] const T* Root() const { - return rbh_root; + [[nodiscard]] constexpr T* Left() { + return m_rbe_left; } - - void SetRoot(T* root) { - rbh_root = root; + [[nodiscard]] constexpr const T* Left() const { + return m_rbe_left; } - [[nodiscard]] bool IsEmpty() const { - return Root() == nullptr; + constexpr void SetLeft(T* e) { + m_rbe_left = e; } -private: - T* rbh_root = nullptr; -}; - -enum class EntryColor { - Black, - Red, -}; - -template <typename T> -class RBEntry { -public: - [[nodiscard]] T* Left() { - return rbe_left; + [[nodiscard]] constexpr T* Right() { + return m_rbe_right; } - - [[nodiscard]] const T* Left() const { - return rbe_left; + [[nodiscard]] constexpr const T* Right() const { + return m_rbe_right; } - void SetLeft(T* left) { - rbe_left = left; + constexpr void SetRight(T* e) { + m_rbe_right = e; } - [[nodiscard]] T* Right() { - return rbe_right; + [[nodiscard]] constexpr T* Parent() { + return m_rbe_parent; } - - [[nodiscard]] const T* Right() const { - return rbe_right; + [[nodiscard]] constexpr const T* Parent() const { + return m_rbe_parent; } - void SetRight(T* right) { - rbe_right = right; + constexpr void SetParent(T* e) { + m_rbe_parent = e; } - [[nodiscard]] T* Parent() { - return rbe_parent; + [[nodiscard]] constexpr bool IsBlack() const { + return m_rbe_color == RBColor::RB_BLACK; } - - [[nodiscard]] const T* Parent() const { - return rbe_parent; + [[nodiscard]] constexpr bool IsRed() const { + return m_rbe_color == RBColor::RB_RED; } - - void SetParent(T* parent) { - rbe_parent = parent; + [[nodiscard]] constexpr RBColor Color() const { + return m_rbe_color; } - [[nodiscard]] bool IsBlack() const { - return rbe_color == EntryColor::Black; + constexpr void SetColor(RBColor c) { + m_rbe_color = c; } - [[nodiscard]] bool IsRed() const { - return rbe_color == EntryColor::Red; - } +private: + T* m_rbe_left{}; + T* m_rbe_right{}; + T* m_rbe_parent{}; + RBColor m_rbe_color{RBColor::RB_BLACK}; +}; +#pragma pack(pop) - [[nodiscard]] EntryColor Color() const { - return rbe_color; - } +template <typename T> +struct CheckRBEntry { + static constexpr bool value = false; +}; +template <typename T> +struct CheckRBEntry<RBEntry<T>> { + static constexpr bool value = true; +}; - void SetColor(EntryColor color) { - rbe_color = color; - } +template <typename T> +concept IsRBEntry = CheckRBEntry<T>::value; +template <typename T> +concept HasRBEntry = requires(T& t, const T& ct) { + { t.GetRBEntry() } -> std::same_as<RBEntry<T>&>; + { ct.GetRBEntry() } -> std::same_as<const RBEntry<T>&>; +}; + +template <typename T> +requires HasRBEntry<T> +class RBHead { private: - T* rbe_left = nullptr; - T* rbe_right = nullptr; - T* rbe_parent = nullptr; - EntryColor rbe_color{}; + T* m_rbh_root = nullptr; + +public: + [[nodiscard]] constexpr T* Root() { + return m_rbh_root; + } + [[nodiscard]] constexpr const T* Root() const { + return m_rbh_root; + } + constexpr void SetRoot(T* root) { + m_rbh_root = root; + } + + [[nodiscard]] constexpr bool IsEmpty() const { + return this->Root() == nullptr; + } }; -template <typename Node> -[[nodiscard]] RBEntry<Node>& RB_ENTRY(Node* node) { - return node->GetEntry(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr RBEntry<T>& RB_ENTRY(T* t) { + return t->GetRBEntry(); } - -template <typename Node> -[[nodiscard]] const RBEntry<Node>& RB_ENTRY(const Node* node) { - return node->GetEntry(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr const RBEntry<T>& RB_ENTRY(const T* t) { + return t->GetRBEntry(); } -template <typename Node> -[[nodiscard]] Node* RB_PARENT(Node* node) { - return RB_ENTRY(node).Parent(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr T* RB_LEFT(T* t) { + return RB_ENTRY(t).Left(); } - -template <typename Node> -[[nodiscard]] const Node* RB_PARENT(const Node* node) { - return RB_ENTRY(node).Parent(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr const T* RB_LEFT(const T* t) { + return RB_ENTRY(t).Left(); } -template <typename Node> -void RB_SET_PARENT(Node* node, Node* parent) { - return RB_ENTRY(node).SetParent(parent); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr T* RB_RIGHT(T* t) { + return RB_ENTRY(t).Right(); } - -template <typename Node> -[[nodiscard]] Node* RB_LEFT(Node* node) { - return RB_ENTRY(node).Left(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr const T* RB_RIGHT(const T* t) { + return RB_ENTRY(t).Right(); } -template <typename Node> -[[nodiscard]] const Node* RB_LEFT(const Node* node) { - return RB_ENTRY(node).Left(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr T* RB_PARENT(T* t) { + return RB_ENTRY(t).Parent(); } - -template <typename Node> -void RB_SET_LEFT(Node* node, Node* left) { - return RB_ENTRY(node).SetLeft(left); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr const T* RB_PARENT(const T* t) { + return RB_ENTRY(t).Parent(); } -template <typename Node> -[[nodiscard]] Node* RB_RIGHT(Node* node) { - return RB_ENTRY(node).Right(); +template <typename T> +requires HasRBEntry<T> +constexpr void RB_SET_LEFT(T* t, T* e) { + RB_ENTRY(t).SetLeft(e); } - -template <typename Node> -[[nodiscard]] const Node* RB_RIGHT(const Node* node) { - return RB_ENTRY(node).Right(); +template <typename T> +requires HasRBEntry<T> +constexpr void RB_SET_RIGHT(T* t, T* e) { + RB_ENTRY(t).SetRight(e); } - -template <typename Node> -void RB_SET_RIGHT(Node* node, Node* right) { - return RB_ENTRY(node).SetRight(right); +template <typename T> +requires HasRBEntry<T> +constexpr void RB_SET_PARENT(T* t, T* e) { + RB_ENTRY(t).SetParent(e); } -template <typename Node> -[[nodiscard]] bool RB_IS_BLACK(const Node* node) { - return RB_ENTRY(node).IsBlack(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr bool RB_IS_BLACK(const T* t) { + return RB_ENTRY(t).IsBlack(); } - -template <typename Node> -[[nodiscard]] bool RB_IS_RED(const Node* node) { - return RB_ENTRY(node).IsRed(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr bool RB_IS_RED(const T* t) { + return RB_ENTRY(t).IsRed(); } -template <typename Node> -[[nodiscard]] EntryColor RB_COLOR(const Node* node) { - return RB_ENTRY(node).Color(); +template <typename T> +requires HasRBEntry<T> +[[nodiscard]] constexpr RBColor RB_COLOR(const T* t) { + return RB_ENTRY(t).Color(); } -template <typename Node> -void RB_SET_COLOR(Node* node, EntryColor color) { - return RB_ENTRY(node).SetColor(color); +template <typename T> +requires HasRBEntry<T> +constexpr void RB_SET_COLOR(T* t, RBColor c) { + RB_ENTRY(t).SetColor(c); } -template <typename Node> -void RB_SET(Node* node, Node* parent) { - auto& entry = RB_ENTRY(node); - entry.SetParent(parent); - entry.SetLeft(nullptr); - entry.SetRight(nullptr); - entry.SetColor(EntryColor::Red); +template <typename T> +requires HasRBEntry<T> +constexpr void RB_SET(T* elm, T* parent) { + auto& rb_entry = RB_ENTRY(elm); + rb_entry.SetParent(parent); + rb_entry.SetLeft(nullptr); + rb_entry.SetRight(nullptr); + rb_entry.SetColor(RBColor::RB_RED); } -template <typename Node> -void RB_SET_BLACKRED(Node* black, Node* red) { - RB_SET_COLOR(black, EntryColor::Black); - RB_SET_COLOR(red, EntryColor::Red); +template <typename T> +requires HasRBEntry<T> +constexpr void RB_SET_BLACKRED(T* black, T* red) { + RB_SET_COLOR(black, RBColor::RB_BLACK); + RB_SET_COLOR(red, RBColor::RB_RED); } -template <typename Node> -void RB_ROTATE_LEFT(RBHead<Node>* head, Node* elm, Node*& tmp) { +template <typename T> +requires HasRBEntry<T> +constexpr void RB_ROTATE_LEFT(RBHead<T>& head, T* elm, T*& tmp) { tmp = RB_RIGHT(elm); - RB_SET_RIGHT(elm, RB_LEFT(tmp)); - if (RB_RIGHT(elm) != nullptr) { + if (RB_SET_RIGHT(elm, RB_LEFT(tmp)); RB_RIGHT(elm) != nullptr) { RB_SET_PARENT(RB_LEFT(tmp), elm); } - RB_SET_PARENT(tmp, RB_PARENT(elm)); - if (RB_PARENT(tmp) != nullptr) { + if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) { if (elm == RB_LEFT(RB_PARENT(elm))) { RB_SET_LEFT(RB_PARENT(elm), tmp); } else { RB_SET_RIGHT(RB_PARENT(elm), tmp); } } else { - head->SetRoot(tmp); + head.SetRoot(tmp); } RB_SET_LEFT(tmp, elm); RB_SET_PARENT(elm, tmp); } -template <typename Node> -void RB_ROTATE_RIGHT(RBHead<Node>* head, Node* elm, Node*& tmp) { +template <typename T> +requires HasRBEntry<T> +constexpr void RB_ROTATE_RIGHT(RBHead<T>& head, T* elm, T*& tmp) { tmp = RB_LEFT(elm); - RB_SET_LEFT(elm, RB_RIGHT(tmp)); - if (RB_LEFT(elm) != nullptr) { + if (RB_SET_LEFT(elm, RB_RIGHT(tmp)); RB_LEFT(elm) != nullptr) { RB_SET_PARENT(RB_RIGHT(tmp), elm); } - RB_SET_PARENT(tmp, RB_PARENT(elm)); - if (RB_PARENT(tmp) != nullptr) { + if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) { if (elm == RB_LEFT(RB_PARENT(elm))) { RB_SET_LEFT(RB_PARENT(elm), tmp); } else { RB_SET_RIGHT(RB_PARENT(elm), tmp); } } else { - head->SetRoot(tmp); + head.SetRoot(tmp); } RB_SET_RIGHT(tmp, elm); RB_SET_PARENT(elm, tmp); } -template <typename Node> -void RB_INSERT_COLOR(RBHead<Node>* head, Node* elm) { - Node* parent = nullptr; - Node* tmp = nullptr; - - while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) { - Node* gparent = RB_PARENT(parent); - if (parent == RB_LEFT(gparent)) { - tmp = RB_RIGHT(gparent); - if (tmp && RB_IS_RED(tmp)) { - RB_SET_COLOR(tmp, EntryColor::Black); - RB_SET_BLACKRED(parent, gparent); - elm = gparent; - continue; - } - - if (RB_RIGHT(parent) == elm) { - RB_ROTATE_LEFT(head, parent, tmp); - tmp = parent; - parent = elm; - elm = tmp; - } - - RB_SET_BLACKRED(parent, gparent); - RB_ROTATE_RIGHT(head, gparent, tmp); - } else { - tmp = RB_LEFT(gparent); - if (tmp && RB_IS_RED(tmp)) { - RB_SET_COLOR(tmp, EntryColor::Black); - RB_SET_BLACKRED(parent, gparent); - elm = gparent; - continue; - } - - if (RB_LEFT(parent) == elm) { - RB_ROTATE_RIGHT(head, parent, tmp); - tmp = parent; - parent = elm; - elm = tmp; - } - - RB_SET_BLACKRED(parent, gparent); - RB_ROTATE_LEFT(head, gparent, tmp); - } - } - - RB_SET_COLOR(head->Root(), EntryColor::Black); -} - -template <typename Node> -void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) { - Node* tmp; - while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head->Root() && parent != nullptr) { +template <typename T> +requires HasRBEntry<T> +constexpr void RB_REMOVE_COLOR(RBHead<T>& head, T* parent, T* elm) { + T* tmp; + while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head.Root()) { if (RB_LEFT(parent) == elm) { tmp = RB_RIGHT(parent); - if (!tmp) { - ASSERT_MSG(false, "tmp is invalid!"); - break; - } if (RB_IS_RED(tmp)) { RB_SET_BLACKRED(tmp, parent); RB_ROTATE_LEFT(head, parent, tmp); @@ -339,29 +310,29 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) { if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) && (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) { - RB_SET_COLOR(tmp, EntryColor::Red); + RB_SET_COLOR(tmp, RBColor::RB_RED); elm = parent; parent = RB_PARENT(elm); } else { if (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp))) { - Node* oleft; + T* oleft; if ((oleft = RB_LEFT(tmp)) != nullptr) { - RB_SET_COLOR(oleft, EntryColor::Black); + RB_SET_COLOR(oleft, RBColor::RB_BLACK); } - RB_SET_COLOR(tmp, EntryColor::Red); + RB_SET_COLOR(tmp, RBColor::RB_RED); RB_ROTATE_RIGHT(head, tmp, oleft); tmp = RB_RIGHT(parent); } RB_SET_COLOR(tmp, RB_COLOR(parent)); - RB_SET_COLOR(parent, EntryColor::Black); + RB_SET_COLOR(parent, RBColor::RB_BLACK); if (RB_RIGHT(tmp)) { - RB_SET_COLOR(RB_RIGHT(tmp), EntryColor::Black); + RB_SET_COLOR(RB_RIGHT(tmp), RBColor::RB_BLACK); } RB_ROTATE_LEFT(head, parent, tmp); - elm = head->Root(); + elm = head.Root(); break; } } else { @@ -372,68 +343,56 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) { tmp = RB_LEFT(parent); } - if (!tmp) { - ASSERT_MSG(false, "tmp is invalid!"); - break; - } - if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) && (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) { - RB_SET_COLOR(tmp, EntryColor::Red); + RB_SET_COLOR(tmp, RBColor::RB_RED); elm = parent; parent = RB_PARENT(elm); } else { if (RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) { - Node* oright; + T* oright; if ((oright = RB_RIGHT(tmp)) != nullptr) { - RB_SET_COLOR(oright, EntryColor::Black); + RB_SET_COLOR(oright, RBColor::RB_BLACK); } - RB_SET_COLOR(tmp, EntryColor::Red); + RB_SET_COLOR(tmp, RBColor::RB_RED); RB_ROTATE_LEFT(head, tmp, oright); tmp = RB_LEFT(parent); } RB_SET_COLOR(tmp, RB_COLOR(parent)); - RB_SET_COLOR(parent, EntryColor::Black); + RB_SET_COLOR(parent, RBColor::RB_BLACK); if (RB_LEFT(tmp)) { - RB_SET_COLOR(RB_LEFT(tmp), EntryColor::Black); + RB_SET_COLOR(RB_LEFT(tmp), RBColor::RB_BLACK); } RB_ROTATE_RIGHT(head, parent, tmp); - elm = head->Root(); + elm = head.Root(); break; } } } if (elm) { - RB_SET_COLOR(elm, EntryColor::Black); + RB_SET_COLOR(elm, RBColor::RB_BLACK); } } -template <typename Node> -Node* RB_REMOVE(RBHead<Node>* head, Node* elm) { - Node* child = nullptr; - Node* parent = nullptr; - Node* old = elm; - EntryColor color{}; - - const auto finalize = [&] { - if (color == EntryColor::Black) { - RB_REMOVE_COLOR(head, parent, child); - } - - return old; - }; +template <typename T> +requires HasRBEntry<T> +constexpr T* RB_REMOVE(RBHead<T>& head, T* elm) { + T* child = nullptr; + T* parent = nullptr; + T* old = elm; + RBColor color = RBColor::RB_BLACK; if (RB_LEFT(elm) == nullptr) { child = RB_RIGHT(elm); } else if (RB_RIGHT(elm) == nullptr) { child = RB_LEFT(elm); } else { - Node* left; + T* left; elm = RB_RIGHT(elm); while ((left = RB_LEFT(elm)) != nullptr) { elm = left; @@ -446,6 +405,7 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) { if (child) { RB_SET_PARENT(child, parent); } + if (parent) { if (RB_LEFT(parent) == elm) { RB_SET_LEFT(parent, child); @@ -453,14 +413,14 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) { RB_SET_RIGHT(parent, child); } } else { - head->SetRoot(child); + head.SetRoot(child); } if (RB_PARENT(elm) == old) { parent = elm; } - elm->SetEntry(old->GetEntry()); + elm->SetRBEntry(old->GetRBEntry()); if (RB_PARENT(old)) { if (RB_LEFT(RB_PARENT(old)) == old) { @@ -469,17 +429,24 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) { RB_SET_RIGHT(RB_PARENT(old), elm); } } else { - head->SetRoot(elm); + head.SetRoot(elm); } + RB_SET_PARENT(RB_LEFT(old), elm); + if (RB_RIGHT(old)) { RB_SET_PARENT(RB_RIGHT(old), elm); } + if (parent) { left = parent; } - return finalize(); + if (color == RBColor::RB_BLACK) { + RB_REMOVE_COLOR(head, parent, child); + } + + return old; } parent = RB_PARENT(elm); @@ -495,17 +462,69 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) { RB_SET_RIGHT(parent, child); } } else { - head->SetRoot(child); + head.SetRoot(child); + } + + if (color == RBColor::RB_BLACK) { + RB_REMOVE_COLOR(head, parent, child); + } + + return old; +} + +template <typename T> +requires HasRBEntry<T> +constexpr void RB_INSERT_COLOR(RBHead<T>& head, T* elm) { + T *parent = nullptr, *tmp = nullptr; + while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) { + T* gparent = RB_PARENT(parent); + if (parent == RB_LEFT(gparent)) { + tmp = RB_RIGHT(gparent); + if (tmp && RB_IS_RED(tmp)) { + RB_SET_COLOR(tmp, RBColor::RB_BLACK); + RB_SET_BLACKRED(parent, gparent); + elm = gparent; + continue; + } + + if (RB_RIGHT(parent) == elm) { + RB_ROTATE_LEFT(head, parent, tmp); + tmp = parent; + parent = elm; + elm = tmp; + } + + RB_SET_BLACKRED(parent, gparent); + RB_ROTATE_RIGHT(head, gparent, tmp); + } else { + tmp = RB_LEFT(gparent); + if (tmp && RB_IS_RED(tmp)) { + RB_SET_COLOR(tmp, RBColor::RB_BLACK); + RB_SET_BLACKRED(parent, gparent); + elm = gparent; + continue; + } + + if (RB_LEFT(parent) == elm) { + RB_ROTATE_RIGHT(head, parent, tmp); + tmp = parent; + parent = elm; + elm = tmp; + } + + RB_SET_BLACKRED(parent, gparent); + RB_ROTATE_LEFT(head, gparent, tmp); + } } - return finalize(); + RB_SET_COLOR(head.Root(), RBColor::RB_BLACK); } -// Inserts a node into the RB tree -template <typename Node, typename CompareFunction> -Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) { - Node* parent = nullptr; - Node* tmp = head->Root(); +template <typename T, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_INSERT(RBHead<T>& head, T* elm, Compare cmp) { + T* parent = nullptr; + T* tmp = head.Root(); int comp = 0; while (tmp) { @@ -529,17 +548,17 @@ Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) { RB_SET_RIGHT(parent, elm); } } else { - head->SetRoot(elm); + head.SetRoot(elm); } RB_INSERT_COLOR(head, elm); return nullptr; } -// Finds the node with the same key as elm -template <typename Node, typename CompareFunction> -Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) { - Node* tmp = head->Root(); +template <typename T, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_FIND(RBHead<T>& head, T* elm, Compare cmp) { + T* tmp = head.Root(); while (tmp) { const int comp = cmp(elm, tmp); @@ -555,11 +574,11 @@ Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) { return nullptr; } -// Finds the first node greater than or equal to the search key -template <typename Node, typename CompareFunction> -Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) { - Node* tmp = head->Root(); - Node* res = nullptr; +template <typename T, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_NFIND(RBHead<T>& head, T* elm, Compare cmp) { + T* tmp = head.Root(); + T* res = nullptr; while (tmp) { const int comp = cmp(elm, tmp); @@ -576,13 +595,13 @@ Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) { return res; } -// Finds the node with the same key as lelm -template <typename Node, typename CompareFunction> -Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) { - Node* tmp = head->Root(); +template <typename T, typename U, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_FIND_KEY(RBHead<T>& head, const U& key, Compare cmp) { + T* tmp = head.Root(); while (tmp) { - const int comp = lcmp(lelm, tmp); + const int comp = cmp(key, tmp); if (comp < 0) { tmp = RB_LEFT(tmp); } else if (comp > 0) { @@ -595,14 +614,14 @@ Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) return nullptr; } -// Finds the first node greater than or equal to the search key -template <typename Node, typename CompareFunction> -Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) { - Node* tmp = head->Root(); - Node* res = nullptr; +template <typename T, typename U, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_NFIND_KEY(RBHead<T>& head, const U& key, Compare cmp) { + T* tmp = head.Root(); + T* res = nullptr; while (tmp) { - const int comp = lcmp(lelm, tmp); + const int comp = cmp(key, tmp); if (comp < 0) { res = tmp; tmp = RB_LEFT(tmp); @@ -616,8 +635,43 @@ Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) return res; } -template <typename Node> -Node* RB_NEXT(Node* elm) { +template <typename T, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_FIND_EXISTING(RBHead<T>& head, T* elm, Compare cmp) { + T* tmp = head.Root(); + + while (true) { + const int comp = cmp(elm, tmp); + if (comp < 0) { + tmp = RB_LEFT(tmp); + } else if (comp > 0) { + tmp = RB_RIGHT(tmp); + } else { + return tmp; + } + } +} + +template <typename T, typename U, typename Compare> +requires HasRBEntry<T> +constexpr T* RB_FIND_EXISTING_KEY(RBHead<T>& head, const U& key, Compare cmp) { + T* tmp = head.Root(); + + while (true) { + const int comp = cmp(key, tmp); + if (comp < 0) { + tmp = RB_LEFT(tmp); + } else if (comp > 0) { + tmp = RB_RIGHT(tmp); + } else { + return tmp; + } + } +} + +template <typename T> +requires HasRBEntry<T> +constexpr T* RB_NEXT(T* elm) { if (RB_RIGHT(elm)) { elm = RB_RIGHT(elm); while (RB_LEFT(elm)) { @@ -636,8 +690,9 @@ Node* RB_NEXT(Node* elm) { return elm; } -template <typename Node> -Node* RB_PREV(Node* elm) { +template <typename T> +requires HasRBEntry<T> +constexpr T* RB_PREV(T* elm) { if (RB_LEFT(elm)) { elm = RB_LEFT(elm); while (RB_RIGHT(elm)) { @@ -656,30 +711,32 @@ Node* RB_PREV(Node* elm) { return elm; } -template <typename Node> -Node* RB_MINMAX(RBHead<Node>* head, bool is_min) { - Node* tmp = head->Root(); - Node* parent = nullptr; +template <typename T> +requires HasRBEntry<T> +constexpr T* RB_MIN(RBHead<T>& head) { + T* tmp = head.Root(); + T* parent = nullptr; while (tmp) { parent = tmp; - if (is_min) { - tmp = RB_LEFT(tmp); - } else { - tmp = RB_RIGHT(tmp); - } + tmp = RB_LEFT(tmp); } return parent; } -template <typename Node> -Node* RB_MIN(RBHead<Node>* head) { - return RB_MINMAX(head, true); -} +template <typename T> +requires HasRBEntry<T> +constexpr T* RB_MAX(RBHead<T>& head) { + T* tmp = head.Root(); + T* parent = nullptr; -template <typename Node> -Node* RB_MAX(RBHead<Node>* head) { - return RB_MINMAX(head, false); + while (tmp) { + parent = tmp; + tmp = RB_RIGHT(tmp); + } + + return parent; } -} // namespace Common + +} // namespace Common::freebsd diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index fbeacc7e2..d81edb140 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -1,8 +1,12 @@ -// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator +// Project Licensed under GPLv2 or any later version Refer to the license.txt file included. +#include <array> #include <cstring> +#include <iterator> +#include <span> +#include <string_view> +#include "common/bit_util.h" #include "common/common_types.h" #include "common/x64/cpu_detect.h" @@ -17,7 +21,7 @@ // clang-format on #endif -static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { +static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) { #if defined(__DragonFly__) || defined(__FreeBSD__) // Despite the name, this is just do_cpuid() with ECX as second input. cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); @@ -30,7 +34,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { #endif } -static inline void __cpuid(int info[4], int function_id) { +static inline void __cpuid(int info[4], u32 function_id) { return __cpuidex(info, function_id, 0); } @@ -45,6 +49,17 @@ static inline u64 _xgetbv(u32 index) { namespace Common { +CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) { + if (brand_string == "GenuineIntel") { + return Manufacturer::Intel; + } else if (brand_string == "AuthenticAMD") { + return Manufacturer::AMD; + } else if (brand_string == "HygonGenuine") { + return Manufacturer::Hygon; + } + return Manufacturer::Unknown; +} + // Detects the various CPU features static CPUCaps Detect() { CPUCaps caps = {}; @@ -53,75 +68,74 @@ static CPUCaps Detect() { // yuzu at all anyway int cpu_id[4]; - memset(caps.brand_string, 0, sizeof(caps.brand_string)); - // Detect CPU's CPUID capabilities and grab CPU string + // Detect CPU's CPUID capabilities and grab manufacturer string __cpuid(cpu_id, 0x00000000); - u32 max_std_fn = cpu_id[0]; // EAX - - std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); - std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); - std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); - if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) - caps.manufacturer = Manufacturer::Intel; - else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) - caps.manufacturer = Manufacturer::AMD; - else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) - caps.manufacturer = Manufacturer::Hygon; - else - caps.manufacturer = Manufacturer::Unknown; + const u32 max_std_fn = cpu_id[0]; // EAX - __cpuid(cpu_id, 0x80000000); + std::memset(caps.brand_string, 0, std::size(caps.brand_string)); + std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32)); + std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32)); + std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32)); + + caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string); - u32 max_ex_fn = cpu_id[0]; + // Set reasonable default cpu string even if brand string not available + std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string)); - // Set reasonable default brand string even if brand string not available - strcpy(caps.cpu_string, caps.brand_string); + __cpuid(cpu_id, 0x80000000); + + const u32 max_ex_fn = cpu_id[0]; // Detect family and other miscellaneous features if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); - if ((cpu_id[3] >> 25) & 1) - caps.sse = true; - if ((cpu_id[3] >> 26) & 1) - caps.sse2 = true; - if ((cpu_id[2]) & 1) - caps.sse3 = true; - if ((cpu_id[2] >> 9) & 1) - caps.ssse3 = true; - if ((cpu_id[2] >> 19) & 1) - caps.sse4_1 = true; - if ((cpu_id[2] >> 20) & 1) - caps.sse4_2 = true; - if ((cpu_id[2] >> 25) & 1) - caps.aes = true; + caps.sse = Common::Bit<25>(cpu_id[3]); + caps.sse2 = Common::Bit<26>(cpu_id[3]); + caps.sse3 = Common::Bit<0>(cpu_id[2]); + caps.pclmulqdq = Common::Bit<1>(cpu_id[2]); + caps.ssse3 = Common::Bit<9>(cpu_id[2]); + caps.sse4_1 = Common::Bit<19>(cpu_id[2]); + caps.sse4_2 = Common::Bit<20>(cpu_id[2]); + caps.movbe = Common::Bit<22>(cpu_id[2]); + caps.popcnt = Common::Bit<23>(cpu_id[2]); + caps.aes = Common::Bit<25>(cpu_id[2]); + caps.f16c = Common::Bit<29>(cpu_id[2]); // AVX support requires 3 separate checks: // - Is the AVX bit set in CPUID? // - Is the XSAVE bit set in CPUID? // - XGETBV result has the XCR bit set. - if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { + if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) { if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { caps.avx = true; - if ((cpu_id[2] >> 12) & 1) + if (Common::Bit<12>(cpu_id[2])) caps.fma = true; } } if (max_std_fn >= 7) { __cpuidex(cpu_id, 0x00000007, 0x00000000); - // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed - if ((cpu_id[1] >> 5) & 1) - caps.avx2 = caps.avx; - if ((cpu_id[1] >> 3) & 1) - caps.bmi1 = true; - if ((cpu_id[1] >> 8) & 1) - caps.bmi2 = true; - // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) - if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && - (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { - caps.avx512 = caps.avx2; + // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed + if (caps.avx) { + caps.avx2 = Common::Bit<5>(cpu_id[1]); + caps.avx512f = Common::Bit<16>(cpu_id[1]); + caps.avx512dq = Common::Bit<17>(cpu_id[1]); + caps.avx512cd = Common::Bit<28>(cpu_id[1]); + caps.avx512bw = Common::Bit<30>(cpu_id[1]); + caps.avx512vl = Common::Bit<31>(cpu_id[1]); + caps.avx512vbmi = Common::Bit<1>(cpu_id[2]); + caps.avx512bitalg = Common::Bit<12>(cpu_id[2]); } + + caps.bmi1 = Common::Bit<3>(cpu_id[1]); + caps.bmi2 = Common::Bit<8>(cpu_id[1]); + caps.sha = Common::Bit<29>(cpu_id[1]); + + caps.gfni = Common::Bit<8>(cpu_id[2]); + + __cpuidex(cpu_id, 0x00000007, 0x00000001); + caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]); } } @@ -138,15 +152,13 @@ static CPUCaps Detect() { if (max_ex_fn >= 0x80000001) { // Check for more features __cpuid(cpu_id, 0x80000001); - if ((cpu_id[2] >> 16) & 1) - caps.fma4 = true; + caps.lzcnt = Common::Bit<5>(cpu_id[2]); + caps.fma4 = Common::Bit<16>(cpu_id[2]); } if (max_ex_fn >= 0x80000007) { __cpuid(cpu_id, 0x80000007); - if (cpu_id[3] & (1 << 8)) { - caps.invariant_tsc = true; - } + caps.invariant_tsc = Common::Bit<8>(cpu_id[3]); } if (max_std_fn >= 0x16) { diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index e3b63302e..40c48b132 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -1,42 +1,65 @@ -// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator +// Project Project Licensed under GPLv2 or any later version Refer to the license.txt file included. #pragma once -namespace Common { +#include <string_view> +#include "common/common_types.h" -enum class Manufacturer : u32 { - Intel = 0, - AMD = 1, - Hygon = 2, - Unknown = 3, -}; +namespace Common { /// x86/x64 CPU capabilities that may be detected by this module struct CPUCaps { + + enum class Manufacturer : u8 { + Unknown = 0, + Intel = 1, + AMD = 2, + Hygon = 3, + }; + + static Manufacturer ParseManufacturer(std::string_view brand_string); + Manufacturer manufacturer; - char cpu_string[0x21]; - char brand_string[0x41]; - bool sse; - bool sse2; - bool sse3; - bool ssse3; - bool sse4_1; - bool sse4_2; - bool lzcnt; - bool avx; - bool avx2; - bool avx512; - bool bmi1; - bool bmi2; - bool fma; - bool fma4; - bool aes; - bool invariant_tsc; + char brand_string[13]; + + char cpu_string[48]; + u32 base_frequency; u32 max_frequency; u32 bus_frequency; + + bool sse : 1; + bool sse2 : 1; + bool sse3 : 1; + bool ssse3 : 1; + bool sse4_1 : 1; + bool sse4_2 : 1; + + bool avx : 1; + bool avx_vnni : 1; + bool avx2 : 1; + bool avx512f : 1; + bool avx512dq : 1; + bool avx512cd : 1; + bool avx512bw : 1; + bool avx512vl : 1; + bool avx512vbmi : 1; + bool avx512bitalg : 1; + + bool aes : 1; + bool bmi1 : 1; + bool bmi2 : 1; + bool f16c : 1; + bool fma : 1; + bool fma4 : 1; + bool gfni : 1; + bool invariant_tsc : 1; + bool lzcnt : 1; + bool movbe : 1; + bool pclmulqdq : 1; + bool popcnt : 1; + bool sha : 1; }; /** diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 7ed43bfb1..1f234c822 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -154,6 +154,7 @@ add_library(core STATIC hle/api_version.h hle/ipc.h hle/ipc_helpers.h + hle/kernel/board/nintendo/nx/k_memory_layout.h hle/kernel/board/nintendo/nx/k_system_control.cpp hle/kernel/board/nintendo/nx/k_system_control.h hle/kernel/board/nintendo/nx/secure_monitor.h @@ -166,6 +167,7 @@ add_library(core STATIC hle/kernel/hle_ipc.h hle/kernel/init/init_slab_setup.cpp hle/kernel/init/init_slab_setup.h + hle/kernel/initial_process.h hle/kernel/k_address_arbiter.cpp hle/kernel/k_address_arbiter.h hle/kernel/k_address_space_info.cpp @@ -207,6 +209,7 @@ add_library(core STATIC hle/kernel/k_memory_region.h hle/kernel/k_memory_region_type.h hle/kernel/k_page_bitmap.h + hle/kernel/k_page_buffer.h hle/kernel/k_page_heap.cpp hle/kernel/k_page_heap.h hle/kernel/k_page_linked_list.h @@ -244,6 +247,8 @@ add_library(core STATIC hle/kernel/k_system_control.h hle/kernel/k_thread.cpp hle/kernel/k_thread.h + hle/kernel/k_thread_local_page.cpp + hle/kernel/k_thread_local_page.h hle/kernel/k_thread_queue.cpp hle/kernel/k_thread_queue.h hle/kernel/k_trace.h diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b0d89c539..c1c843b8f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.only_detect_misalignment_via_page_table_on_page_boundary = true; + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; // Multi-process state config.processor_id = core_index; @@ -146,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.wall_clock_cntpct = uses_wall_clock; // Code cache size - config.code_cache_size = 512_MiB; - config.far_code_offset = 400_MiB; + config.code_cache_size = 128_MiB; + config.far_code_offset = 100_MiB; // Safe optimizations if (Settings::values.cpu_debug_mode) { @@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_unique<Dynarmic::A32::Jit>(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 56836bd05..aa74fce4d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.fastmem_pointer = page_table->fastmem_arena; config.fastmem_address_space_bits = address_space_bits; config.silently_mirror_fastmem = false; + + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; } // Multi-process state @@ -205,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.wall_clock_cntpct = uses_wall_clock; // Code cache size - config.code_cache_size = 512_MiB; - config.far_code_offset = 400_MiB; + config.code_cache_size = 128_MiB; + config.far_code_offset = 100_MiB; // Safe optimizations if (Settings::values.cpu_debug_mode) { @@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_fastmem_check) { config.fastmem_address_space_bits = 64; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; config.fastmem_address_space_bits = 64; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_shared<Dynarmic::A64::Jit>(config); diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp index 397d054a8..ea6b224e0 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad }); } -void DynarmicExclusiveMonitor::ClearExclusive() { - monitor.Clear(); +void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) { + monitor.ClearProcessor(core_index); } bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h index 265c4ecef..5a15b43ef 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.h +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -29,7 +29,7 @@ public: u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; - void ClearExclusive() override; + void ClearExclusive(std::size_t core_index) override; bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index 62f6e6023..9914ca3da 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -23,7 +23,7 @@ public: virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0; - virtual void ClearExclusive() = 0; + virtual void ClearExclusive(std::size_t core_index) = 0; virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0; diff --git a/src/core/core.cpp b/src/core/core.cpp index b0cfee3ee..c60a784c3 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -326,7 +326,9 @@ struct System::Impl { is_powered_on = false; exit_lock = false; - gpu_core->NotifyShutdown(); + if (gpu_core != nullptr) { + gpu_core->NotifyShutdown(); + } services.reset(); service_manager.reset(); diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index e413a520a..b3bffecb2 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -42,11 +42,20 @@ public: context.MakeCurrent(); } ~Scoped() { - context.DoneCurrent(); + if (active) { + context.DoneCurrent(); + } + } + + /// In the event that context was destroyed before the Scoped is destroyed, this provides a + /// mechanism to prevent calling a destroyed object's method during the deconstructor + void Cancel() { + active = false; } private: GraphicsContext& context; + bool active{true}; }; /// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index 026257115..3c4e45fcd 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h @@ -385,7 +385,7 @@ public: T PopRaw(); template <class T> - std::shared_ptr<T> PopIpcInterface() { + std::weak_ptr<T> PopIpcInterface() { ASSERT(context->Session()->IsDomain()); ASSERT(context->GetDomainMessageHeader().input_object_count > 0); return context->GetDomainHandler<T>(Pop<u32>() - 1); diff --git a/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h new file mode 100644 index 000000000..01e225088 --- /dev/null +++ b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h @@ -0,0 +1,13 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Kernel { + +constexpr inline PAddr MainMemoryAddress = 0x80000000; + +} // namespace Kernel diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp index 702cacffc..8027bec00 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp @@ -39,6 +39,10 @@ Smc::MemoryArrangement GetMemoryArrangeForInit() { } } // namespace +size_t KSystemControl::Init::GetRealMemorySize() { + return GetIntendedMemorySize(); +} + // Initialization. size_t KSystemControl::Init::GetIntendedMemorySize() { switch (GetMemorySizeForInit()) { @@ -53,7 +57,13 @@ size_t KSystemControl::Init::GetIntendedMemorySize() { } PAddr KSystemControl::Init::GetKernelPhysicalBaseAddress(u64 base_address) { - return base_address; + const size_t real_dram_size = KSystemControl::Init::GetRealMemorySize(); + const size_t intended_dram_size = KSystemControl::Init::GetIntendedMemorySize(); + if (intended_dram_size * 2 < real_dram_size) { + return base_address; + } else { + return base_address + ((real_dram_size - intended_dram_size) / 2); + } } bool KSystemControl::Init::ShouldIncreaseThreadResourceLimit() { diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h index 52f230ced..df2a17f2a 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h @@ -13,6 +13,7 @@ public: class Init { public: // Initialization. + static std::size_t GetRealMemorySize(); static std::size_t GetIntendedMemorySize(); static PAddr GetKernelPhysicalBaseAddress(u64 base_address); static bool ShouldIncreaseThreadResourceLimit(); diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index e19544c54..9f2175f82 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -45,7 +45,7 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co LOG_CRITICAL(IPC, "object_id {} is too big!", object_id); return false; } - return DomainHandler(object_id - 1) != nullptr; + return DomainHandler(object_id - 1).lock() != nullptr; } else { return session_handler != nullptr; } @@ -53,9 +53,6 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co void SessionRequestHandler::ClientConnected(KServerSession* session) { session->ClientConnected(shared_from_this()); - - // Ensure our server session is tracked globally. - kernel.RegisterServerSession(session); } void SessionRequestHandler::ClientDisconnected(KServerSession* session) { diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index 754b41ff6..670cc741c 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h @@ -94,6 +94,7 @@ protected: std::weak_ptr<ServiceThread> service_thread; }; +using SessionRequestHandlerWeakPtr = std::weak_ptr<SessionRequestHandler>; using SessionRequestHandlerPtr = std::shared_ptr<SessionRequestHandler>; /** @@ -139,7 +140,7 @@ public: } } - SessionRequestHandlerPtr DomainHandler(std::size_t index) const { + SessionRequestHandlerWeakPtr DomainHandler(std::size_t index) const { ASSERT_MSG(index < DomainHandlerCount(), "Unexpected handler index {}", index); return domain_handlers.at(index); } @@ -328,10 +329,10 @@ public: template <typename T> std::shared_ptr<T> GetDomainHandler(std::size_t index) const { - return std::static_pointer_cast<T>(manager->DomainHandler(index)); + return std::static_pointer_cast<T>(manager.lock()->DomainHandler(index).lock()); } - void SetSessionRequestManager(std::shared_ptr<SessionRequestManager> manager_) { + void SetSessionRequestManager(std::weak_ptr<SessionRequestManager> manager_) { manager = std::move(manager_); } @@ -374,7 +375,7 @@ private: u32 handles_offset{}; u32 domain_offset{}; - std::shared_ptr<SessionRequestManager> manager; + std::weak_ptr<SessionRequestManager> manager; KernelCore& kernel; Core::Memory::Memory& memory; diff --git a/src/core/hle/kernel/init/init_slab_setup.cpp b/src/core/hle/kernel/init/init_slab_setup.cpp index 36fc0944a..b0f773ee0 100644 --- a/src/core/hle/kernel/init/init_slab_setup.cpp +++ b/src/core/hle/kernel/init/init_slab_setup.cpp @@ -7,19 +7,23 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "core/core.h" +#include "core/device_memory.h" #include "core/hardware_properties.h" #include "core/hle/kernel/init/init_slab_setup.h" #include "core/hle/kernel/k_code_memory.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_memory_layout.h" #include "core/hle/kernel/k_memory_manager.h" +#include "core/hle/kernel/k_page_buffer.h" #include "core/hle/kernel/k_port.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_session.h" #include "core/hle/kernel/k_shared_memory.h" +#include "core/hle/kernel/k_shared_memory_info.h" #include "core/hle/kernel/k_system_control.h" #include "core/hle/kernel/k_thread.h" +#include "core/hle/kernel/k_thread_local_page.h" #include "core/hle/kernel/k_transfer_memory.h" namespace Kernel::Init { @@ -32,9 +36,13 @@ namespace Kernel::Init { HANDLER(KEvent, (SLAB_COUNT(KEvent)), ##__VA_ARGS__) \ HANDLER(KPort, (SLAB_COUNT(KPort)), ##__VA_ARGS__) \ HANDLER(KSharedMemory, (SLAB_COUNT(KSharedMemory)), ##__VA_ARGS__) \ + HANDLER(KSharedMemoryInfo, (SLAB_COUNT(KSharedMemory) * 8), ##__VA_ARGS__) \ HANDLER(KTransferMemory, (SLAB_COUNT(KTransferMemory)), ##__VA_ARGS__) \ HANDLER(KCodeMemory, (SLAB_COUNT(KCodeMemory)), ##__VA_ARGS__) \ HANDLER(KSession, (SLAB_COUNT(KSession)), ##__VA_ARGS__) \ + HANDLER(KThreadLocalPage, \ + (SLAB_COUNT(KProcess) + (SLAB_COUNT(KProcess) + SLAB_COUNT(KThread)) / 8), \ + ##__VA_ARGS__) \ HANDLER(KResourceLimit, (SLAB_COUNT(KResourceLimit)), ##__VA_ARGS__) namespace { @@ -50,38 +58,46 @@ enum KSlabType : u32 { // Constexpr counts. constexpr size_t SlabCountKProcess = 80; constexpr size_t SlabCountKThread = 800; -constexpr size_t SlabCountKEvent = 700; +constexpr size_t SlabCountKEvent = 900; constexpr size_t SlabCountKInterruptEvent = 100; -constexpr size_t SlabCountKPort = 256 + 0x20; // Extra 0x20 ports over Nintendo for homebrew. +constexpr size_t SlabCountKPort = 384; constexpr size_t SlabCountKSharedMemory = 80; constexpr size_t SlabCountKTransferMemory = 200; constexpr size_t SlabCountKCodeMemory = 10; constexpr size_t SlabCountKDeviceAddressSpace = 300; -constexpr size_t SlabCountKSession = 933; +constexpr size_t SlabCountKSession = 1133; constexpr size_t SlabCountKLightSession = 100; constexpr size_t SlabCountKObjectName = 7; constexpr size_t SlabCountKResourceLimit = 5; constexpr size_t SlabCountKDebug = Core::Hardware::NUM_CPU_CORES; -constexpr size_t SlabCountKAlpha = 1; -constexpr size_t SlabCountKBeta = 6; +constexpr size_t SlabCountKIoPool = 1; +constexpr size_t SlabCountKIoRegion = 6; constexpr size_t SlabCountExtraKThread = 160; +/// Helper function to translate from the slab virtual address to the reserved location in physical +/// memory. +static PAddr TranslateSlabAddrToPhysical(KMemoryLayout& memory_layout, VAddr slab_addr) { + slab_addr -= memory_layout.GetSlabRegionAddress(); + return slab_addr + Core::DramMemoryMap::SlabHeapBase; +} + template <typename T> VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAddr address, size_t num_objects) { - // TODO(bunnei): This is just a place holder. We should initialize the appropriate KSlabHeap for - // kernel object type T with the backing kernel memory pointer once we emulate kernel memory. const size_t size = Common::AlignUp(sizeof(T) * num_objects, alignof(void*)); VAddr start = Common::AlignUp(address, alignof(T)); - // This is intentionally empty. Once KSlabHeap is fully implemented, we can replace this with - // the pointer to emulated memory to pass along. Until then, KSlabHeap will just allocate/free - // host memory. - void* backing_kernel_memory{}; + // This should use the virtual memory address passed in, but currently, we do not setup the + // kernel virtual memory layout. Instead, we simply map these at a region of physical memory + // that we reserve for the slab heaps. + // TODO(bunnei): Fix this once we support the kernel virtual memory layout. if (size > 0) { + void* backing_kernel_memory{ + system.DeviceMemory().GetPointer(TranslateSlabAddrToPhysical(memory_layout, start))}; + const KMemoryRegion* region = memory_layout.FindVirtual(start + size - 1); ASSERT(region != nullptr); ASSERT(region->IsDerivedFrom(KMemoryRegionType_KernelSlab)); @@ -91,6 +107,12 @@ VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAd return start + size; } +size_t CalculateSlabHeapGapSize() { + constexpr size_t KernelSlabHeapGapSize = 2_MiB - 296_KiB; + static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax); + return KernelSlabHeapGapSize; +} + } // namespace KSlabResourceCounts KSlabResourceCounts::CreateDefault() { @@ -109,8 +131,8 @@ KSlabResourceCounts KSlabResourceCounts::CreateDefault() { .num_KObjectName = SlabCountKObjectName, .num_KResourceLimit = SlabCountKResourceLimit, .num_KDebug = SlabCountKDebug, - .num_KAlpha = SlabCountKAlpha, - .num_KBeta = SlabCountKBeta, + .num_KIoPool = SlabCountKIoPool, + .num_KIoRegion = SlabCountKIoRegion, }; } @@ -136,11 +158,34 @@ size_t CalculateTotalSlabHeapSize(const KernelCore& kernel) { #undef ADD_SLAB_SIZE // Add the reserved size. - size += KernelSlabHeapGapsSize; + size += CalculateSlabHeapGapSize(); return size; } +void InitializeKPageBufferSlabHeap(Core::System& system) { + auto& kernel = system.Kernel(); + + const auto& counts = kernel.SlabResourceCounts(); + const size_t num_pages = + counts.num_KProcess + counts.num_KThread + (counts.num_KProcess + counts.num_KThread) / 8; + const size_t slab_size = num_pages * PageSize; + + // Reserve memory from the system resource limit. + ASSERT(kernel.GetSystemResourceLimit()->Reserve(LimitableResource::PhysicalMemory, slab_size)); + + // Allocate memory for the slab. + constexpr auto AllocateOption = KMemoryManager::EncodeOption( + KMemoryManager::Pool::System, KMemoryManager::Direction::FromFront); + const PAddr slab_address = + kernel.MemoryManager().AllocateAndOpenContinuous(num_pages, 1, AllocateOption); + ASSERT(slab_address != 0); + + // Initialize the slabheap. + KPageBuffer::InitializeSlabHeap(kernel, system.DeviceMemory().GetPointer(slab_address), + slab_size); +} + void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) { auto& kernel = system.Kernel(); @@ -160,13 +205,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) { } // Create an array to represent the gaps between the slabs. - const size_t total_gap_size = KernelSlabHeapGapsSize; + const size_t total_gap_size = CalculateSlabHeapGapSize(); std::array<size_t, slab_types.size()> slab_gaps; - for (size_t i = 0; i < slab_gaps.size(); i++) { + for (auto& slab_gap : slab_gaps) { // Note: This is an off-by-one error from Nintendo's intention, because GenerateRandomRange // is inclusive. However, Nintendo also has the off-by-one error, and it's "harmless", so we // will include it ourselves. - slab_gaps[i] = KSystemControl::GenerateRandomRange(0, total_gap_size); + slab_gap = KSystemControl::GenerateRandomRange(0, total_gap_size); } // Sort the array, so that we can treat differences between values as offsets to the starts of @@ -177,13 +222,21 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) { } } - for (size_t i = 0; i < slab_types.size(); i++) { + // Track the gaps, so that we can free them to the unused slab tree. + VAddr gap_start = address; + size_t gap_size = 0; + + for (size_t i = 0; i < slab_gaps.size(); i++) { // Add the random gap to the address. - address += (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1]; + const auto cur_gap = (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1]; + address += cur_gap; + gap_size += cur_gap; #define INITIALIZE_SLAB_HEAP(NAME, COUNT, ...) \ case KSlabType_##NAME: \ - address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT); \ + if (COUNT > 0) { \ + address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT); \ + } \ break; // Initialize the slabheap. @@ -192,7 +245,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) { FOREACH_SLAB_TYPE(INITIALIZE_SLAB_HEAP) // If we somehow get an invalid type, abort. default: - UNREACHABLE(); + UNREACHABLE_MSG("Unknown slab type: {}", slab_types[i]); + } + + // If we've hit the end of a gap, free it. + if (gap_start + gap_size != address) { + gap_start = address; + gap_size = 0; } } } diff --git a/src/core/hle/kernel/init/init_slab_setup.h b/src/core/hle/kernel/init/init_slab_setup.h index a8f7e0918..f54b67d02 100644 --- a/src/core/hle/kernel/init/init_slab_setup.h +++ b/src/core/hle/kernel/init/init_slab_setup.h @@ -32,12 +32,13 @@ struct KSlabResourceCounts { size_t num_KObjectName; size_t num_KResourceLimit; size_t num_KDebug; - size_t num_KAlpha; - size_t num_KBeta; + size_t num_KIoPool; + size_t num_KIoRegion; }; void InitializeSlabResourceCounts(KernelCore& kernel); size_t CalculateTotalSlabHeapSize(const KernelCore& kernel); +void InitializeKPageBufferSlabHeap(Core::System& system); void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout); } // namespace Kernel::Init diff --git a/src/core/hle/kernel/initial_process.h b/src/core/hle/kernel/initial_process.h new file mode 100644 index 000000000..25b27909c --- /dev/null +++ b/src/core/hle/kernel/initial_process.h @@ -0,0 +1,23 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "common/literals.h" +#include "core/hle/kernel/board/nintendo/nx/k_memory_layout.h" +#include "core/hle/kernel/board/nintendo/nx/k_system_control.h" + +namespace Kernel { + +using namespace Common::Literals; + +constexpr std::size_t InitialProcessBinarySizeMax = 12_MiB; + +static inline PAddr GetInitialProcessBinaryPhysicalAddress() { + return Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetKernelPhysicalBaseAddress( + MainMemoryAddress); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 783c69858..8cdd0490f 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu } } else { // Otherwise, clear our exclusive hold and finish - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. @@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 } } else { // Otherwise, clear our exclusive hold and finish. - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. @@ -115,7 +115,7 @@ ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) { { KScopedSchedulerLock sl(kernel); - auto it = thread_tree.nfind_light({addr, -1}); + auto it = thread_tree.nfind_key({addr, -1}); while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && (it->GetAddressArbiterKey() == addr)) { // End the thread's wait. @@ -148,7 +148,7 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 return ResultInvalidState; } - auto it = thread_tree.nfind_light({addr, -1}); + auto it = thread_tree.nfind_key({addr, -1}); while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && (it->GetAddressArbiterKey() == addr)) { // End the thread's wait. @@ -171,7 +171,7 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 { [[maybe_unused]] const KScopedSchedulerLock sl(kernel); - auto it = thread_tree.nfind_light({addr, -1}); + auto it = thread_tree.nfind_key({addr, -1}); // Determine the updated value. s32 new_value{}; if (count <= 0) { diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index aadcc297a..8e2a9593c 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -244,7 +244,7 @@ void KConditionVariable::Signal(u64 cv_key, s32 count) { { KScopedSchedulerLock sl(kernel); - auto it = thread_tree.nfind_light({cv_key, -1}); + auto it = thread_tree.nfind_key({cv_key, -1}); while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && (it->GetConditionVariableKey() == cv_key)) { KThread* target_thread = std::addressof(*it); diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h index 57ff538cc..0858827b6 100644 --- a/src/core/hle/kernel/k_memory_layout.h +++ b/src/core/hle/kernel/k_memory_layout.h @@ -57,11 +57,11 @@ constexpr std::size_t KernelPageTableHeapSize = GetMaximumOverheadSize(MainMemor constexpr std::size_t KernelInitialPageHeapSize = 128_KiB; constexpr std::size_t KernelSlabHeapDataSize = 5_MiB; -constexpr std::size_t KernelSlabHeapGapsSize = 2_MiB - 64_KiB; -constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSize; +constexpr std::size_t KernelSlabHeapGapsSizeMax = 2_MiB - 64_KiB; +constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSizeMax; // NOTE: This is calculated from KThread slab counts, assuming KThread size <= 0x860. -constexpr std::size_t KernelSlabHeapAdditionalSize = 416_KiB; +constexpr std::size_t KernelSlabHeapAdditionalSize = 0x68000; constexpr std::size_t KernelResourceSize = KernelPageTableHeapSize + KernelInitialPageHeapSize + KernelSlabHeapSize; @@ -173,6 +173,10 @@ public: return Dereference(FindVirtualLinear(address)); } + const KMemoryRegion& GetPhysicalLinearRegion(PAddr address) const { + return Dereference(FindPhysicalLinear(address)); + } + const KMemoryRegion* GetPhysicalKernelTraceBufferRegion() const { return GetPhysicalMemoryRegionTree().FindFirstDerived(KMemoryRegionType_KernelTraceBuffer); } diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp index 1b44541b1..a2f18f643 100644 --- a/src/core/hle/kernel/k_memory_manager.cpp +++ b/src/core/hle/kernel/k_memory_manager.cpp @@ -10,189 +10,412 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/device_memory.h" +#include "core/hle/kernel/initial_process.h" #include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_page_linked_list.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/svc_results.h" +#include "core/memory.h" namespace Kernel { -KMemoryManager::KMemoryManager(Core::System& system_) : system{system_} {} +namespace { + +constexpr KMemoryManager::Pool GetPoolFromMemoryRegionType(u32 type) { + if ((type | KMemoryRegionType_DramApplicationPool) == type) { + return KMemoryManager::Pool::Application; + } else if ((type | KMemoryRegionType_DramAppletPool) == type) { + return KMemoryManager::Pool::Applet; + } else if ((type | KMemoryRegionType_DramSystemPool) == type) { + return KMemoryManager::Pool::System; + } else if ((type | KMemoryRegionType_DramSystemNonSecurePool) == type) { + return KMemoryManager::Pool::SystemNonSecure; + } else { + UNREACHABLE_MSG("InvalidMemoryRegionType for conversion to Pool"); + return {}; + } +} -std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) { - const auto size{end_address - start_address}; +} // namespace + +KMemoryManager::KMemoryManager(Core::System& system_) + : system{system_}, pool_locks{ + KLightLock{system_.Kernel()}, + KLightLock{system_.Kernel()}, + KLightLock{system_.Kernel()}, + KLightLock{system_.Kernel()}, + } {} + +void KMemoryManager::Initialize(VAddr management_region, size_t management_region_size) { + + // Clear the management region to zero. + const VAddr management_region_end = management_region + management_region_size; + + // Reset our manager count. + num_managers = 0; + + // Traverse the virtual memory layout tree, initializing each manager as appropriate. + while (num_managers != MaxManagerCount) { + // Locate the region that should initialize the current manager. + PAddr region_address = 0; + size_t region_size = 0; + Pool region_pool = Pool::Count; + for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) { + // We only care about regions that we need to create managers for. + if (!it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) { + continue; + } - // Calculate metadata sizes - const auto ref_count_size{(size / PageSize) * sizeof(u16)}; - const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)}; - const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)}; - const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)}; - const auto total_metadata_size{manager_size + page_heap_size}; - ASSERT(manager_size <= total_metadata_size); - ASSERT(Common::IsAligned(total_metadata_size, PageSize)); + // We want to initialize the managers in order. + if (it.GetAttributes() != num_managers) { + continue; + } - // Setup region - pool = new_pool; + const PAddr cur_start = it.GetAddress(); + const PAddr cur_end = it.GetEndAddress(); + + // Validate the region. + ASSERT(cur_end != 0); + ASSERT(cur_start != 0); + ASSERT(it.GetSize() > 0); + + // Update the region's extents. + if (region_address == 0) { + region_address = cur_start; + region_size = it.GetSize(); + region_pool = GetPoolFromMemoryRegionType(it.GetType()); + } else { + ASSERT(cur_start == region_address + region_size); + + // Update the size. + region_size = cur_end - region_address; + ASSERT(GetPoolFromMemoryRegionType(it.GetType()) == region_pool); + } + } + + // If we didn't find a region, we're done. + if (region_size == 0) { + break; + } - // Initialize the manager's KPageHeap - heap.Initialize(start_address, size, page_heap_size); + // Initialize a new manager for the region. + Impl* manager = std::addressof(managers[num_managers++]); + ASSERT(num_managers <= managers.size()); + + const size_t cur_size = manager->Initialize(region_address, region_size, management_region, + management_region_end, region_pool); + management_region += cur_size; + ASSERT(management_region <= management_region_end); + + // Insert the manager into the pool list. + const auto region_pool_index = static_cast<u32>(region_pool); + if (pool_managers_tail[region_pool_index] == nullptr) { + pool_managers_head[region_pool_index] = manager; + } else { + pool_managers_tail[region_pool_index]->SetNext(manager); + manager->SetPrev(pool_managers_tail[region_pool_index]); + } + pool_managers_tail[region_pool_index] = manager; + } - // Free the memory to the heap - heap.Free(start_address, size / PageSize); + // Free each region to its corresponding heap. + size_t reserved_sizes[MaxManagerCount] = {}; + const PAddr ini_start = GetInitialProcessBinaryPhysicalAddress(); + const PAddr ini_end = ini_start + InitialProcessBinarySizeMax; + const PAddr ini_last = ini_end - 1; + for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) { + if (it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) { + // Get the manager for the region. + auto index = it.GetAttributes(); + auto& manager = managers[index]; + + const PAddr cur_start = it.GetAddress(); + const PAddr cur_last = it.GetLastAddress(); + const PAddr cur_end = it.GetEndAddress(); + + if (cur_start <= ini_start && ini_last <= cur_last) { + // Free memory before the ini to the heap. + if (cur_start != ini_start) { + manager.Free(cur_start, (ini_start - cur_start) / PageSize); + } - // Update the heap's used size - heap.UpdateUsedSize(); + // Open/reserve the ini memory. + manager.OpenFirst(ini_start, InitialProcessBinarySizeMax / PageSize); + reserved_sizes[it.GetAttributes()] += InitialProcessBinarySizeMax; - return total_metadata_size; -} + // Free memory after the ini to the heap. + if (ini_last != cur_last) { + ASSERT(cur_end != 0); + manager.Free(ini_end, cur_end - ini_end); + } + } else { + // Ensure there's no partial overlap with the ini image. + if (cur_start <= ini_last) { + ASSERT(cur_last < ini_start); + } else { + // Otherwise, check the region for general validity. + ASSERT(cur_end != 0); + } -void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) { - ASSERT(pool < Pool::Count); - managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address); + // Free the memory to the heap. + manager.Free(cur_start, it.GetSize() / PageSize); + } + } + } + + // Update the used size for all managers. + for (size_t i = 0; i < num_managers; ++i) { + managers[i].SetInitialUsedHeapSize(reserved_sizes[i]); + } } -VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages, - u32 option) { - // Early return if we're allocating no pages +PAddr KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option) { + // Early return if we're allocating no pages. if (num_pages == 0) { - return {}; + return 0; } - // Lock the pool that we're allocating from + // Lock the pool that we're allocating from. const auto [pool, dir] = DecodeOption(option); - const auto pool_index{static_cast<std::size_t>(pool)}; - std::lock_guard lock{pool_locks[pool_index]}; - - // Choose a heap based on our page size request - const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)}; - - // Loop, trying to iterate from each block - // TODO (bunnei): Support multiple managers - Impl& chosen_manager{managers[pool_index]}; - VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)}; + KScopedLightLock lk(pool_locks[static_cast<std::size_t>(pool)]); + + // Choose a heap based on our page size request. + const s32 heap_index = KPageHeap::GetAlignedBlockIndex(num_pages, align_pages); + + // Loop, trying to iterate from each block. + Impl* chosen_manager = nullptr; + PAddr allocated_block = 0; + for (chosen_manager = this->GetFirstManager(pool, dir); chosen_manager != nullptr; + chosen_manager = this->GetNextManager(chosen_manager, dir)) { + allocated_block = chosen_manager->AllocateBlock(heap_index, true); + if (allocated_block != 0) { + break; + } + } - // If we failed to allocate, quit now - if (!allocated_block) { - return {}; + // If we failed to allocate, quit now. + if (allocated_block == 0) { + return 0; } - // If we allocated more than we need, free some - const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)}; + // If we allocated more than we need, free some. + const size_t allocated_pages = KPageHeap::GetBlockNumPages(heap_index); if (allocated_pages > num_pages) { - chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages); + chosen_manager->Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages); } + // Open the first reference to the pages. + chosen_manager->OpenFirst(allocated_block, num_pages); + return allocated_block; } -ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, - Direction dir, u32 heap_fill_value) { - ASSERT(page_list.GetNumPages() == 0); +ResultCode KMemoryManager::AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool, + Direction dir, bool random) { + // Choose a heap based on our page size request. + const s32 heap_index = KPageHeap::GetBlockIndex(num_pages); + R_UNLESS(0 <= heap_index, ResultOutOfMemory); + + // Ensure that we don't leave anything un-freed. + auto group_guard = SCOPE_GUARD({ + for (const auto& it : out->Nodes()) { + auto& manager = this->GetManager(system.Kernel().MemoryLayout(), it.GetAddress()); + const size_t num_pages_to_free = + std::min(it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize); + manager.Free(it.GetAddress(), num_pages_to_free); + } + }); - // Early return if we're allocating no pages - if (num_pages == 0) { - return ResultSuccess; - } + // Keep allocating until we've allocated all our pages. + for (s32 index = heap_index; index >= 0 && num_pages > 0; index--) { + const size_t pages_per_alloc = KPageHeap::GetBlockNumPages(index); + for (Impl* cur_manager = this->GetFirstManager(pool, dir); cur_manager != nullptr; + cur_manager = this->GetNextManager(cur_manager, dir)) { + while (num_pages >= pages_per_alloc) { + // Allocate a block. + PAddr allocated_block = cur_manager->AllocateBlock(index, random); + if (allocated_block == 0) { + break; + } - // Lock the pool that we're allocating from - const auto pool_index{static_cast<std::size_t>(pool)}; - std::lock_guard lock{pool_locks[pool_index]}; + // Safely add it to our group. + { + auto block_guard = + SCOPE_GUARD({ cur_manager->Free(allocated_block, pages_per_alloc); }); + R_TRY(out->AddBlock(allocated_block, pages_per_alloc)); + block_guard.Cancel(); + } - // Choose a heap based on our page size request - const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)}; - if (heap_index < 0) { - return ResultOutOfMemory; + num_pages -= pages_per_alloc; + } + } } - // TODO (bunnei): Support multiple managers - Impl& chosen_manager{managers[pool_index]}; + // Only succeed if we allocated as many pages as we wanted. + R_UNLESS(num_pages == 0, ResultOutOfMemory); - // Ensure that we don't leave anything un-freed - auto group_guard = detail::ScopeExit([&] { - for (const auto& it : page_list.Nodes()) { - const auto min_num_pages{std::min<size_t>( - it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; - chosen_manager.Free(it.GetAddress(), min_num_pages); - } - }); + // We succeeded! + group_guard.Cancel(); + return ResultSuccess; +} - // Keep allocating until we've allocated all our pages - for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) { - const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)}; +ResultCode KMemoryManager::AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option) { + ASSERT(out != nullptr); + ASSERT(out->GetNumPages() == 0); - while (num_pages >= pages_per_alloc) { - // Allocate a block - VAddr allocated_block{chosen_manager.AllocateBlock(index, false)}; - if (!allocated_block) { - break; - } + // Early return if we're allocating no pages. + R_SUCCEED_IF(num_pages == 0); - // Safely add it to our group - { - auto block_guard = detail::ScopeExit( - [&] { chosen_manager.Free(allocated_block, pages_per_alloc); }); + // Lock the pool that we're allocating from. + const auto [pool, dir] = DecodeOption(option); + KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]); + + // Allocate the page group. + R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false)); + + // Open the first reference to the pages. + for (const auto& block : out->Nodes()) { + PAddr cur_address = block.GetAddress(); + size_t remaining_pages = block.GetNumPages(); + while (remaining_pages > 0) { + // Get the manager for the current address. + auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address); + + // Process part or all of the block. + const size_t cur_pages = + std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); + manager.OpenFirst(cur_address, cur_pages); + + // Advance. + cur_address += cur_pages * PageSize; + remaining_pages -= cur_pages; + } + } - if (const ResultCode result{page_list.AddBlock(allocated_block, pages_per_alloc)}; - result.IsError()) { - return result; - } + return ResultSuccess; +} - block_guard.Cancel(); - } +ResultCode KMemoryManager::AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages, + u32 option, u64 process_id, u8 fill_pattern) { + ASSERT(out != nullptr); + ASSERT(out->GetNumPages() == 0); - num_pages -= pages_per_alloc; - } - } + // Decode the option. + const auto [pool, dir] = DecodeOption(option); - // Clear allocated memory. - for (const auto& it : page_list.Nodes()) { - std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value, - it.GetSize()); + // Allocate the memory. + { + // Lock the pool that we're allocating from. + KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]); + + // Allocate the page group. + R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false)); + + // Open the first reference to the pages. + for (const auto& block : out->Nodes()) { + PAddr cur_address = block.GetAddress(); + size_t remaining_pages = block.GetNumPages(); + while (remaining_pages > 0) { + // Get the manager for the current address. + auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address); + + // Process part or all of the block. + const size_t cur_pages = + std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); + manager.OpenFirst(cur_address, cur_pages); + + // Advance. + cur_address += cur_pages * PageSize; + remaining_pages -= cur_pages; + } + } } - // Only succeed if we allocated as many pages as we wanted - if (num_pages) { - return ResultOutOfMemory; + // Set all the allocated memory. + for (const auto& block : out->Nodes()) { + std::memset(system.DeviceMemory().GetPointer(block.GetAddress()), fill_pattern, + block.GetSize()); } - // We succeeded! - group_guard.Cancel(); - return ResultSuccess; } -ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, - Direction dir, u32 heap_fill_value) { - // Early return if we're freeing no pages - if (!num_pages) { - return ResultSuccess; +void KMemoryManager::Open(PAddr address, size_t num_pages) { + // Repeatedly open references until we've done so for all pages. + while (num_pages) { + auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address); + const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address)); + + { + KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]); + manager.Open(address, cur_pages); + } + + num_pages -= cur_pages; + address += cur_pages * PageSize; } +} - // Lock the pool that we're freeing from - const auto pool_index{static_cast<std::size_t>(pool)}; - std::lock_guard lock{pool_locks[pool_index]}; +void KMemoryManager::Close(PAddr address, size_t num_pages) { + // Repeatedly close references until we've done so for all pages. + while (num_pages) { + auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address); + const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address)); - // TODO (bunnei): Support multiple managers - Impl& chosen_manager{managers[pool_index]}; + { + KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]); + manager.Close(address, cur_pages); + } - // Free all of the pages - for (const auto& it : page_list.Nodes()) { - const auto min_num_pages{std::min<size_t>( - it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; - chosen_manager.Free(it.GetAddress(), min_num_pages); + num_pages -= cur_pages; + address += cur_pages * PageSize; } +} - return ResultSuccess; +void KMemoryManager::Close(const KPageLinkedList& pg) { + for (const auto& node : pg.Nodes()) { + Close(node.GetAddress(), node.GetNumPages()); + } +} +void KMemoryManager::Open(const KPageLinkedList& pg) { + for (const auto& node : pg.Nodes()) { + Open(node.GetAddress(), node.GetNumPages()); + } +} + +size_t KMemoryManager::Impl::Initialize(PAddr address, size_t size, VAddr management, + VAddr management_end, Pool p) { + // Calculate management sizes. + const size_t ref_count_size = (size / PageSize) * sizeof(u16); + const size_t optimize_map_size = CalculateOptimizedProcessOverheadSize(size); + const size_t manager_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize); + const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(size); + const size_t total_management_size = manager_size + page_heap_size; + ASSERT(manager_size <= total_management_size); + ASSERT(management + total_management_size <= management_end); + ASSERT(Common::IsAligned(total_management_size, PageSize)); + + // Setup region. + pool = p; + management_region = management; + page_reference_counts.resize( + Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize() / PageSize); + ASSERT(Common::IsAligned(management_region, PageSize)); + + // Initialize the manager's KPageHeap. + heap.Initialize(address, size, management + manager_size, page_heap_size); + + return total_management_size; } -std::size_t KMemoryManager::Impl::CalculateManagementOverheadSize(std::size_t region_size) { - const std::size_t ref_count_size = (region_size / PageSize) * sizeof(u16); - const std::size_t optimize_map_size = +size_t KMemoryManager::Impl::CalculateManagementOverheadSize(size_t region_size) { + const size_t ref_count_size = (region_size / PageSize) * sizeof(u16); + const size_t optimize_map_size = (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) / Common::BitSize<u64>()) * sizeof(u64); - const std::size_t manager_meta_size = - Common::AlignUp(optimize_map_size + ref_count_size, PageSize); - const std::size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size); + const size_t manager_meta_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize); + const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size); return manager_meta_size + page_heap_size; } diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h index 17c7690f1..18775b262 100644 --- a/src/core/hle/kernel/k_memory_manager.h +++ b/src/core/hle/kernel/k_memory_manager.h @@ -5,11 +5,12 @@ #pragma once #include <array> -#include <mutex> #include <tuple> #include "common/common_funcs.h" #include "common/common_types.h" +#include "core/hle/kernel/k_light_lock.h" +#include "core/hle/kernel/k_memory_layout.h" #include "core/hle/kernel/k_page_heap.h" #include "core/hle/result.h" @@ -52,22 +53,33 @@ public: explicit KMemoryManager(Core::System& system_); - constexpr std::size_t GetSize(Pool pool) const { - return managers[static_cast<std::size_t>(pool)].GetSize(); + void Initialize(VAddr management_region, size_t management_region_size); + + constexpr size_t GetSize(Pool pool) const { + constexpr Direction GetSizeDirection = Direction::FromFront; + size_t total = 0; + for (auto* manager = this->GetFirstManager(pool, GetSizeDirection); manager != nullptr; + manager = this->GetNextManager(manager, GetSizeDirection)) { + total += manager->GetSize(); + } + return total; } - void InitializeManager(Pool pool, u64 start_address, u64 end_address); + PAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option); + ResultCode AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option); + ResultCode AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages, u32 option, + u64 process_id, u8 fill_pattern); + + static constexpr size_t MaxManagerCount = 10; - VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option); - ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir, - u32 heap_fill_value = 0); - ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir, - u32 heap_fill_value = 0); + void Close(PAddr address, size_t num_pages); + void Close(const KPageLinkedList& pg); - static constexpr std::size_t MaxManagerCount = 10; + void Open(PAddr address, size_t num_pages); + void Open(const KPageLinkedList& pg); public: - static std::size_t CalculateManagementOverheadSize(std::size_t region_size) { + static size_t CalculateManagementOverheadSize(size_t region_size) { return Impl::CalculateManagementOverheadSize(region_size); } @@ -100,17 +112,26 @@ private: Impl() = default; ~Impl() = default; - std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address); + size_t Initialize(PAddr address, size_t size, VAddr management, VAddr management_end, + Pool p); VAddr AllocateBlock(s32 index, bool random) { return heap.AllocateBlock(index, random); } - void Free(VAddr addr, std::size_t num_pages) { + void Free(VAddr addr, size_t num_pages) { heap.Free(addr, num_pages); } - constexpr std::size_t GetSize() const { + void SetInitialUsedHeapSize(size_t reserved_size) { + heap.SetInitialUsedSize(reserved_size); + } + + constexpr Pool GetPool() const { + return pool; + } + + constexpr size_t GetSize() const { return heap.GetSize(); } @@ -122,10 +143,88 @@ private: return heap.GetEndAddress(); } - static std::size_t CalculateManagementOverheadSize(std::size_t region_size); + constexpr size_t GetPageOffset(PAddr address) const { + return heap.GetPageOffset(address); + } + + constexpr size_t GetPageOffsetToEnd(PAddr address) const { + return heap.GetPageOffsetToEnd(address); + } + + constexpr void SetNext(Impl* n) { + next = n; + } + + constexpr void SetPrev(Impl* n) { + prev = n; + } + + constexpr Impl* GetNext() const { + return next; + } + + constexpr Impl* GetPrev() const { + return prev; + } + + void OpenFirst(PAddr address, size_t num_pages) { + size_t index = this->GetPageOffset(address); + const size_t end = index + num_pages; + while (index < end) { + const RefCount ref_count = (++page_reference_counts[index]); + ASSERT(ref_count == 1); - static constexpr std::size_t CalculateOptimizedProcessOverheadSize( - std::size_t region_size) { + index++; + } + } + + void Open(PAddr address, size_t num_pages) { + size_t index = this->GetPageOffset(address); + const size_t end = index + num_pages; + while (index < end) { + const RefCount ref_count = (++page_reference_counts[index]); + ASSERT(ref_count > 1); + + index++; + } + } + + void Close(PAddr address, size_t num_pages) { + size_t index = this->GetPageOffset(address); + const size_t end = index + num_pages; + + size_t free_start = 0; + size_t free_count = 0; + while (index < end) { + ASSERT(page_reference_counts[index] > 0); + const RefCount ref_count = (--page_reference_counts[index]); + + // Keep track of how many zero refcounts we see in a row, to minimize calls to free. + if (ref_count == 0) { + if (free_count > 0) { + free_count++; + } else { + free_start = index; + free_count = 1; + } + } else { + if (free_count > 0) { + this->Free(heap.GetAddress() + free_start * PageSize, free_count); + free_count = 0; + } + } + + index++; + } + + if (free_count > 0) { + this->Free(heap.GetAddress() + free_start * PageSize, free_count); + } + } + + static size_t CalculateManagementOverheadSize(size_t region_size); + + static constexpr size_t CalculateOptimizedProcessOverheadSize(size_t region_size) { return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) / Common::BitSize<u64>()) * sizeof(u64); @@ -135,13 +234,45 @@ private: using RefCount = u16; KPageHeap heap; + std::vector<RefCount> page_reference_counts; + VAddr management_region{}; Pool pool{}; + Impl* next{}; + Impl* prev{}; }; private: + Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) { + return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()]; + } + + const Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) const { + return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()]; + } + + constexpr Impl* GetFirstManager(Pool pool, Direction dir) const { + return dir == Direction::FromBack ? pool_managers_tail[static_cast<size_t>(pool)] + : pool_managers_head[static_cast<size_t>(pool)]; + } + + constexpr Impl* GetNextManager(Impl* cur, Direction dir) const { + if (dir == Direction::FromBack) { + return cur->GetPrev(); + } else { + return cur->GetNext(); + } + } + + ResultCode AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool, + Direction dir, bool random); + +private: Core::System& system; - std::array<std::mutex, static_cast<std::size_t>(Pool::Count)> pool_locks; + std::array<KLightLock, static_cast<size_t>(Pool::Count)> pool_locks; + std::array<Impl*, MaxManagerCount> pool_managers_head{}; + std::array<Impl*, MaxManagerCount> pool_managers_tail{}; std::array<Impl, MaxManagerCount> managers; + size_t num_managers{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_memory_region_type.h b/src/core/hle/kernel/k_memory_region_type.h index a05e66677..0baeddf51 100644 --- a/src/core/hle/kernel/k_memory_region_type.h +++ b/src/core/hle/kernel/k_memory_region_type.h @@ -14,7 +14,8 @@ namespace Kernel { enum KMemoryRegionType : u32 { - KMemoryRegionAttr_CarveoutProtected = 0x04000000, + KMemoryRegionAttr_CarveoutProtected = 0x02000000, + KMemoryRegionAttr_Uncached = 0x04000000, KMemoryRegionAttr_DidKernelMap = 0x08000000, KMemoryRegionAttr_ShouldKernelMap = 0x10000000, KMemoryRegionAttr_UserReadOnly = 0x20000000, @@ -239,6 +240,11 @@ static_assert(KMemoryRegionType_VirtualDramHeapBase.GetValue() == 0x1A); static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A); static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A); +// UNUSED: .DeriveSparse(2, 2, 0); +constexpr auto KMemoryRegionType_VirtualDramUnknownDebug = + KMemoryRegionType_Dram.DeriveSparse(2, 2, 1); +static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x52)); + constexpr auto KMemoryRegionType_VirtualDramKernelInitPt = KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0); constexpr auto KMemoryRegionType_VirtualDramPoolManagement = @@ -330,6 +336,8 @@ constexpr KMemoryRegionType GetTypeForVirtualLinearMapping(u32 type_id) { return KMemoryRegionType_VirtualDramKernelTraceBuffer; } else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) { return KMemoryRegionType_VirtualDramKernelPtHeap; + } else if ((type_id | KMemoryRegionAttr_ShouldKernelMap) == type_id) { + return KMemoryRegionType_VirtualDramUnknownDebug; } else { return KMemoryRegionType_Dram; } diff --git a/src/core/hle/kernel/k_page_buffer.h b/src/core/hle/kernel/k_page_buffer.h new file mode 100644 index 000000000..0a9451228 --- /dev/null +++ b/src/core/hle/kernel/k_page_buffer.h @@ -0,0 +1,34 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "core/core.h" +#include "core/device_memory.h" +#include "core/hle/kernel/memory_types.h" + +namespace Kernel { + +class KPageBuffer final : public KSlabAllocated<KPageBuffer> { +public: + KPageBuffer() = default; + + static KPageBuffer* FromPhysicalAddress(Core::System& system, PAddr phys_addr) { + ASSERT(Common::IsAligned(phys_addr, PageSize)); + return reinterpret_cast<KPageBuffer*>(system.DeviceMemory().GetPointer(phys_addr)); + } + +private: + [[maybe_unused]] alignas(PageSize) std::array<u8, PageSize> m_buffer{}; +}; + +static_assert(sizeof(KPageBuffer) == PageSize); +static_assert(alignof(KPageBuffer) == PageSize); + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp index 29d996d62..97a5890a0 100644 --- a/src/core/hle/kernel/k_page_heap.cpp +++ b/src/core/hle/kernel/k_page_heap.cpp @@ -7,35 +7,51 @@ namespace Kernel { -void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) { - // Check our assumptions - ASSERT(Common::IsAligned((address), PageSize)); +void KPageHeap::Initialize(PAddr address, size_t size, VAddr management_address, + size_t management_size, const size_t* block_shifts, + size_t num_block_shifts) { + // Check our assumptions. + ASSERT(Common::IsAligned(address, PageSize)); ASSERT(Common::IsAligned(size, PageSize)); + ASSERT(0 < num_block_shifts && num_block_shifts <= NumMemoryBlockPageShifts); + const VAddr management_end = management_address + management_size; - // Set our members - heap_address = address; - heap_size = size; - - // Setup bitmaps - metadata.resize(metadata_size / sizeof(u64)); - u64* cur_bitmap_storage{metadata.data()}; - for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { - const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; - const std::size_t next_block_shift{ - (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0}; - cur_bitmap_storage = blocks[i].Initialize(heap_address, heap_size, cur_block_shift, - next_block_shift, cur_bitmap_storage); + // Set our members. + m_heap_address = address; + m_heap_size = size; + m_num_blocks = num_block_shifts; + + // Setup bitmaps. + m_management_data.resize(management_size / sizeof(u64)); + u64* cur_bitmap_storage{m_management_data.data()}; + for (size_t i = 0; i < num_block_shifts; i++) { + const size_t cur_block_shift = block_shifts[i]; + const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0; + cur_bitmap_storage = m_blocks[i].Initialize(m_heap_address, m_heap_size, cur_block_shift, + next_block_shift, cur_bitmap_storage); } + + // Ensure we didn't overextend our bounds. + ASSERT(VAddr(cur_bitmap_storage) <= management_end); +} + +size_t KPageHeap::GetNumFreePages() const { + size_t num_free = 0; + + for (size_t i = 0; i < m_num_blocks; i++) { + num_free += m_blocks[i].GetNumFreePages(); + } + + return num_free; } -VAddr KPageHeap::AllocateBlock(s32 index, bool random) { - const std::size_t needed_size{blocks[index].GetSize()}; +PAddr KPageHeap::AllocateBlock(s32 index, bool random) { + const size_t needed_size = m_blocks[index].GetSize(); - for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) { - if (const VAddr addr{blocks[i].PopBlock(random)}; addr) { - if (const std::size_t allocated_size{blocks[i].GetSize()}; - allocated_size > needed_size) { - Free(addr + needed_size, (allocated_size - needed_size) / PageSize); + for (s32 i = index; i < static_cast<s32>(m_num_blocks); i++) { + if (const PAddr addr = m_blocks[i].PopBlock(random); addr != 0) { + if (const size_t allocated_size = m_blocks[i].GetSize(); allocated_size > needed_size) { + this->Free(addr + needed_size, (allocated_size - needed_size) / PageSize); } return addr; } @@ -44,34 +60,34 @@ VAddr KPageHeap::AllocateBlock(s32 index, bool random) { return 0; } -void KPageHeap::FreeBlock(VAddr block, s32 index) { +void KPageHeap::FreeBlock(PAddr block, s32 index) { do { - block = blocks[index++].PushBlock(block); + block = m_blocks[index++].PushBlock(block); } while (block != 0); } -void KPageHeap::Free(VAddr addr, std::size_t num_pages) { - // Freeing no pages is a no-op +void KPageHeap::Free(PAddr addr, size_t num_pages) { + // Freeing no pages is a no-op. if (num_pages == 0) { return; } - // Find the largest block size that we can free, and free as many as possible - s32 big_index{static_cast<s32>(MemoryBlockPageShifts.size()) - 1}; - const VAddr start{addr}; - const VAddr end{(num_pages * PageSize) + addr}; - VAddr before_start{start}; - VAddr before_end{start}; - VAddr after_start{end}; - VAddr after_end{end}; + // Find the largest block size that we can free, and free as many as possible. + s32 big_index = static_cast<s32>(m_num_blocks) - 1; + const PAddr start = addr; + const PAddr end = addr + num_pages * PageSize; + PAddr before_start = start; + PAddr before_end = start; + PAddr after_start = end; + PAddr after_end = end; while (big_index >= 0) { - const std::size_t block_size{blocks[big_index].GetSize()}; - const VAddr big_start{Common::AlignUp((start), block_size)}; - const VAddr big_end{Common::AlignDown((end), block_size)}; + const size_t block_size = m_blocks[big_index].GetSize(); + const PAddr big_start = Common::AlignUp(start, block_size); + const PAddr big_end = Common::AlignDown(end, block_size); if (big_start < big_end) { - // Free as many big blocks as we can - for (auto block{big_start}; block < big_end; block += block_size) { - FreeBlock(block, big_index); + // Free as many big blocks as we can. + for (auto block = big_start; block < big_end; block += block_size) { + this->FreeBlock(block, big_index); } before_end = big_start; after_start = big_end; @@ -81,31 +97,31 @@ void KPageHeap::Free(VAddr addr, std::size_t num_pages) { } ASSERT(big_index >= 0); - // Free space before the big blocks - for (s32 i{big_index - 1}; i >= 0; i--) { - const std::size_t block_size{blocks[i].GetSize()}; + // Free space before the big blocks. + for (s32 i = big_index - 1; i >= 0; i--) { + const size_t block_size = m_blocks[i].GetSize(); while (before_start + block_size <= before_end) { before_end -= block_size; - FreeBlock(before_end, i); + this->FreeBlock(before_end, i); } } - // Free space after the big blocks - for (s32 i{big_index - 1}; i >= 0; i--) { - const std::size_t block_size{blocks[i].GetSize()}; + // Free space after the big blocks. + for (s32 i = big_index - 1; i >= 0; i--) { + const size_t block_size = m_blocks[i].GetSize(); while (after_start + block_size <= after_end) { - FreeBlock(after_start, i); + this->FreeBlock(after_start, i); after_start += block_size; } } } -std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) { - std::size_t overhead_size = 0; - for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { - const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; - const std::size_t next_block_shift{ - (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0}; +size_t KPageHeap::CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts, + size_t num_block_shifts) { + size_t overhead_size = 0; + for (size_t i = 0; i < num_block_shifts; i++) { + const size_t cur_block_shift = block_shifts[i]; + const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0; overhead_size += KPageHeap::Block::CalculateManagementOverheadSize( region_size, cur_block_shift, next_block_shift); } diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h index a65aa28a0..60fff766b 100644 --- a/src/core/hle/kernel/k_page_heap.h +++ b/src/core/hle/kernel/k_page_heap.h @@ -23,54 +23,73 @@ public: KPageHeap() = default; ~KPageHeap() = default; - constexpr VAddr GetAddress() const { - return heap_address; + constexpr PAddr GetAddress() const { + return m_heap_address; } - constexpr std::size_t GetSize() const { - return heap_size; + constexpr size_t GetSize() const { + return m_heap_size; } - constexpr VAddr GetEndAddress() const { - return GetAddress() + GetSize(); + constexpr PAddr GetEndAddress() const { + return this->GetAddress() + this->GetSize(); } - constexpr std::size_t GetPageOffset(VAddr block) const { - return (block - GetAddress()) / PageSize; + constexpr size_t GetPageOffset(PAddr block) const { + return (block - this->GetAddress()) / PageSize; + } + constexpr size_t GetPageOffsetToEnd(PAddr block) const { + return (this->GetEndAddress() - block) / PageSize; + } + + void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address, + size_t management_size) { + return this->Initialize(heap_address, heap_size, management_address, management_size, + MemoryBlockPageShifts.data(), NumMemoryBlockPageShifts); + } + + size_t GetFreeSize() const { + return this->GetNumFreePages() * PageSize; } - void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); - VAddr AllocateBlock(s32 index, bool random); - void Free(VAddr addr, std::size_t num_pages); + void SetInitialUsedSize(size_t reserved_size) { + // Check that the reserved size is valid. + const size_t free_size = this->GetNumFreePages() * PageSize; + ASSERT(m_heap_size >= free_size + reserved_size); - void UpdateUsedSize() { - used_size = heap_size - (GetNumFreePages() * PageSize); + // Set the initial used size. + m_initial_used_size = m_heap_size - free_size - reserved_size; } - static std::size_t CalculateManagementOverheadSize(std::size_t region_size); + PAddr AllocateBlock(s32 index, bool random); + void Free(PAddr addr, size_t num_pages); + + static size_t CalculateManagementOverheadSize(size_t region_size) { + return CalculateManagementOverheadSize(region_size, MemoryBlockPageShifts.data(), + NumMemoryBlockPageShifts); + } - static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) { - const auto target_pages{std::max(num_pages, align_pages)}; - for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) { - if (target_pages <= - (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { + static constexpr s32 GetAlignedBlockIndex(size_t num_pages, size_t align_pages) { + const size_t target_pages = std::max(num_pages, align_pages); + for (size_t i = 0; i < NumMemoryBlockPageShifts; i++) { + if (target_pages <= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) { return static_cast<s32>(i); } } return -1; } - static constexpr s32 GetBlockIndex(std::size_t num_pages) { - for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) { - if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { + static constexpr s32 GetBlockIndex(size_t num_pages) { + for (s32 i = static_cast<s32>(NumMemoryBlockPageShifts) - 1; i >= 0; i--) { + if (num_pages >= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) { return i; } } return -1; } - static constexpr std::size_t GetBlockSize(std::size_t index) { - return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index]; + static constexpr size_t GetBlockSize(size_t index) { + return size_t(1) << MemoryBlockPageShifts[index]; } - static constexpr std::size_t GetBlockNumPages(std::size_t index) { + static constexpr size_t GetBlockNumPages(size_t index) { return GetBlockSize(index) / PageSize; } @@ -83,114 +102,116 @@ private: Block() = default; ~Block() = default; - constexpr std::size_t GetShift() const { - return block_shift; + constexpr size_t GetShift() const { + return m_block_shift; } - constexpr std::size_t GetNextShift() const { - return next_block_shift; + constexpr size_t GetNextShift() const { + return m_next_block_shift; } - constexpr std::size_t GetSize() const { - return static_cast<std::size_t>(1) << GetShift(); + constexpr size_t GetSize() const { + return u64(1) << this->GetShift(); } - constexpr std::size_t GetNumPages() const { - return GetSize() / PageSize; + constexpr size_t GetNumPages() const { + return this->GetSize() / PageSize; } - constexpr std::size_t GetNumFreeBlocks() const { - return bitmap.GetNumBits(); + constexpr size_t GetNumFreeBlocks() const { + return m_bitmap.GetNumBits(); } - constexpr std::size_t GetNumFreePages() const { - return GetNumFreeBlocks() * GetNumPages(); + constexpr size_t GetNumFreePages() const { + return this->GetNumFreeBlocks() * this->GetNumPages(); } - u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs, - u64* bit_storage) { - // Set shifts - block_shift = bs; - next_block_shift = nbs; - - // Align up the address - VAddr end{addr + size}; - const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift) - : (1ULL << block_shift)}; - addr = Common::AlignDown((addr), align); - end = Common::AlignUp((end), align); - - heap_address = addr; - end_offset = (end - addr) / (1ULL << block_shift); - return bitmap.Initialize(bit_storage, end_offset); + u64* Initialize(PAddr addr, size_t size, size_t bs, size_t nbs, u64* bit_storage) { + // Set shifts. + m_block_shift = bs; + m_next_block_shift = nbs; + + // Align up the address. + PAddr end = addr + size; + const size_t align = (m_next_block_shift != 0) ? (u64(1) << m_next_block_shift) + : (u64(1) << m_block_shift); + addr = Common::AlignDown(addr, align); + end = Common::AlignUp(end, align); + + m_heap_address = addr; + m_end_offset = (end - addr) / (u64(1) << m_block_shift); + return m_bitmap.Initialize(bit_storage, m_end_offset); } - VAddr PushBlock(VAddr address) { - // Set the bit for the free block - std::size_t offset{(address - heap_address) >> GetShift()}; - bitmap.SetBit(offset); + PAddr PushBlock(PAddr address) { + // Set the bit for the free block. + size_t offset = (address - m_heap_address) >> this->GetShift(); + m_bitmap.SetBit(offset); - // If we have a next shift, try to clear the blocks below and return the address - if (GetNextShift()) { - const auto diff{1ULL << (GetNextShift() - GetShift())}; + // If we have a next shift, try to clear the blocks below this one and return the new + // address. + if (this->GetNextShift()) { + const size_t diff = u64(1) << (this->GetNextShift() - this->GetShift()); offset = Common::AlignDown(offset, diff); - if (bitmap.ClearRange(offset, diff)) { - return heap_address + (offset << GetShift()); + if (m_bitmap.ClearRange(offset, diff)) { + return m_heap_address + (offset << this->GetShift()); } } - // We couldn't coalesce, or we're already as big as possible - return 0; + // We couldn't coalesce, or we're already as big as possible. + return {}; } - VAddr PopBlock(bool random) { - // Find a free block - const s64 soffset{bitmap.FindFreeBlock(random)}; + PAddr PopBlock(bool random) { + // Find a free block. + s64 soffset = m_bitmap.FindFreeBlock(random); if (soffset < 0) { - return 0; + return {}; } - const auto offset{static_cast<std::size_t>(soffset)}; + const size_t offset = static_cast<size_t>(soffset); - // Update our tracking and return it - bitmap.ClearBit(offset); - return heap_address + (offset << GetShift()); + // Update our tracking and return it. + m_bitmap.ClearBit(offset); + return m_heap_address + (offset << this->GetShift()); } - static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size, - std::size_t cur_block_shift, - std::size_t next_block_shift) { - const auto cur_block_size{(1ULL << cur_block_shift)}; - const auto next_block_size{(1ULL << next_block_shift)}; - const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size}; + public: + static constexpr size_t CalculateManagementOverheadSize(size_t region_size, + size_t cur_block_shift, + size_t next_block_shift) { + const size_t cur_block_size = (u64(1) << cur_block_shift); + const size_t next_block_size = (u64(1) << next_block_shift); + const size_t align = (next_block_shift != 0) ? next_block_size : cur_block_size; return KPageBitmap::CalculateManagementOverheadSize( (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size); } private: - KPageBitmap bitmap; - VAddr heap_address{}; - uintptr_t end_offset{}; - std::size_t block_shift{}; - std::size_t next_block_shift{}; + KPageBitmap m_bitmap; + PAddr m_heap_address{}; + uintptr_t m_end_offset{}; + size_t m_block_shift{}; + size_t m_next_block_shift{}; }; - constexpr std::size_t GetNumFreePages() const { - std::size_t num_free{}; - - for (const auto& block : blocks) { - num_free += block.GetNumFreePages(); - } - - return num_free; - } +private: + void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address, + size_t management_size, const size_t* block_shifts, size_t num_block_shifts); + size_t GetNumFreePages() const; - void FreeBlock(VAddr block, s32 index); + void FreeBlock(PAddr block, s32 index); - static constexpr std::size_t NumMemoryBlockPageShifts{7}; - static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ + static constexpr size_t NumMemoryBlockPageShifts{7}; + static constexpr std::array<size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E, }; - VAddr heap_address{}; - std::size_t heap_size{}; - std::size_t used_size{}; - std::array<Block, NumMemoryBlockPageShifts> blocks{}; - std::vector<u64> metadata; +private: + static size_t CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts, + size_t num_block_shifts); + +private: + PAddr m_heap_address{}; + size_t m_heap_size{}; + size_t m_initial_used_size{}; + size_t m_num_blocks{}; + std::array<Block, NumMemoryBlockPageShifts> m_blocks{}; + std::vector<u64> m_management_data; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 88aa2a152..02d93b12e 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -273,87 +273,219 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free, KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::None, KMemoryAttribute::None)); + KPageLinkedList pg; + R_TRY(system.Kernel().MemoryManager().AllocateAndOpen( + &pg, num_pages, + KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, allocation_option))); - KPageLinkedList page_linked_list; - R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool, - allocation_option)); - R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); + R_TRY(Operate(addr, num_pages, pg, OperationType::MapGroup)); block_manager->Update(addr, num_pages, state, perm); return ResultSuccess; } -ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) { + // Validate the mapping request. + R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode), + ResultInvalidMemoryRegion); + + // Lock the table. KScopedLightLock lk(general_lock); - const std::size_t num_pages{size / PageSize}; + // Verify that the source memory is normal heap. + KMemoryState src_state{}; + KMemoryPermission src_perm{}; + std::size_t num_src_allocator_blocks{}; + R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks, + src_address, size, KMemoryState::All, KMemoryState::Normal, + KMemoryPermission::All, KMemoryPermission::UserReadWrite, + KMemoryAttribute::All, KMemoryAttribute::None)); - KMemoryState state{}; - KMemoryPermission perm{}; - CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size, - KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All, - KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, - KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); + // Verify that the destination memory is unmapped. + std::size_t num_dst_allocator_blocks{}; + R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All, + KMemoryState::Free, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::None, + KMemoryAttribute::None)); - if (IsRegionMapped(dst_addr, size)) { - return ResultInvalidCurrentMemory; - } + // Map the code memory. + { + // Determine the number of pages being operated on. + const std::size_t num_pages = size / PageSize; - KPageLinkedList page_linked_list; - AddRegionToPages(src_addr, num_pages, page_linked_list); + // Create page groups for the memory being mapped. + KPageLinkedList pg; + AddRegionToPages(src_address, num_pages, pg); - { - auto block_guard = detail::ScopeExit( - [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); }); + // Reprotect the source as kernel-read/not mapped. + const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead | + KMemoryPermission::NotMapped); + R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions)); - CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None, - OperationType::ChangePermissions)); - CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None)); + // Ensure that we unprotect the source pages on failure. + auto unprot_guard = SCOPE_GUARD({ + ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions) + .IsSuccess()); + }); - block_guard.Cancel(); - } + // Map the alias pages. + R_TRY(MapPages(dst_address, pg, new_perm)); - block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None, - KMemoryAttribute::Locked); - block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode); + // We successfully mapped the alias pages, so we don't need to unprotect the src pages on + // failure. + unprot_guard.Cancel(); + + // Apply the memory block updates. + block_manager->Update(src_address, num_pages, src_state, new_perm, + KMemoryAttribute::Locked); + block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm, + KMemoryAttribute::None); + } return ResultSuccess; } -ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) { + // Validate the mapping request. + R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode), + ResultInvalidMemoryRegion); + + // Lock the table. KScopedLightLock lk(general_lock); - if (!size) { - return ResultSuccess; + // Verify that the source memory is locked normal heap. + std::size_t num_src_allocator_blocks{}; + R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size, + KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::All, + KMemoryAttribute::Locked)); + + // Verify that the destination memory is aliasable code. + std::size_t num_dst_allocator_blocks{}; + R_TRY(this->CheckMemoryStateContiguous( + std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias, + KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::All, KMemoryAttribute::None)); + + // Determine whether any pages being unmapped are code. + bool any_code_pages = false; + { + KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address); + while (true) { + // Get the memory info. + const KMemoryInfo info = it->GetMemoryInfo(); + + // Check if the memory has code flag. + if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) { + any_code_pages = true; + break; + } + + // Check if we're done. + if (dst_address + size - 1 <= info.GetLastAddress()) { + break; + } + + // Advance. + ++it; + } } - const std::size_t num_pages{size / PageSize}; + // Ensure that we maintain the instruction cache. + bool reprotected_pages = false; + SCOPE_EXIT({ + if (reprotected_pages && any_code_pages) { + system.InvalidateCpuInstructionCacheRange(dst_address, size); + } + }); - CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size, - KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None, - KMemoryPermission::None, KMemoryAttribute::Mask, - KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); + // Unmap. + { + // Determine the number of pages being operated on. + const std::size_t num_pages = size / PageSize; - KMemoryState state{}; - CASCADE_CODE(CheckMemoryState( - &state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias, - KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None, - KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); - CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None, - KMemoryPermission::None, KMemoryAttribute::Mask, - KMemoryAttribute::None)); - CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap)); + // Unmap the aliased copy of the pages. + R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap)); - block_manager->Update(dst_addr, num_pages, KMemoryState::Free); - block_manager->Update(src_addr, num_pages, KMemoryState::Normal, - KMemoryPermission::UserReadWrite); + // Try to set the permissions for the source pages back to what they should be. + R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite, + OperationType::ChangePermissions)); - system.InvalidateCpuInstructionCacheRange(dst_addr, size); + // Apply the memory block updates. + block_manager->Update(dst_address, num_pages, KMemoryState::None); + block_manager->Update(src_address, num_pages, KMemoryState::Normal, + KMemoryPermission::UserReadWrite); + + // Note that we reprotected pages. + reprotected_pages = true; + } return ResultSuccess; } +VAddr KPageTable::FindFreeArea(VAddr region_start, std::size_t region_num_pages, + std::size_t num_pages, std::size_t alignment, std::size_t offset, + std::size_t guard_pages) { + VAddr address = 0; + + if (num_pages <= region_num_pages) { + if (this->IsAslrEnabled()) { + // Try to directly find a free area up to 8 times. + for (std::size_t i = 0; i < 8; i++) { + const std::size_t random_offset = + KSystemControl::GenerateRandomRange( + 0, (region_num_pages - num_pages - guard_pages) * PageSize / alignment) * + alignment; + const VAddr candidate = + Common::AlignDown((region_start + random_offset), alignment) + offset; + + KMemoryInfo info = this->QueryInfoImpl(candidate); + + if (info.state != KMemoryState::Free) { + continue; + } + if (region_start > candidate) { + continue; + } + if (info.GetAddress() + guard_pages * PageSize > candidate) { + continue; + } + + const VAddr candidate_end = candidate + (num_pages + guard_pages) * PageSize - 1; + if (candidate_end > info.GetLastAddress()) { + continue; + } + if (candidate_end > region_start + region_num_pages * PageSize - 1) { + continue; + } + + address = candidate; + break; + } + // Fall back to finding the first free area with a random offset. + if (address == 0) { + // NOTE: Nintendo does not account for guard pages here. + // This may theoretically cause an offset to be chosen that cannot be mapped. We + // will account for guard pages. + const std::size_t offset_pages = KSystemControl::GenerateRandomRange( + 0, region_num_pages - num_pages - guard_pages); + address = block_manager->FindFreeArea(region_start + offset_pages * PageSize, + region_num_pages - offset_pages, num_pages, + alignment, offset, guard_pages); + } + } + + // Find the first free area. + if (address == 0) { + address = block_manager->FindFreeArea(region_start, region_num_pages, num_pages, + alignment, offset, guard_pages); + } + } + + return address; +} + ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table, VAddr src_addr) { KScopedLightLock lk(general_lock); @@ -443,9 +575,10 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) { R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); // Allocate pages for the new memory. - KPageLinkedList page_linked_list; - R_TRY(system.Kernel().MemoryManager().Allocate( - page_linked_list, (size - mapped_size) / PageSize, memory_pool, allocation_option)); + KPageLinkedList pg; + R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess( + &pg, (size - mapped_size) / PageSize, + KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0)); // Map the memory. { @@ -547,7 +680,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) { }); // Iterate over the memory. - auto pg_it = page_linked_list.Nodes().begin(); + auto pg_it = pg.Nodes().begin(); PAddr pg_phys_addr = pg_it->GetAddress(); size_t pg_pages = pg_it->GetNumPages(); @@ -571,7 +704,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) { // Check if we're at the end of the physical block. if (pg_pages == 0) { // Ensure there are more pages to map. - ASSERT(pg_it != page_linked_list.Nodes().end()); + ASSERT(pg_it != pg.Nodes().end()); // Advance our physical block. ++pg_it; @@ -841,10 +974,14 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr address, std::size_t size) { process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size); // Update memory blocks. - system.Kernel().MemoryManager().Free(pg, size / PageSize, memory_pool, allocation_option); block_manager->Update(address, size / PageSize, KMemoryState::Free, KMemoryPermission::None, KMemoryAttribute::None); + // TODO(bunnei): This is a workaround until the next set of changes, where we add reference + // counting for mapped pages. Until then, we must manually close the reference to the page + // group. + system.Kernel().MemoryManager().Close(pg); + // We succeeded. remap_guard.Cancel(); @@ -980,6 +1117,46 @@ ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list return ResultSuccess; } +ResultCode KPageTable::MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment, + PAddr phys_addr, bool is_pa_valid, VAddr region_start, + std::size_t region_num_pages, KMemoryState state, + KMemoryPermission perm) { + ASSERT(Common::IsAligned(alignment, PageSize) && alignment >= PageSize); + + // Ensure this is a valid map request. + R_UNLESS(this->CanContain(region_start, region_num_pages * PageSize, state), + ResultInvalidCurrentMemory); + R_UNLESS(num_pages < region_num_pages, ResultOutOfMemory); + + // Lock the table. + KScopedLightLock lk(general_lock); + + // Find a random address to map at. + VAddr addr = this->FindFreeArea(region_start, region_num_pages, num_pages, alignment, 0, + this->GetNumGuardPages()); + R_UNLESS(addr != 0, ResultOutOfMemory); + ASSERT(Common::IsAligned(addr, alignment)); + ASSERT(this->CanContain(addr, num_pages * PageSize, state)); + ASSERT(this->CheckMemoryState(addr, num_pages * PageSize, KMemoryState::All, KMemoryState::Free, + KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::None, KMemoryAttribute::None) + .IsSuccess()); + + // Perform mapping operation. + if (is_pa_valid) { + R_TRY(this->Operate(addr, num_pages, perm, OperationType::Map, phys_addr)); + } else { + UNIMPLEMENTED(); + } + + // Update the blocks. + block_manager->Update(addr, num_pages, state, perm); + + // We successfully mapped the pages. + *out_addr = addr; + return ResultSuccess; +} + ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list) { ASSERT(this->IsLockedByCurrentThread()); @@ -1022,6 +1199,30 @@ ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, return ResultSuccess; } +ResultCode KPageTable::UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state) { + // Check that the unmap is in range. + const std::size_t size = num_pages * PageSize; + R_UNLESS(this->Contains(address, size), ResultInvalidCurrentMemory); + + // Lock the table. + KScopedLightLock lk(general_lock); + + // Check the memory state. + std::size_t num_allocator_blocks{}; + R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks), address, size, + KMemoryState::All, state, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::All, + KMemoryAttribute::None)); + + // Perform the unmap. + R_TRY(Operate(address, num_pages, KMemoryPermission::None, OperationType::Unmap)); + + // Update the blocks. + block_manager->Update(address, num_pages, KMemoryState::Free, KMemoryPermission::None); + + return ResultSuccess; +} + ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size, Svc::MemoryPermission svc_perm) { const size_t num_pages = size / PageSize; @@ -1270,9 +1471,16 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); // Allocate pages for the heap extension. - KPageLinkedList page_linked_list; - R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize, - memory_pool, allocation_option)); + KPageLinkedList pg; + R_TRY(system.Kernel().MemoryManager().AllocateAndOpen( + &pg, allocation_size / PageSize, + KMemoryManager::EncodeOption(memory_pool, allocation_option))); + + // Clear all the newly allocated pages. + for (const auto& it : pg.Nodes()) { + std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value, + it.GetSize()); + } // Map the pages. { @@ -1291,7 +1499,7 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { // Map the pages. const auto num_pages = allocation_size / PageSize; - R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup)); + R_TRY(Operate(current_heap_end, num_pages, pg, OperationType::MapGroup)); // Clear all the newly allocated pages. for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) { @@ -1339,8 +1547,9 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); } else { KPageLinkedList page_group; - R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool, - allocation_option)); + R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess( + &page_group, needed_num_pages, + KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0)); R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); } @@ -1547,7 +1756,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermiss return ResultSuccess; } -constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const { +VAddr KPageTable::GetRegionAddress(KMemoryState state) const { switch (state) { case KMemoryState::Free: case KMemoryState::Kernel: @@ -1583,7 +1792,7 @@ constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const { } } -constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const { +std::size_t KPageTable::GetRegionSize(KMemoryState state) const { switch (state) { case KMemoryState::Free: case KMemoryState::Kernel: diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index c98887d34..54c6adf8d 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -36,8 +36,8 @@ public: KMemoryManager::Pool pool); ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state, KMemoryPermission perm); - ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); - ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size); + ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size); ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table, VAddr src_addr); ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); @@ -46,7 +46,14 @@ public: ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, KMemoryPermission perm); + ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment, + PAddr phys_addr, KMemoryState state, KMemoryPermission perm) { + return this->MapPages(out_addr, num_pages, alignment, phys_addr, true, + this->GetRegionAddress(state), this->GetRegionSize(state) / PageSize, + state, perm); + } ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state); + ResultCode UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state); ResultCode SetProcessMemoryPermission(VAddr addr, std::size_t size, Svc::MemoryPermission svc_perm); KMemoryInfo QueryInfo(VAddr addr); @@ -91,6 +98,9 @@ private: ResultCode InitializeMemoryLayout(VAddr start, VAddr end); ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list, KMemoryPermission perm); + ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment, + PAddr phys_addr, bool is_pa_valid, VAddr region_start, + std::size_t region_num_pages, KMemoryState state, KMemoryPermission perm); ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list); bool IsRegionMapped(VAddr address, u64 size); bool IsRegionContiguous(VAddr addr, u64 size) const; @@ -102,8 +112,11 @@ private: OperationType operation); ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, OperationType operation, PAddr map_addr = 0); - constexpr VAddr GetRegionAddress(KMemoryState state) const; - constexpr std::size_t GetRegionSize(KMemoryState state) const; + VAddr GetRegionAddress(KMemoryState state) const; + std::size_t GetRegionSize(KMemoryState state) const; + + VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages, + std::size_t alignment, std::size_t offset, std::size_t guard_pages); ResultCode CheckMemoryStateContiguous(std::size_t* out_blocks_needed, VAddr addr, std::size_t size, KMemoryState state_mask, @@ -137,7 +150,7 @@ private: return CheckMemoryState(nullptr, nullptr, nullptr, out_blocks_needed, addr, size, state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr); } - ResultCode CheckMemoryState(VAddr addr, size_t size, KMemoryState state_mask, + ResultCode CheckMemoryState(VAddr addr, std::size_t size, KMemoryState state_mask, KMemoryState state, KMemoryPermission perm_mask, KMemoryPermission perm, KMemoryAttribute attr_mask, KMemoryAttribute attr, @@ -210,7 +223,7 @@ public: constexpr VAddr GetAliasCodeRegionSize() const { return alias_code_region_end - alias_code_region_start; } - size_t GetNormalMemorySize() { + std::size_t GetNormalMemorySize() { KScopedLightLock lk(general_lock); return GetHeapSize() + mapped_physical_memory_size; } @@ -253,9 +266,10 @@ public: constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const { return !IsOutsideASLRRegion(address, size); } - - PAddr GetPhysicalAddr(VAddr addr) { - ASSERT(IsLockedByCurrentThread()); + constexpr std::size_t GetNumGuardPages() const { + return IsKernel() ? 1 : 4; + } + PAddr GetPhysicalAddr(VAddr addr) const { const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; ASSERT(backing_addr); return backing_addr + addr; @@ -276,10 +290,6 @@ private: return is_aslr_enabled; } - constexpr std::size_t GetNumGuardPages() const { - return IsKernel() ? 1 : 4; - } - constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const { return (address_space_start <= addr) && (num_pages <= (address_space_end - address_space_start) / PageSize) && @@ -311,6 +321,8 @@ private: bool is_kernel{}; bool is_aslr_enabled{}; + u32 heap_fill_value{}; + KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application}; KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront}; diff --git a/src/core/hle/kernel/k_port.cpp b/src/core/hle/kernel/k_port.cpp index a8ba09c4a..ceb98709f 100644 --- a/src/core/hle/kernel/k_port.cpp +++ b/src/core/hle/kernel/k_port.cpp @@ -57,7 +57,12 @@ ResultCode KPort::EnqueueSession(KServerSession* session) { R_UNLESS(state == State::Normal, ResultPortClosed); server.EnqueueSession(session); - server.GetSessionRequestHandler()->ClientConnected(server.AcceptSession()); + + if (auto session_ptr = server.GetSessionRequestHandler().lock()) { + session_ptr->ClientConnected(server.AcceptSession()); + } else { + UNREACHABLE(); + } return ResultSuccess; } diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 9233261cd..b39405496 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -70,58 +70,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority } } // Anonymous namespace -// Represents a page used for thread-local storage. -// -// Each TLS page contains slots that may be used by processes and threads. -// Every process and thread is created with a slot in some arbitrary page -// (whichever page happens to have an available slot). -class TLSPage { -public: - static constexpr std::size_t num_slot_entries = - Core::Memory::PAGE_SIZE / Core::Memory::TLS_ENTRY_SIZE; - - explicit TLSPage(VAddr address) : base_address{address} {} - - bool HasAvailableSlots() const { - return !is_slot_used.all(); - } - - VAddr GetBaseAddress() const { - return base_address; - } - - std::optional<VAddr> ReserveSlot() { - for (std::size_t i = 0; i < is_slot_used.size(); i++) { - if (is_slot_used[i]) { - continue; - } - - is_slot_used[i] = true; - return base_address + (i * Core::Memory::TLS_ENTRY_SIZE); - } - - return std::nullopt; - } - - void ReleaseSlot(VAddr address) { - // Ensure that all given addresses are consistent with how TLS pages - // are intended to be used when releasing slots. - ASSERT(IsWithinPage(address)); - ASSERT((address % Core::Memory::TLS_ENTRY_SIZE) == 0); - - const std::size_t index = (address - base_address) / Core::Memory::TLS_ENTRY_SIZE; - is_slot_used[index] = false; - } - -private: - bool IsWithinPage(VAddr address) const { - return base_address <= address && address < base_address + Core::Memory::PAGE_SIZE; - } - - VAddr base_address; - std::bitset<num_slot_entries> is_slot_used; -}; - ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name, ProcessType type, KResourceLimit* res_limit) { auto& kernel = system.Kernel(); @@ -404,7 +352,7 @@ ResultCode KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, } // Create TLS region - tls_region_address = CreateTLSRegion(); + R_TRY(this->CreateThreadLocalRegion(std::addressof(tls_region_address))); memory_reservation.Commit(); return handle_table.Initialize(capabilities.GetHandleTableSize()); @@ -444,7 +392,7 @@ void KProcess::PrepareForTermination() { stop_threads(kernel.System().GlobalSchedulerContext().GetThreadList()); - FreeTLSRegion(tls_region_address); + this->DeleteThreadLocalRegion(tls_region_address); tls_region_address = 0; if (resource_limit) { @@ -456,9 +404,6 @@ void KProcess::PrepareForTermination() { } void KProcess::Finalize() { - // Finalize the handle table and close any open handles. - handle_table.Finalize(); - // Free all shared memory infos. { auto it = shared_memory_list.begin(); @@ -483,67 +428,110 @@ void KProcess::Finalize() { resource_limit = nullptr; } + // Finalize the page table. + page_table.reset(); + // Perform inherited finalization. KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask>::Finalize(); } -/** - * Attempts to find a TLS page that contains a free slot for - * use by a thread. - * - * @returns If a page with an available slot is found, then an iterator - * pointing to the page is returned. Otherwise the end iterator - * is returned instead. - */ -static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) { - return std::find_if(tls_pages.begin(), tls_pages.end(), - [](const auto& page) { return page.HasAvailableSlots(); }); -} +ResultCode KProcess::CreateThreadLocalRegion(VAddr* out) { + KThreadLocalPage* tlp = nullptr; + VAddr tlr = 0; -VAddr KProcess::CreateTLSRegion() { - KScopedSchedulerLock lock(kernel); - if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; - tls_page_iter != tls_pages.cend()) { - return *tls_page_iter->ReserveSlot(); - } + // See if we can get a region from a partially used TLP. + { + KScopedSchedulerLock sl{kernel}; - Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()}; - ASSERT(tls_page_ptr); + if (auto it = partially_used_tlp_tree.begin(); it != partially_used_tlp_tree.end()) { + tlr = it->Reserve(); + ASSERT(tlr != 0); - const VAddr start{page_table->GetKernelMapRegionStart()}; - const VAddr size{page_table->GetKernelMapRegionEnd() - start}; - const PAddr tls_map_addr{kernel.System().DeviceMemory().GetPhysicalAddr(tls_page_ptr)}; - const VAddr tls_page_addr{page_table - ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize, - KMemoryState::ThreadLocal, - KMemoryPermission::UserReadWrite, - tls_map_addr) - .ValueOr(0)}; + if (it->IsAllUsed()) { + tlp = std::addressof(*it); + partially_used_tlp_tree.erase(it); + fully_used_tlp_tree.insert(*tlp); + } - ASSERT(tls_page_addr); + *out = tlr; + return ResultSuccess; + } + } - std::memset(tls_page_ptr, 0, PageSize); - tls_pages.emplace_back(tls_page_addr); + // Allocate a new page. + tlp = KThreadLocalPage::Allocate(kernel); + R_UNLESS(tlp != nullptr, ResultOutOfMemory); + auto tlp_guard = SCOPE_GUARD({ KThreadLocalPage::Free(kernel, tlp); }); - const auto reserve_result{tls_pages.back().ReserveSlot()}; - ASSERT(reserve_result.has_value()); + // Initialize the new page. + R_TRY(tlp->Initialize(kernel, this)); + + // Reserve a TLR. + tlr = tlp->Reserve(); + ASSERT(tlr != 0); + + // Insert into our tree. + { + KScopedSchedulerLock sl{kernel}; + if (tlp->IsAllUsed()) { + fully_used_tlp_tree.insert(*tlp); + } else { + partially_used_tlp_tree.insert(*tlp); + } + } - return *reserve_result; + // We succeeded! + tlp_guard.Cancel(); + *out = tlr; + return ResultSuccess; } -void KProcess::FreeTLSRegion(VAddr tls_address) { - KScopedSchedulerLock lock(kernel); - const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); - auto iter = - std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { - return page.GetBaseAddress() == aligned_address; - }); +ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) { + KThreadLocalPage* page_to_free = nullptr; + + // Release the region. + { + KScopedSchedulerLock sl{kernel}; + + // Try to find the page in the partially used list. + auto it = partially_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize)); + if (it == partially_used_tlp_tree.end()) { + // If we don't find it, it has to be in the fully used list. + it = fully_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize)); + R_UNLESS(it != fully_used_tlp_tree.end(), ResultInvalidAddress); + + // Release the region. + it->Release(addr); + + // Move the page out of the fully used list. + KThreadLocalPage* tlp = std::addressof(*it); + fully_used_tlp_tree.erase(it); + if (tlp->IsAllFree()) { + page_to_free = tlp; + } else { + partially_used_tlp_tree.insert(*tlp); + } + } else { + // Release the region. + it->Release(addr); + + // Handle the all-free case. + KThreadLocalPage* tlp = std::addressof(*it); + if (tlp->IsAllFree()) { + partially_used_tlp_tree.erase(it); + page_to_free = tlp; + } + } + } + + // If we should free the page it was in, do so. + if (page_to_free != nullptr) { + page_to_free->Finalize(); - // Something has gone very wrong if we're freeing a region - // with no actual page available. - ASSERT(iter != tls_pages.cend()); + KThreadLocalPage::Free(kernel, page_to_free); + } - iter->ReleaseSlot(tls_address); + return ResultSuccess; } void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) { diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index cf1b67428..5ed0f2d83 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -15,6 +15,7 @@ #include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_synchronization_object.h" +#include "core/hle/kernel/k_thread_local_page.h" #include "core/hle/kernel/k_worker_task.h" #include "core/hle/kernel/process_capability.h" #include "core/hle/kernel/slab_helpers.h" @@ -362,10 +363,10 @@ public: // Thread-local storage management // Marks the next available region as used and returns the address of the slot. - [[nodiscard]] VAddr CreateTLSRegion(); + [[nodiscard]] ResultCode CreateThreadLocalRegion(VAddr* out); // Frees a used TLS slot identified by the given address - void FreeTLSRegion(VAddr tls_address); + ResultCode DeleteThreadLocalRegion(VAddr addr); private: void PinThread(s32 core_id, KThread* thread) { @@ -413,13 +414,6 @@ private: /// The ideal CPU core for this process, threads are scheduled on this core by default. u8 ideal_core = 0; - /// The Thread Local Storage area is allocated as processes create threads, - /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part - /// holds the TLS for a specific thread. This vector contains which parts are in use for each - /// page as a bitmask. - /// This vector will grow as more pages are allocated for new threads. - std::vector<TLSPage> tls_pages; - /// Contains the parsed process capability descriptors. ProcessCapabilities capabilities; @@ -482,6 +476,12 @@ private: KThread* exception_thread{}; KLightLock state_lock; + + using TLPTree = + Common::IntrusiveRedBlackTreeBaseTraits<KThreadLocalPage>::TreeType<KThreadLocalPage>; + using TLPIterator = TLPTree::iterator; + TLPTree fully_used_tlp_tree; + TLPTree partially_used_tlp_tree; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h index 6302d5e61..2185736be 100644 --- a/src/core/hle/kernel/k_server_port.h +++ b/src/core/hle/kernel/k_server_port.h @@ -30,11 +30,11 @@ public: /// Whether or not this server port has an HLE handler available. bool HasSessionRequestHandler() const { - return session_handler != nullptr; + return !session_handler.expired(); } /// Gets the HLE handler for this port. - SessionRequestHandlerPtr GetSessionRequestHandler() const { + SessionRequestHandlerWeakPtr GetSessionRequestHandler() const { return session_handler; } @@ -42,7 +42,7 @@ public: * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port * will inherit a reference to this handler. */ - void SetSessionHandler(SessionRequestHandlerPtr&& handler) { + void SetSessionHandler(SessionRequestHandlerWeakPtr&& handler) { session_handler = std::move(handler); } @@ -66,7 +66,7 @@ private: void CleanupSessions(); SessionList session_list; - SessionRequestHandlerPtr session_handler; + SessionRequestHandlerWeakPtr session_handler; KPort* parent{}; }; diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp index 4d94eb9cf..30c56ff29 100644 --- a/src/core/hle/kernel/k_server_session.cpp +++ b/src/core/hle/kernel/k_server_session.cpp @@ -27,10 +27,7 @@ namespace Kernel { KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {} -KServerSession::~KServerSession() { - // Ensure that the global list tracking server sessions does not hold on to a reference. - kernel.UnregisterServerSession(this); -} +KServerSession::~KServerSession() = default; void KServerSession::Initialize(KSession* parent_session_, std::string&& name_, std::shared_ptr<SessionRequestManager> manager_) { @@ -49,6 +46,9 @@ void KServerSession::Destroy() { parent->OnServerClosed(); parent->Close(); + + // Release host emulation members. + manager.reset(); } void KServerSession::OnClientClosed() { @@ -98,7 +98,12 @@ ResultCode KServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& co UNREACHABLE(); return ResultSuccess; // Ignore error if asserts are off } - return manager->DomainHandler(object_id - 1)->HandleSyncRequest(*this, context); + if (auto strong_ptr = manager->DomainHandler(object_id - 1).lock()) { + return strong_ptr->HandleSyncRequest(*this, context); + } else { + UNREACHABLE(); + return ResultSuccess; + } case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: { LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id); diff --git a/src/core/hle/kernel/k_slab_heap.h b/src/core/hle/kernel/k_slab_heap.h index 05c0bec9c..5690cc757 100644 --- a/src/core/hle/kernel/k_slab_heap.h +++ b/src/core/hle/kernel/k_slab_heap.h @@ -16,39 +16,34 @@ class KernelCore; namespace impl { -class KSlabHeapImpl final { -public: +class KSlabHeapImpl { YUZU_NON_COPYABLE(KSlabHeapImpl); YUZU_NON_MOVEABLE(KSlabHeapImpl); +public: struct Node { Node* next{}; }; +public: constexpr KSlabHeapImpl() = default; - constexpr ~KSlabHeapImpl() = default; - void Initialize(std::size_t size) { - ASSERT(head == nullptr); - obj_size = size; - } - - constexpr std::size_t GetObjectSize() const { - return obj_size; + void Initialize() { + ASSERT(m_head == nullptr); } Node* GetHead() const { - return head; + return m_head; } void* Allocate() { - Node* ret = head.load(); + Node* ret = m_head.load(); do { if (ret == nullptr) { break; } - } while (!head.compare_exchange_weak(ret, ret->next)); + } while (!m_head.compare_exchange_weak(ret, ret->next)); return ret; } @@ -56,170 +51,157 @@ public: void Free(void* obj) { Node* node = static_cast<Node*>(obj); - Node* cur_head = head.load(); + Node* cur_head = m_head.load(); do { node->next = cur_head; - } while (!head.compare_exchange_weak(cur_head, node)); + } while (!m_head.compare_exchange_weak(cur_head, node)); } private: - std::atomic<Node*> head{}; - std::size_t obj_size{}; + std::atomic<Node*> m_head{}; }; } // namespace impl -class KSlabHeapBase { -public: +template <bool SupportDynamicExpansion> +class KSlabHeapBase : protected impl::KSlabHeapImpl { YUZU_NON_COPYABLE(KSlabHeapBase); YUZU_NON_MOVEABLE(KSlabHeapBase); - constexpr KSlabHeapBase() = default; - constexpr ~KSlabHeapBase() = default; +private: + size_t m_obj_size{}; + uintptr_t m_peak{}; + uintptr_t m_start{}; + uintptr_t m_end{}; - constexpr bool Contains(uintptr_t addr) const { - return start <= addr && addr < end; - } +private: + void UpdatePeakImpl(uintptr_t obj) { + static_assert(std::atomic_ref<uintptr_t>::is_always_lock_free); + std::atomic_ref<uintptr_t> peak_ref(m_peak); - constexpr std::size_t GetSlabHeapSize() const { - return (end - start) / GetObjectSize(); + const uintptr_t alloc_peak = obj + this->GetObjectSize(); + uintptr_t cur_peak = m_peak; + do { + if (alloc_peak <= cur_peak) { + break; + } + } while (!peak_ref.compare_exchange_strong(cur_peak, alloc_peak)); } - constexpr std::size_t GetObjectSize() const { - return impl.GetObjectSize(); - } +public: + constexpr KSlabHeapBase() = default; - constexpr uintptr_t GetSlabHeapAddress() const { - return start; + bool Contains(uintptr_t address) const { + return m_start <= address && address < m_end; } - std::size_t GetObjectIndexImpl(const void* obj) const { - return (reinterpret_cast<uintptr_t>(obj) - start) / GetObjectSize(); + void Initialize(size_t obj_size, void* memory, size_t memory_size) { + // Ensure we don't initialize a slab using null memory. + ASSERT(memory != nullptr); + + // Set our object size. + m_obj_size = obj_size; + + // Initialize the base allocator. + KSlabHeapImpl::Initialize(); + + // Set our tracking variables. + const size_t num_obj = (memory_size / obj_size); + m_start = reinterpret_cast<uintptr_t>(memory); + m_end = m_start + num_obj * obj_size; + m_peak = m_start; + + // Free the objects. + u8* cur = reinterpret_cast<u8*>(m_end); + + for (size_t i = 0; i < num_obj; i++) { + cur -= obj_size; + KSlabHeapImpl::Free(cur); + } } - std::size_t GetPeakIndex() const { - return GetObjectIndexImpl(reinterpret_cast<const void*>(peak)); + size_t GetSlabHeapSize() const { + return (m_end - m_start) / this->GetObjectSize(); } - void* AllocateImpl() { - return impl.Allocate(); + size_t GetObjectSize() const { + return m_obj_size; } - void FreeImpl(void* obj) { - // Don't allow freeing an object that wasn't allocated from this heap - ASSERT(Contains(reinterpret_cast<uintptr_t>(obj))); + void* Allocate() { + void* obj = KSlabHeapImpl::Allocate(); - impl.Free(obj); + return obj; } - void InitializeImpl(std::size_t obj_size, void* memory, std::size_t memory_size) { - // Ensure we don't initialize a slab using null memory - ASSERT(memory != nullptr); - - // Initialize the base allocator - impl.Initialize(obj_size); + void Free(void* obj) { + // Don't allow freeing an object that wasn't allocated from this heap. + const bool contained = this->Contains(reinterpret_cast<uintptr_t>(obj)); + ASSERT(contained); + KSlabHeapImpl::Free(obj); + } - // Set our tracking variables - const std::size_t num_obj = (memory_size / obj_size); - start = reinterpret_cast<uintptr_t>(memory); - end = start + num_obj * obj_size; - peak = start; + size_t GetObjectIndex(const void* obj) const { + if constexpr (SupportDynamicExpansion) { + if (!this->Contains(reinterpret_cast<uintptr_t>(obj))) { + return std::numeric_limits<size_t>::max(); + } + } - // Free the objects - u8* cur = reinterpret_cast<u8*>(end); + return (reinterpret_cast<uintptr_t>(obj) - m_start) / this->GetObjectSize(); + } - for (std::size_t i{}; i < num_obj; i++) { - cur -= obj_size; - impl.Free(cur); - } + size_t GetPeakIndex() const { + return this->GetObjectIndex(reinterpret_cast<const void*>(m_peak)); } -private: - using Impl = impl::KSlabHeapImpl; + uintptr_t GetSlabHeapAddress() const { + return m_start; + } - Impl impl; - uintptr_t peak{}; - uintptr_t start{}; - uintptr_t end{}; + size_t GetNumRemaining() const { + // Only calculate the number of remaining objects under debug configuration. + return 0; + } }; template <typename T> -class KSlabHeap final : public KSlabHeapBase { -public: - enum class AllocationType { - Host, - Guest, - }; +class KSlabHeap final : public KSlabHeapBase<false> { +private: + using BaseHeap = KSlabHeapBase<false>; - explicit constexpr KSlabHeap(AllocationType allocation_type_ = AllocationType::Host) - : KSlabHeapBase(), allocation_type{allocation_type_} {} +public: + constexpr KSlabHeap() = default; - void Initialize(void* memory, std::size_t memory_size) { - if (allocation_type == AllocationType::Guest) { - InitializeImpl(sizeof(T), memory, memory_size); - } + void Initialize(void* memory, size_t memory_size) { + BaseHeap::Initialize(sizeof(T), memory, memory_size); } T* Allocate() { - switch (allocation_type) { - case AllocationType::Host: - // Fallback for cases where we do not yet support allocating guest memory from the slab - // heap, such as for kernel memory regions. - return new T; - - case AllocationType::Guest: - T* obj = static_cast<T*>(AllocateImpl()); - if (obj != nullptr) { - new (obj) T(); - } - return obj; - } + T* obj = static_cast<T*>(BaseHeap::Allocate()); - UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type); - return nullptr; + if (obj != nullptr) [[likely]] { + std::construct_at(obj); + } + return obj; } - T* AllocateWithKernel(KernelCore& kernel) { - switch (allocation_type) { - case AllocationType::Host: - // Fallback for cases where we do not yet support allocating guest memory from the slab - // heap, such as for kernel memory regions. - return new T(kernel); + T* Allocate(KernelCore& kernel) { + T* obj = static_cast<T*>(BaseHeap::Allocate()); - case AllocationType::Guest: - T* obj = static_cast<T*>(AllocateImpl()); - if (obj != nullptr) { - new (obj) T(kernel); - } - return obj; + if (obj != nullptr) [[likely]] { + std::construct_at(obj, kernel); } - - UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type); - return nullptr; + return obj; } void Free(T* obj) { - switch (allocation_type) { - case AllocationType::Host: - // Fallback for cases where we do not yet support allocating guest memory from the slab - // heap, such as for kernel memory regions. - delete obj; - return; - - case AllocationType::Guest: - FreeImpl(obj); - return; - } - - UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type); + BaseHeap::Free(obj); } - constexpr std::size_t GetObjectIndex(const T* obj) const { - return GetObjectIndexImpl(obj); + size_t GetObjectIndex(const T* obj) const { + return BaseHeap::GetObjectIndex(obj); } - -private: - const AllocationType allocation_type; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index de3ffe0c7..ba7f72c6b 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -210,7 +210,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s if (owner != nullptr) { // Setup the TLS, if needed. if (type == ThreadType::User) { - tls_address = owner->CreateTLSRegion(); + R_TRY(owner->CreateThreadLocalRegion(std::addressof(tls_address))); } parent = owner; @@ -305,7 +305,7 @@ void KThread::Finalize() { // If the thread has a local region, delete it. if (tls_address != 0) { - parent->FreeTLSRegion(tls_address); + ASSERT(parent->DeleteThreadLocalRegion(tls_address).IsSuccess()); } // Release any waiters. @@ -326,6 +326,9 @@ void KThread::Finalize() { } } + // Release host emulation members. + host_context.reset(); + // Perform inherited finalization. KSynchronizationObject::Finalize(); } diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index d058db62c..f46db7298 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -656,7 +656,7 @@ private: static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles)); struct ConditionVariableComparator { - struct LightCompareType { + struct RedBlackKeyType { u64 cv_key{}; s32 priority{}; @@ -672,8 +672,8 @@ private: template <typename T> requires( std::same_as<T, KThread> || - std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs, - const KThread& rhs) { + std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs, + const KThread& rhs) { const u64 l_key = lhs.GetConditionVariableKey(); const u64 r_key = rhs.GetConditionVariableKey(); diff --git a/src/core/hle/kernel/k_thread_local_page.cpp b/src/core/hle/kernel/k_thread_local_page.cpp new file mode 100644 index 000000000..4653c29f6 --- /dev/null +++ b/src/core/hle/kernel/k_thread_local_page.cpp @@ -0,0 +1,65 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/scope_exit.h" +#include "core/hle/kernel/k_memory_block.h" +#include "core/hle/kernel/k_page_table.h" +#include "core/hle/kernel/k_process.h" +#include "core/hle/kernel/k_thread_local_page.h" +#include "core/hle/kernel/kernel.h" + +namespace Kernel { + +ResultCode KThreadLocalPage::Initialize(KernelCore& kernel, KProcess* process) { + // Set that this process owns us. + m_owner = process; + m_kernel = &kernel; + + // Allocate a new page. + KPageBuffer* page_buf = KPageBuffer::Allocate(kernel); + R_UNLESS(page_buf != nullptr, ResultOutOfMemory); + auto page_buf_guard = SCOPE_GUARD({ KPageBuffer::Free(kernel, page_buf); }); + + // Map the address in. + const auto phys_addr = kernel.System().DeviceMemory().GetPhysicalAddr(page_buf); + R_TRY(m_owner->PageTable().MapPages(std::addressof(m_virt_addr), 1, PageSize, phys_addr, + KMemoryState::ThreadLocal, + KMemoryPermission::UserReadWrite)); + + // We succeeded. + page_buf_guard.Cancel(); + + return ResultSuccess; +} + +ResultCode KThreadLocalPage::Finalize() { + // Get the physical address of the page. + const PAddr phys_addr = m_owner->PageTable().GetPhysicalAddr(m_virt_addr); + ASSERT(phys_addr); + + // Unmap the page. + R_TRY(m_owner->PageTable().UnmapPages(this->GetAddress(), 1, KMemoryState::ThreadLocal)); + + // Free the page. + KPageBuffer::Free(*m_kernel, KPageBuffer::FromPhysicalAddress(m_kernel->System(), phys_addr)); + + return ResultSuccess; +} + +VAddr KThreadLocalPage::Reserve() { + for (size_t i = 0; i < m_is_region_free.size(); i++) { + if (m_is_region_free[i]) { + m_is_region_free[i] = false; + return this->GetRegionAddress(i); + } + } + + return 0; +} + +void KThreadLocalPage::Release(VAddr addr) { + m_is_region_free[this->GetRegionIndex(addr)] = true; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_thread_local_page.h b/src/core/hle/kernel/k_thread_local_page.h new file mode 100644 index 000000000..658c67e94 --- /dev/null +++ b/src/core/hle/kernel/k_thread_local_page.h @@ -0,0 +1,112 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "common/intrusive_red_black_tree.h" +#include "core/hle/kernel/k_page_buffer.h" +#include "core/hle/kernel/memory_types.h" +#include "core/hle/kernel/slab_helpers.h" +#include "core/hle/result.h" + +namespace Kernel { + +class KernelCore; +class KProcess; + +class KThreadLocalPage final : public Common::IntrusiveRedBlackTreeBaseNode<KThreadLocalPage>, + public KSlabAllocated<KThreadLocalPage> { +public: + static constexpr size_t RegionsPerPage = PageSize / Svc::ThreadLocalRegionSize; + static_assert(RegionsPerPage > 0); + +public: + constexpr explicit KThreadLocalPage(VAddr addr = {}) : m_virt_addr(addr) { + m_is_region_free.fill(true); + } + + constexpr VAddr GetAddress() const { + return m_virt_addr; + } + + ResultCode Initialize(KernelCore& kernel, KProcess* process); + ResultCode Finalize(); + + VAddr Reserve(); + void Release(VAddr addr); + + bool IsAllUsed() const { + return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(), + [](bool is_free) { return !is_free; }); + } + + bool IsAllFree() const { + return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(), + [](bool is_free) { return is_free; }); + } + + bool IsAnyUsed() const { + return !this->IsAllFree(); + } + + bool IsAnyFree() const { + return !this->IsAllUsed(); + } + +public: + using RedBlackKeyType = VAddr; + + static constexpr RedBlackKeyType GetRedBlackKey(const RedBlackKeyType& v) { + return v; + } + static constexpr RedBlackKeyType GetRedBlackKey(const KThreadLocalPage& v) { + return v.GetAddress(); + } + + template <typename T> + requires(std::same_as<T, KThreadLocalPage> || + std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs, + const KThreadLocalPage& + rhs) { + const VAddr lval = GetRedBlackKey(lhs); + const VAddr rval = GetRedBlackKey(rhs); + + if (lval < rval) { + return -1; + } else if (lval == rval) { + return 0; + } else { + return 1; + } + } + +private: + constexpr VAddr GetRegionAddress(size_t i) const { + return this->GetAddress() + i * Svc::ThreadLocalRegionSize; + } + + constexpr bool Contains(VAddr addr) const { + return this->GetAddress() <= addr && addr < this->GetAddress() + PageSize; + } + + constexpr size_t GetRegionIndex(VAddr addr) const { + ASSERT(Common::IsAligned(addr, Svc::ThreadLocalRegionSize)); + ASSERT(this->Contains(addr)); + return (addr - this->GetAddress()) / Svc::ThreadLocalRegionSize; + } + +private: + VAddr m_virt_addr{}; + KProcess* m_owner{}; + KernelCore* m_kernel{}; + std::array<bool, RegionsPerPage> m_is_region_free{}; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 797f47021..f9828bc43 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -52,7 +52,7 @@ namespace Kernel { struct KernelCore::Impl { explicit Impl(Core::System& system_, KernelCore& kernel_) - : time_manager{system_}, object_list_container{kernel_}, + : time_manager{system_}, service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {} void SetMulticore(bool is_multi) { @@ -60,6 +60,7 @@ struct KernelCore::Impl { } void Initialize(KernelCore& kernel) { + global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel); global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel); global_handle_table->Initialize(KHandleTable::MaxTableSize); @@ -70,14 +71,13 @@ struct KernelCore::Impl { // Derive the initial memory layout from the emulated board Init::InitializeSlabResourceCounts(kernel); - KMemoryLayout memory_layout; - DeriveInitialMemoryLayout(memory_layout); - Init::InitializeSlabHeaps(system, memory_layout); + DeriveInitialMemoryLayout(); + Init::InitializeSlabHeaps(system, *memory_layout); // Initialize kernel memory and resources. - InitializeSystemResourceLimit(kernel, system.CoreTiming(), memory_layout); - InitializeMemoryLayout(memory_layout); - InitializePageSlab(); + InitializeSystemResourceLimit(kernel, system.CoreTiming()); + InitializeMemoryLayout(); + Init::InitializeKPageBufferSlabHeap(system); InitializeSchedulers(); InitializeSuspendThreads(); InitializePreemption(kernel); @@ -108,19 +108,6 @@ struct KernelCore::Impl { for (auto* server_port : server_ports_) { server_port->Close(); } - // Close all open server sessions. - std::unordered_set<KServerSession*> server_sessions_; - { - std::lock_guard lk(server_sessions_lock); - server_sessions_ = server_sessions; - server_sessions.clear(); - } - for (auto* server_session : server_sessions_) { - server_session->Close(); - } - - // Ensure that the object list container is finalized and properly shutdown. - object_list_container.Finalize(); // Ensures all service threads gracefully shutdown. ClearServiceThreads(); @@ -195,11 +182,15 @@ struct KernelCore::Impl { { std::lock_guard lk(registered_objects_lock); if (registered_objects.size()) { - LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!", - registered_objects.size()); + LOG_DEBUG(Kernel, "{} kernel objects were dangling on shutdown!", + registered_objects.size()); registered_objects.clear(); } } + + // Ensure that the object list container is finalized and properly shutdown. + global_object_list_container->Finalize(); + global_object_list_container.reset(); } void InitializePhysicalCores() { @@ -219,12 +210,11 @@ struct KernelCore::Impl { // Creates the default system resource limit void InitializeSystemResourceLimit(KernelCore& kernel, - const Core::Timing::CoreTiming& core_timing, - const KMemoryLayout& memory_layout) { + const Core::Timing::CoreTiming& core_timing) { system_resource_limit = KResourceLimit::Create(system.Kernel()); system_resource_limit->Initialize(&core_timing); - const auto [total_size, kernel_size] = memory_layout.GetTotalAndKernelMemorySizes(); + const auto [total_size, kernel_size] = memory_layout->GetTotalAndKernelMemorySizes(); // If setting the default system values fails, then something seriously wrong has occurred. ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemory, total_size) @@ -293,15 +283,16 @@ struct KernelCore::Impl { // Gets the dummy KThread for the caller, allocating a new one if this is the first time KThread* GetHostDummyThread() { - auto make_thread = [this]() { - KThread* thread = KThread::Create(system.Kernel()); + auto initialize = [this](KThread* thread) { ASSERT(KThread::InitializeDummyThread(thread).IsSuccess()); thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId())); return thread; }; - thread_local KThread* saved_thread = make_thread(); - return saved_thread; + thread_local auto raw_thread = KThread(system.Kernel()); + thread_local auto thread = initialize(&raw_thread); + + return thread; } /// Registers a CPU core thread by allocating a host thread ID for it @@ -353,16 +344,18 @@ struct KernelCore::Impl { return schedulers[thread_id]->GetCurrentThread(); } - void DeriveInitialMemoryLayout(KMemoryLayout& memory_layout) { + void DeriveInitialMemoryLayout() { + memory_layout = std::make_unique<KMemoryLayout>(); + // Insert the root region for the virtual memory tree, from which all other regions will // derive. - memory_layout.GetVirtualMemoryRegionTree().InsertDirectly( + memory_layout->GetVirtualMemoryRegionTree().InsertDirectly( KernelVirtualAddressSpaceBase, KernelVirtualAddressSpaceBase + KernelVirtualAddressSpaceSize - 1); // Insert the root region for the physical memory tree, from which all other regions will // derive. - memory_layout.GetPhysicalMemoryRegionTree().InsertDirectly( + memory_layout->GetPhysicalMemoryRegionTree().InsertDirectly( KernelPhysicalAddressSpaceBase, KernelPhysicalAddressSpaceBase + KernelPhysicalAddressSpaceSize - 1); @@ -379,7 +372,7 @@ struct KernelCore::Impl { if (!(kernel_region_start + KernelRegionSize - 1 <= KernelVirtualAddressSpaceLast)) { kernel_region_size = KernelVirtualAddressSpaceEnd - kernel_region_start; } - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( kernel_region_start, kernel_region_size, KMemoryRegionType_Kernel)); // Setup the code region. @@ -388,11 +381,11 @@ struct KernelCore::Impl { Common::AlignDown(code_start_virt_addr, CodeRegionAlign); constexpr VAddr code_region_end = Common::AlignUp(code_end_virt_addr, CodeRegionAlign); constexpr size_t code_region_size = code_region_end - code_region_start; - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( code_region_start, code_region_size, KMemoryRegionType_KernelCode)); // Setup board-specific device physical regions. - Init::SetupDevicePhysicalMemoryRegions(memory_layout); + Init::SetupDevicePhysicalMemoryRegions(*memory_layout); // Determine the amount of space needed for the misc region. size_t misc_region_needed_size; @@ -401,7 +394,7 @@ struct KernelCore::Impl { misc_region_needed_size = Core::Hardware::NUM_CPU_CORES * (3 * (PageSize + PageSize)); // Account for each auto-map device. - for (const auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { + for (const auto& region : memory_layout->GetPhysicalMemoryRegionTree()) { if (region.HasTypeAttribute(KMemoryRegionAttr_ShouldKernelMap)) { // Check that the region is valid. ASSERT(region.GetEndAddress() != 0); @@ -426,22 +419,22 @@ struct KernelCore::Impl { // Setup the misc region. const VAddr misc_region_start = - memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( + memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion( misc_region_size, MiscRegionAlign, KMemoryRegionType_Kernel); - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc)); // Setup the stack region. constexpr size_t StackRegionSize = 14_MiB; constexpr size_t StackRegionAlign = KernelAslrAlignment; const VAddr stack_region_start = - memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( + memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion( StackRegionSize, StackRegionAlign, KMemoryRegionType_Kernel); - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( stack_region_start, StackRegionSize, KMemoryRegionType_KernelStack)); // Determine the size of the resource region. - const size_t resource_region_size = memory_layout.GetResourceRegionSizeForInit(); + const size_t resource_region_size = memory_layout->GetResourceRegionSizeForInit(); // Determine the size of the slab region. const size_t slab_region_size = @@ -458,23 +451,23 @@ struct KernelCore::Impl { Common::AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) - Common::AlignDown(code_end_phys_addr, SlabRegionAlign); const VAddr slab_region_start = - memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( + memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion( slab_region_needed_size, SlabRegionAlign, KMemoryRegionType_Kernel) + (code_end_phys_addr % SlabRegionAlign); - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab)); // Setup the temp region. constexpr size_t TempRegionSize = 128_MiB; constexpr size_t TempRegionAlign = KernelAslrAlignment; const VAddr temp_region_start = - memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( + memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion( TempRegionSize, TempRegionAlign, KMemoryRegionType_Kernel); - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize, - KMemoryRegionType_KernelTemp)); + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize, + KMemoryRegionType_KernelTemp)); // Automatically map in devices that have auto-map attributes. - for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { + for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) { // We only care about kernel regions. if (!region.IsDerivedFrom(KMemoryRegionType_Kernel)) { continue; @@ -501,21 +494,21 @@ struct KernelCore::Impl { const size_t map_size = Common::AlignUp(region.GetEndAddress(), PageSize) - map_phys_addr; const VAddr map_virt_addr = - memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard( + memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard( map_size, PageSize, KMemoryRegionType_KernelMisc, PageSize); - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( map_virt_addr, map_size, KMemoryRegionType_KernelMiscMappedDevice)); region.SetPairAddress(map_virt_addr + region.GetAddress() - map_phys_addr); } - Init::SetupDramPhysicalMemoryRegions(memory_layout); + Init::SetupDramPhysicalMemoryRegions(*memory_layout); // Insert a physical region for the kernel code region. - ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( + ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( code_start_phys_addr, code_region_size, KMemoryRegionType_DramKernelCode)); // Insert a physical region for the kernel slab region. - ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( + ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( slab_start_phys_addr, slab_region_size, KMemoryRegionType_DramKernelSlab)); // Determine size available for kernel page table heaps, requiring > 8 MB. @@ -524,12 +517,12 @@ struct KernelCore::Impl { ASSERT(page_table_heap_size / 4_MiB > 2); // Insert a physical region for the kernel page table heap region - ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( + ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( slab_end_phys_addr, page_table_heap_size, KMemoryRegionType_DramKernelPtHeap)); // All DRAM regions that we haven't tagged by this point will be mapped under the linear // mapping. Tag them. - for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { + for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) { if (region.GetType() == KMemoryRegionType_Dram) { // Check that the region is valid. ASSERT(region.GetEndAddress() != 0); @@ -541,7 +534,7 @@ struct KernelCore::Impl { // Get the linear region extents. const auto linear_extents = - memory_layout.GetPhysicalMemoryRegionTree().GetDerivedRegionExtents( + memory_layout->GetPhysicalMemoryRegionTree().GetDerivedRegionExtents( KMemoryRegionAttr_LinearMapped); ASSERT(linear_extents.GetEndAddress() != 0); @@ -553,7 +546,7 @@ struct KernelCore::Impl { Common::AlignUp(linear_extents.GetEndAddress(), LinearRegionAlign) - aligned_linear_phys_start; const VAddr linear_region_start = - memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard( + memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard( linear_region_size, LinearRegionAlign, KMemoryRegionType_None, LinearRegionAlign); const u64 linear_region_phys_to_virt_diff = linear_region_start - aligned_linear_phys_start; @@ -562,7 +555,7 @@ struct KernelCore::Impl { { PAddr cur_phys_addr = 0; u64 cur_size = 0; - for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { + for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) { if (!region.HasTypeAttribute(KMemoryRegionAttr_LinearMapped)) { continue; } @@ -581,55 +574,49 @@ struct KernelCore::Impl { const VAddr region_virt_addr = region.GetAddress() + linear_region_phys_to_virt_diff; - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( region_virt_addr, region.GetSize(), GetTypeForVirtualLinearMapping(region.GetType()))); region.SetPairAddress(region_virt_addr); KMemoryRegion* virt_region = - memory_layout.GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr); + memory_layout->GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr); ASSERT(virt_region != nullptr); virt_region->SetPairAddress(region.GetAddress()); } } // Insert regions for the initial page table region. - ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( + ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( resource_end_phys_addr, KernelPageTableHeapSize, KMemoryRegionType_DramKernelInitPt)); - ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( + ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert( resource_end_phys_addr + linear_region_phys_to_virt_diff, KernelPageTableHeapSize, KMemoryRegionType_VirtualDramKernelInitPt)); // All linear-mapped DRAM regions that we haven't tagged by this point will be allocated to // some pool partition. Tag them. - for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { + for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) { if (region.GetType() == (KMemoryRegionType_Dram | KMemoryRegionAttr_LinearMapped)) { region.SetType(KMemoryRegionType_DramPoolPartition); } } // Setup all other memory regions needed to arrange the pool partitions. - Init::SetupPoolPartitionMemoryRegions(memory_layout); + Init::SetupPoolPartitionMemoryRegions(*memory_layout); // Cache all linear regions in their own trees for faster access, later. - memory_layout.InitializeLinearMemoryRegionTrees(aligned_linear_phys_start, - linear_region_start); + memory_layout->InitializeLinearMemoryRegionTrees(aligned_linear_phys_start, + linear_region_start); } - void InitializeMemoryLayout(const KMemoryLayout& memory_layout) { - const auto system_pool = memory_layout.GetKernelSystemPoolRegionPhysicalExtents(); - const auto applet_pool = memory_layout.GetKernelAppletPoolRegionPhysicalExtents(); - const auto application_pool = memory_layout.GetKernelApplicationPoolRegionPhysicalExtents(); + void InitializeMemoryLayout() { + const auto system_pool = memory_layout->GetKernelSystemPoolRegionPhysicalExtents(); - // Initialize memory managers + // Initialize the memory manager. memory_manager = std::make_unique<KMemoryManager>(system); - memory_manager->InitializeManager(KMemoryManager::Pool::Application, - application_pool.GetAddress(), - application_pool.GetEndAddress()); - memory_manager->InitializeManager(KMemoryManager::Pool::Applet, applet_pool.GetAddress(), - applet_pool.GetEndAddress()); - memory_manager->InitializeManager(KMemoryManager::Pool::System, system_pool.GetAddress(), - system_pool.GetEndAddress()); + const auto& management_region = memory_layout->GetPoolManagementRegion(); + ASSERT(management_region.GetEndAddress() != 0); + memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize()); // Setup memory regions for emulated processes // TODO(bunnei): These should not be hardcoded regions initialized within the kernel @@ -666,22 +653,6 @@ struct KernelCore::Impl { time_phys_addr, time_size, "Time:SharedMemory"); } - void InitializePageSlab() { - // Allocate slab heaps - user_slab_heap_pages = - std::make_unique<KSlabHeap<Page>>(KSlabHeap<Page>::AllocationType::Guest); - - // TODO(ameerj): This should be derived, not hardcoded within the kernel - constexpr u64 user_slab_heap_size{0x3de000}; - // Reserve slab heaps - ASSERT( - system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size)); - // Initialize slab heap - user_slab_heap_pages->Initialize( - system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase), - user_slab_heap_size); - } - KClientPort* CreateNamedServicePort(std::string name) { auto search = service_interface_factory.find(name); if (search == service_interface_factory.end()) { @@ -719,7 +690,6 @@ struct KernelCore::Impl { } std::mutex server_ports_lock; - std::mutex server_sessions_lock; std::mutex registered_objects_lock; std::mutex registered_in_use_objects_lock; @@ -743,14 +713,13 @@ struct KernelCore::Impl { // stores all the objects in place. std::unique_ptr<KHandleTable> global_handle_table; - KAutoObjectWithListContainer object_list_container; + std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container; /// Map of named ports managed by the kernel, which can be retrieved using /// the ConnectToPort SVC. std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory; NamedPortTable named_ports; std::unordered_set<KServerPort*> server_ports; - std::unordered_set<KServerSession*> server_sessions; std::unordered_set<KAutoObject*> registered_objects; std::unordered_set<KAutoObject*> registered_in_use_objects; @@ -762,7 +731,6 @@ struct KernelCore::Impl { // Kernel memory management std::unique_ptr<KMemoryManager> memory_manager; - std::unique_ptr<KSlabHeap<Page>> user_slab_heap_pages; // Shared memory for services Kernel::KSharedMemory* hid_shared_mem{}; @@ -770,6 +738,9 @@ struct KernelCore::Impl { Kernel::KSharedMemory* irs_shared_mem{}; Kernel::KSharedMemory* time_shared_mem{}; + // Memory layout + std::unique_ptr<KMemoryLayout> memory_layout; + // Threads used for services std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads; Common::ThreadWorker service_threads_manager; @@ -918,11 +889,11 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { } KAutoObjectWithListContainer& KernelCore::ObjectListContainer() { - return impl->object_list_container; + return *impl->global_object_list_container; } const KAutoObjectWithListContainer& KernelCore::ObjectListContainer() const { - return impl->object_list_container; + return *impl->global_object_list_container; } void KernelCore::InvalidateAllInstructionCaches() { @@ -952,16 +923,6 @@ KClientPort* KernelCore::CreateNamedServicePort(std::string name) { return impl->CreateNamedServicePort(std::move(name)); } -void KernelCore::RegisterServerSession(KServerSession* server_session) { - std::lock_guard lk(impl->server_sessions_lock); - impl->server_sessions.insert(server_session); -} - -void KernelCore::UnregisterServerSession(KServerSession* server_session) { - std::lock_guard lk(impl->server_sessions_lock); - impl->server_sessions.erase(server_session); -} - void KernelCore::RegisterKernelObject(KAutoObject* object) { std::lock_guard lk(impl->registered_objects_lock); impl->registered_objects.insert(object); @@ -1034,14 +995,6 @@ const KMemoryManager& KernelCore::MemoryManager() const { return *impl->memory_manager; } -KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() { - return *impl->user_slab_heap_pages; -} - -const KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() const { - return *impl->user_slab_heap_pages; -} - Kernel::KSharedMemory& KernelCore::GetHidSharedMem() { return *impl->hid_shared_mem; } @@ -1135,6 +1088,10 @@ const KWorkerTaskManager& KernelCore::WorkerTaskManager() const { return impl->worker_task_manager; } +const KMemoryLayout& KernelCore::MemoryLayout() const { + return *impl->memory_layout; +} + bool KernelCore::IsPhantomModeForSingleCore() const { return impl->IsPhantomModeForSingleCore(); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 0e04fc3bb..7087bbda6 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -41,7 +41,9 @@ class KClientSession; class KEvent; class KHandleTable; class KLinkedListNode; +class KMemoryLayout; class KMemoryManager; +class KPageBuffer; class KPort; class KProcess; class KResourceLimit; @@ -51,6 +53,7 @@ class KSession; class KSharedMemory; class KSharedMemoryInfo; class KThread; +class KThreadLocalPage; class KTransferMemory; class KWorkerTaskManager; class KWritableEvent; @@ -193,14 +196,6 @@ public: /// Opens a port to a service previously registered with RegisterNamedService. KClientPort* CreateNamedServicePort(std::string name); - /// Registers a server session with the gobal emulation state, to be freed on shutdown. This is - /// necessary because we do not emulate processes for HLE sessions. - void RegisterServerSession(KServerSession* server_session); - - /// Unregisters a server session previously registered with RegisterServerSession when it was - /// destroyed during the current emulation session. - void UnregisterServerSession(KServerSession* server_session); - /// Registers all kernel objects with the global emulation state, this is purely for tracking /// leaks after emulation has been shutdown. void RegisterKernelObject(KAutoObject* object); @@ -238,12 +233,6 @@ public: /// Gets the virtual memory manager for the kernel. const KMemoryManager& MemoryManager() const; - /// Gets the slab heap allocated for user space pages. - KSlabHeap<Page>& GetUserSlabHeapPages(); - - /// Gets the slab heap allocated for user space pages. - const KSlabHeap<Page>& GetUserSlabHeapPages() const; - /// Gets the shared memory object for HID services. Kernel::KSharedMemory& GetHidSharedMem(); @@ -335,6 +324,10 @@ public: return slab_heap_container->writeable_event; } else if constexpr (std::is_same_v<T, KCodeMemory>) { return slab_heap_container->code_memory; + } else if constexpr (std::is_same_v<T, KPageBuffer>) { + return slab_heap_container->page_buffer; + } else if constexpr (std::is_same_v<T, KThreadLocalPage>) { + return slab_heap_container->thread_local_page; } } @@ -350,6 +343,9 @@ public: /// Gets the current worker task manager, used for dispatching KThread/KProcess tasks. const KWorkerTaskManager& WorkerTaskManager() const; + /// Gets the memory layout. + const KMemoryLayout& MemoryLayout() const; + private: friend class KProcess; friend class KThread; @@ -393,6 +389,8 @@ private: KSlabHeap<KTransferMemory> transfer_memory; KSlabHeap<KWritableEvent> writeable_event; KSlabHeap<KCodeMemory> code_memory; + KSlabHeap<KPageBuffer> page_buffer; + KSlabHeap<KThreadLocalPage> thread_local_page; }; std::unique_ptr<SlabHeapContainer> slab_heap_container; diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp index 4eb3a5988..52d25b837 100644 --- a/src/core/hle/kernel/service_thread.cpp +++ b/src/core/hle/kernel/service_thread.cpp @@ -49,12 +49,9 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std return; } + // Allocate a dummy guest thread for this host thread. kernel.RegisterHostThread(); - // Ensure the dummy thread allocated for this host thread is closed on exit. - auto* dummy_thread = kernel.GetCurrentEmuThread(); - SCOPE_EXIT({ dummy_thread->Close(); }); - while (true) { std::function<void()> task; diff --git a/src/core/hle/kernel/slab_helpers.h b/src/core/hle/kernel/slab_helpers.h index f1c11256e..dc1e48fc9 100644 --- a/src/core/hle/kernel/slab_helpers.h +++ b/src/core/hle/kernel/slab_helpers.h @@ -59,7 +59,7 @@ class KAutoObjectWithSlabHeapAndContainer : public Base { private: static Derived* Allocate(KernelCore& kernel) { - return kernel.SlabHeap<Derived>().AllocateWithKernel(kernel); + return kernel.SlabHeap<Derived>().Allocate(kernel); } static void Free(KernelCore& kernel, Derived* obj) { diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h index 365e22e4e..b2e9ec092 100644 --- a/src/core/hle/kernel/svc_types.h +++ b/src/core/hle/kernel/svc_types.h @@ -96,4 +96,6 @@ constexpr inline s32 IdealCoreNoUpdate = -3; constexpr inline s32 LowestThreadPriority = 63; constexpr inline s32 HighestThreadPriority = 0; +constexpr inline size_t ThreadLocalRegionSize = 0x200; + } // namespace Kernel::Svc diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 2f8e21568..420de3c54 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -980,7 +980,7 @@ private: LOG_DEBUG(Service_AM, "called"); IPC::RequestParser rp{ctx}; - applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>()); + applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>().lock()); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -1007,7 +1007,7 @@ private: LOG_DEBUG(Service_AM, "called"); IPC::RequestParser rp{ctx}; - applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>()); + applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>().lock()); ASSERT(applet->IsInitialized()); applet->ExecuteInteractive(); diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp index b8c2c6e51..ff0bbb788 100644 --- a/src/core/hle/service/kernel_helpers.cpp +++ b/src/core/hle/service/kernel_helpers.cpp @@ -17,21 +17,12 @@ namespace Service::KernelHelpers { ServiceContext::ServiceContext(Core::System& system_, std::string name_) : kernel(system_.Kernel()) { - - // Create a resource limit for the process. - const auto physical_memory_size = - kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::System); - auto* resource_limit = Kernel::CreateResourceLimitForProcess(system_, physical_memory_size); - // Create the process. process = Kernel::KProcess::Create(kernel); ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_), Kernel::KProcess::ProcessType::KernelInternal, - resource_limit) + kernel.GetSystemResourceLimit()) .IsSuccess()); - - // Close reference to our resource limit, as the process opens one. - resource_limit->Close(); } ServiceContext::~ServiceContext() { diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 9fc7bb1b1..099276420 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -288,7 +288,7 @@ public: } bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const { - constexpr std::size_t padding_size{4 * Kernel::PageSize}; + const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize}; const auto start_info{page_table.QueryInfo(start - 1)}; if (start_info.state != Kernel::KMemoryState::Free) { @@ -308,31 +308,69 @@ public: return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize()); } - VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const { - VAddr addr{}; - const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >> - Kernel::PageBits}; - do { - addr = page_table.GetAliasCodeRegionStart() + - (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits); - } while (!page_table.IsInsideAddressSpace(addr, size) || - page_table.IsInsideHeapRegion(addr, size) || - page_table.IsInsideAliasRegion(addr, size)); - return addr; + ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) { + size = Common::AlignUp(size, Kernel::PageSize); + size += page_table.GetNumGuardPages() * Kernel::PageSize * 4; + + const auto is_region_available = [&](VAddr addr) { + const auto end_addr = addr + size; + while (addr < end_addr) { + if (system.Memory().IsValidVirtualAddress(addr)) { + return false; + } + + if (!page_table.IsInsideAddressSpace(out_addr, size)) { + return false; + } + + if (page_table.IsInsideHeapRegion(out_addr, size)) { + return false; + } + + if (page_table.IsInsideAliasRegion(out_addr, size)) { + return false; + } + + addr += Kernel::PageSize; + } + return true; + }; + + bool succeeded = false; + const auto map_region_end = + page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize(); + while (current_map_addr < map_region_end) { + if (is_region_available(current_map_addr)) { + succeeded = true; + break; + } + current_map_addr += 0x100000; + } + + if (!succeeded) { + UNREACHABLE_MSG("Out of address space!"); + return Kernel::ResultOutOfMemory; + } + + out_addr = current_map_addr; + current_map_addr += size; + + return ResultSuccess; } - ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress, - u64 size) const { + ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) { + auto& page_table{process->PageTable()}; + VAddr addr{}; + for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { - auto& page_table{process->PageTable()}; - const VAddr addr{GetRandomMapRegion(page_table, size)}; - const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)}; + R_TRY(GetAvailableMapRegion(page_table, size, addr)); + const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)}; if (result == Kernel::ResultInvalidCurrentMemory) { continue; } - CASCADE_CODE(result); + R_TRY(result); if (ValidateRegionForMap(page_table, addr, size)) { return addr; @@ -343,7 +381,7 @@ public: } ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size, - VAddr bss_addr, std::size_t bss_size, std::size_t size) const { + VAddr bss_addr, std::size_t bss_size, std::size_t size) { for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { auto& page_table{process->PageTable()}; VAddr addr{}; @@ -597,6 +635,7 @@ public: LOG_WARNING(Service_LDR, "(STUBBED) called"); initialized = true; + current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart(); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -607,6 +646,7 @@ private: std::map<VAddr, NROInfo> nro; std::map<VAddr, std::vector<SHA256Hash>> nrr; + VAddr current_map_addr{}; bool IsValidNROHash(const SHA256Hash& hash) const { return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) { diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index eaa172595..695a1faa6 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp @@ -81,6 +81,8 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name } auto* port = Kernel::KPort::Create(kernel); + SCOPE_EXIT({ port->Close(); }); + port->Initialize(ServerSessionCountMax, false, name); auto handler = it->second; port->GetServerPort().SetSessionHandler(std::move(handler)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index b412957c7..2b360e073 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = struct RescalingLayout { alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures; alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images; - alignas(16) u32 down_factor; + u32 down_factor; }; constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp index e0fe47912..f3c7ceb57 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -13,59 +13,535 @@ namespace { // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, u64 ttbl) { - IR::U32 r{ir.Imm32(0)}; - const IR::U32 not_a{ir.BitwiseNot(a)}; - const IR::U32 not_b{ir.BitwiseNot(b)}; - const IR::U32 not_c{ir.BitwiseNot(c)}; - if (ttbl & 0x01) { - // r |= ~a & ~b & ~c; - const auto lhs{ir.BitwiseAnd(not_a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); + switch (ttbl) { + // generated code, do not edit manually + case 0: + return ir.Imm32(0); + case 1: + return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c))); + case 2: + return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b))); + case 3: + return ir.BitwiseNot(ir.BitwiseOr(a, b)); + case 4: + return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c))); + case 5: + return ir.BitwiseNot(ir.BitwiseOr(a, c)); + case 6: + return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c)); + case 7: + return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c))); + case 8: + return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)); + case 9: + return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c))); + case 10: + return ir.BitwiseAnd(c, ir.BitwiseNot(a)); + case 11: + return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b))); + case 12: + return ir.BitwiseAnd(b, ir.BitwiseNot(a)); + case 13: + return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 14: + return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); + case 15: + return ir.BitwiseNot(a); + case 16: + return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c))); + case 17: + return ir.BitwiseNot(ir.BitwiseOr(b, c)); + case 18: + return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c)); + case 19: + return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c))); + case 20: + return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b)); + case 21: + return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); + case 22: + return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); + case 23: + return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)), + ir.BitwiseNot(a)); + case 24: + return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)); + case 25: + return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c))); + case 26: + return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c)); + case 27: + return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c)); + case 28: + return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b)); + case 29: + return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c)); + case 30: + return ir.BitwiseXor(a, ir.BitwiseOr(b, c)); + case 31: + return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c))); + case 32: + return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)); + case 33: + return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c))); + case 34: + return ir.BitwiseAnd(c, ir.BitwiseNot(b)); + case 35: + return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a))); + case 36: + return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c)); + case 37: + return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c))); + case 38: + return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c)); + case 39: + return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 40: + return ir.BitwiseAnd(c, ir.BitwiseXor(a, b)); + case 41: + return ir.BitwiseXor(ir.BitwiseOr(a, b), + ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c))); + case 42: + return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b))); + case 43: + return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), + ir.BitwiseOr(b, ir.BitwiseXor(a, c))); + case 44: + return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)); + case 45: + return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 46: + return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c)); + case 47: + return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a)); + case 48: + return ir.BitwiseAnd(a, ir.BitwiseNot(b)); + case 49: + return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c))); + case 50: + return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); + case 51: + return ir.BitwiseNot(b); + case 52: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b)); + case 53: + return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a))); + case 54: + return ir.BitwiseXor(b, ir.BitwiseOr(a, c)); + case 55: + return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c))); + case 56: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)); + case 57: + return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c))); + case 58: + return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c)); + case 59: + return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b)); + case 60: + return ir.BitwiseXor(a, b); + case 61: + return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b)); + case 62: + return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b)); + case 63: + return ir.BitwiseNot(ir.BitwiseAnd(a, b)); + case 64: + return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); + case 65: + return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b))); + case 66: + return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c)); + case 67: + return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b))); + case 68: + return ir.BitwiseAnd(b, ir.BitwiseNot(c)); + case 69: + return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a))); + case 70: + return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c)); + case 71: + return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b))); + case 72: + return ir.BitwiseAnd(b, ir.BitwiseXor(a, c)); + case 73: + return ir.BitwiseXor(ir.BitwiseOr(a, c), + ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b))); + case 74: + return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)); + case 75: + return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b))); + case 76: + return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c))); + case 77: + return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), + ir.BitwiseOr(c, ir.BitwiseXor(a, b))); + case 78: + return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c)); + case 79: + return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a)); + case 80: + return ir.BitwiseAnd(a, ir.BitwiseNot(c)); + case 81: + return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b))); + case 82: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c)); + case 83: + return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a))); + case 84: + return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); + case 85: + return ir.BitwiseNot(c); + case 86: + return ir.BitwiseXor(c, ir.BitwiseOr(a, b)); + case 87: + return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b))); + case 88: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)); + case 89: + return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b))); + case 90: + return ir.BitwiseXor(a, c); + case 91: + return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c)); + case 92: + return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b)); + case 93: + return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c)); + case 94: + return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c)); + case 95: + return ir.BitwiseNot(ir.BitwiseAnd(a, c)); + case 96: + return ir.BitwiseAnd(a, ir.BitwiseXor(b, c)); + case 97: + return ir.BitwiseXor(ir.BitwiseOr(b, c), + ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a))); + case 98: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)); + case 99: + return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a))); + case 100: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)); + case 101: + return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a))); + case 102: + return ir.BitwiseXor(b, c); + case 103: + return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c)); + case 104: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b))); + case 105: + return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c)); + case 106: + return ir.BitwiseXor(c, ir.BitwiseAnd(a, b)); + case 107: + return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)), + ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 108: + return ir.BitwiseXor(b, ir.BitwiseAnd(a, c)); + case 109: + return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)), + ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 110: + return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c)); + case 111: + return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c)); + case 112: + return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c))); + case 113: + return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)), + ir.BitwiseOr(c, ir.BitwiseXor(a, b))); + case 114: + return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c)); + case 115: + return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b)); + case 116: + return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b)); + case 117: + return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c)); + case 118: + return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c)); + case 119: + return ir.BitwiseNot(ir.BitwiseAnd(b, c)); + case 120: + return ir.BitwiseXor(a, ir.BitwiseAnd(b, c)); + case 121: + return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)), + ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 122: + return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c)); + case 123: + return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c)); + case 124: + return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b)); + case 125: + return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b)); + case 126: + return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)); + case 127: + return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c))); + case 128: + return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)); + case 129: + return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); + case 130: + return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 131: + return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 132: + return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 133: + return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 134: + return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c))); + case 135: + return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)); + case 136: + return ir.BitwiseAnd(b, c); + case 137: + return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 138: + return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a))); + case 139: + return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b))); + case 140: + return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a))); + case 141: + return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c))); + case 142: + return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); + case 143: + return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)); + case 144: + return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 145: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 146: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c))); + case 147: + return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)); + case 148: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c))); + case 149: + return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); + case 150: + return ir.BitwiseXor(a, ir.BitwiseXor(b, c)); + case 151: + return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), + ir.BitwiseXor(a, ir.BitwiseXor(b, c))); + case 152: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 153: + return ir.BitwiseXor(b, ir.BitwiseNot(c)); + case 154: + return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b))); + case 155: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c))); + case 156: + return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c))); + case 157: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c))); + case 158: + return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c))); + case 159: + return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c))); + case 160: + return ir.BitwiseAnd(a, c); + case 161: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 162: + return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b))); + case 163: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b))); + case 164: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 165: + return ir.BitwiseXor(a, ir.BitwiseNot(c)); + case 166: + return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a))); + case 167: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c))); + case 168: + return ir.BitwiseAnd(c, ir.BitwiseOr(a, b)); + case 169: + return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); + case 170: + return c; + case 171: + return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b))); + case 172: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a))); + case 173: + return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 174: + return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a))); + case 175: + return ir.BitwiseOr(c, ir.BitwiseNot(a)); + case 176: + return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b))); + case 177: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c))); + case 178: + return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); + case 179: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)); + case 180: + return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c))); + case 181: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c))); + case 182: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c))); + case 183: + return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c))); + case 184: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b))); + case 185: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 186: + return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b))); + case 187: + return ir.BitwiseOr(c, ir.BitwiseNot(b)); + case 188: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)); + case 189: + return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 190: + return ir.BitwiseOr(c, ir.BitwiseXor(a, b)); + case 191: + return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b))); + case 192: + return ir.BitwiseAnd(a, b); + case 193: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 194: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 195: + return ir.BitwiseXor(a, ir.BitwiseNot(b)); + case 196: + return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c))); + case 197: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c))); + case 198: + return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a))); + case 199: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b))); + case 200: + return ir.BitwiseAnd(b, ir.BitwiseOr(a, c)); + case 201: + return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); + case 202: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a))); + case 203: + return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 204: + return b; + case 205: + return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c))); + case 206: + return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a))); + case 207: + return ir.BitwiseOr(b, ir.BitwiseNot(a)); + case 208: + return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 209: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c))); + case 210: + return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b))); + case 211: + return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b))); + case 212: + return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); + case 213: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); + case 214: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b))); + case 215: + return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b))); + case 216: + return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c))); + case 217: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 218: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)); + case 219: + return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 220: + return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c))); + case 221: + return ir.BitwiseOr(b, ir.BitwiseNot(c)); + case 222: + return ir.BitwiseOr(b, ir.BitwiseXor(a, c)); + case 223: + return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c))); + case 224: + return ir.BitwiseAnd(a, ir.BitwiseOr(b, c)); + case 225: + return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); + case 226: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c)); + case 227: + return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 228: + return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c)); + case 229: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 230: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)); + case 231: + return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c)); + case 232: + return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); + case 233: + return ir.BitwiseOr(ir.BitwiseAnd(a, b), + ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b))); + case 234: + return ir.BitwiseOr(c, ir.BitwiseAnd(a, b)); + case 235: + return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b))); + case 236: + return ir.BitwiseOr(b, ir.BitwiseAnd(a, c)); + case 237: + return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c))); + case 238: + return ir.BitwiseOr(b, c); + case 239: + return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); + case 240: + return a; + case 241: + return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c))); + case 242: + return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b))); + case 243: + return ir.BitwiseOr(a, ir.BitwiseNot(b)); + case 244: + return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c))); + case 245: + return ir.BitwiseOr(a, ir.BitwiseNot(c)); + case 246: + return ir.BitwiseOr(a, ir.BitwiseXor(b, c)); + case 247: + return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c))); + case 248: + return ir.BitwiseOr(a, ir.BitwiseAnd(b, c)); + case 249: + return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c))); + case 250: + return ir.BitwiseOr(a, c); + case 251: + return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); + case 252: + return ir.BitwiseOr(a, b); + case 253: + return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); + case 254: + return ir.BitwiseOr(a, ir.BitwiseOr(b, c)); + case 255: + return ir.Imm32(0xFFFFFFFF); + // end of generated code } - if (ttbl & 0x02) { - // r |= ~a & ~b & c; - const auto lhs{ir.BitwiseAnd(not_a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x04) { - // r |= ~a & b & ~c; - const auto lhs{ir.BitwiseAnd(not_a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x08) { - // r |= ~a & b & c; - const auto lhs{ir.BitwiseAnd(not_a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x10) { - // r |= a & ~b & ~c; - const auto lhs{ir.BitwiseAnd(a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x20) { - // r |= a & ~b & c; - const auto lhs{ir.BitwiseAnd(a, not_b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x40) { - // r |= a & b & ~c; - const auto lhs{ir.BitwiseAnd(a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, not_c)}; - r = ir.BitwiseOr(r, rhs); - } - if (ttbl & 0x80) { - // r |= a & b & c; - const auto lhs{ir.BitwiseAnd(a, b)}; - const auto rhs{ir.BitwiseAnd(lhs, c)}; - r = ir.BitwiseOr(r, rhs); - } - return r; + throw NotImplementedException("LOP3 with out of range ttbl"); } IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py new file mode 100644 index 000000000..8f547c266 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py @@ -0,0 +1,92 @@ +# Copyright © 2022 degasus <markus@selfnet.de> +# This work is free. You can redistribute it and/or modify it under the +# terms of the Do What The Fuck You Want To Public License, Version 2, +# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details. + +from itertools import product + +# The primitive instructions +OPS = { + 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b), + 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b), + 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b), + 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often inlined in other instructions +} + +# Our database of combination of instructions +optimized_calls = {} +def cmp(lhs, rhs): + if lhs is None: # new entry + return True + if lhs[3] > rhs[3]: # costs + return True + if lhs[3] < rhs[3]: # costs + return False + if len(lhs[0]) > len(rhs[0]): # string len + return True + if len(lhs[0]) < len(rhs[0]): # string len + return False + if lhs[0] > rhs[0]: # string sorting + return True + if lhs[0] < rhs[0]: # string sorting + return False + assert lhs == rhs, "redundant instruction, bug in brute force" + return False +def register(imm, instruction, count, latency): + # Use the sum of instruction count and latency as costs to evaluate which combination is best + costs = count + latency + + old = optimized_calls.get(imm, None) + new = (instruction, count, latency, costs) + + # Update if new or better + if cmp(old, new): + optimized_calls[imm] = new + return True + + return False + +# Constants: 0, 1 (for free) +register(0, 'ir.Imm32(0)', 0, 0) +register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0) + +# Inputs: a, b, c (for free) +ta = 0xF0 +tb = 0xCC +tc = 0xAA +inputs = { + ta : 'a', + tb : 'b', + tc : 'c', +} +for imm, instruction in inputs.items(): + register(imm, instruction, 0, 0) + register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs + +# Try to combine two values from the db with an instruction. +# If it is better than the old method, update it. +while True: + registered = 0 + calls_copy = optimized_calls.copy() + for OP, (argc, cost, f) in OPS.items(): + for args in product(calls_copy.items(), repeat=argc): + # unpack(transponse) the arrays + imm = [arg[0] for arg in args] + value = [arg[1][0] for arg in args] + count = [arg[1][1] for arg in args] + latency = [arg[1][2] for arg in args] + + registered += register( + f(*imm), + OP.format(*value), + sum(count) + cost, + max(latency) + cost) + if registered == 0: + # No update at all? So terminate + break + +# Hacky output. Please improve me to output valid C++ instead. +s = """ case {imm}: + return {op};""" +for imm in range(256): + print(s.format(imm=imm, op=optimized_calls[imm][0])) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 248ad3ced..b22725584 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } Optimization::SsaRewritePass(program); + Optimization::ConstantPropagationPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - Optimization::ConstantPropagationPass(program); - if (Settings::values.resolution_info.active) { Optimization::RescalingPass(program); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 38592afd0..ddf497e32 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32 && + inst->GetOpcode() != IR::Opcode::GetCbufU32x2) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index c28500dd1..496d4667e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s } } +void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; + switch (info.type) { + case TextureType::ColorArray2D: + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); + break; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const IR::Value coord{inst.Arg(1)}; @@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { @@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void PatchImageRead(IR::Block& block, IR::Inst& inst) { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5d6d217bb..54a902f56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: - return StartCBData(method); + return ProcessCBData(argument); case MAXWELL3D_REG_INDEX(cb_bind[0]): return ProcessCBBind(0); case MAXWELL3D_REG_INDEX(cb_bind[1]): @@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(4); case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): return DrawArrays(); + case MAXWELL3D_REG_INDEX(small_index): + regs.index_array.count = regs.small_index.count; + regs.index_array.first = regs.small_index.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + return DrawArrays(); + case MAXWELL3D_REG_INDEX(topology_override): + use_topology_override = true; + return; case MAXWELL3D_REG_INDEX(clear_buffers): return ProcessClearBuffers(); case MAXWELL3D_REG_INDEX(query.query_get): @@ -248,14 +257,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) } void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - if (method == cb_data_state.current) { - regs.reg_array[method] = method_argument; - ProcessCBData(method_argument); - return; - } else if (cb_data_state.current != null_cb_data) { - FinishCBData(); - } - // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { @@ -302,7 +303,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: - ProcessCBMultiData(method, base_start, amount); + ProcessCBMultiData(base_start, amount); break; default: for (std::size_t i = 0; i < amount; i++) { @@ -360,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } } +void Maxwell3D::ProcessTopologyOverride() { + using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; + using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; + + PrimitiveTopology topology{}; + + switch (regs.topology_override) { + case PrimitiveTopologyOverride::None: + topology = regs.draw.topology; + break; + case PrimitiveTopologyOverride::Points: + topology = PrimitiveTopology::Points; + break; + case PrimitiveTopologyOverride::Lines: + topology = PrimitiveTopology::Lines; + break; + case PrimitiveTopologyOverride::LineStrip: + topology = PrimitiveTopology::LineStrip; + break; + default: + topology = static_cast<PrimitiveTopology>(regs.topology_override); + break; + } + + if (use_topology_override) { + regs.draw.topology.Assign(topology); + } +} + void Maxwell3D::FlushMMEInlineDraw() { LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); @@ -370,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { rasterizer->Draw(is_indexed, true); @@ -529,6 +561,8 @@ void Maxwell3D::DrawArrays() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + if (regs.draw.instance_next) { // Increment the current instance *before* drawing. state.current_instance += 1; @@ -587,46 +621,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) { rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); } -void Maxwell3D::ProcessCBData(u32 value) { - const u32 id = cb_data_state.id; - cb_data_state.buffer[id][cb_data_state.counter] = value; - // Increment the current buffer position. - regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; - cb_data_state.counter++; -} - -void Maxwell3D::StartCBData(u32 method) { - constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data); - cb_data_state.start_pos = regs.const_buffer.cb_pos; - cb_data_state.id = method - first_cb_data; - cb_data_state.current = method; - cb_data_state.counter = 0; - ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); -} - -void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) { - if (cb_data_state.current != method) { - if (cb_data_state.current != null_cb_data) { - FinishCBData(); - } - constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data); - cb_data_state.start_pos = regs.const_buffer.cb_pos; - cb_data_state.id = method - first_cb_data; - cb_data_state.current = method; - cb_data_state.counter = 0; - } - const std::size_t id = cb_data_state.id; - const std::size_t size = amount; - std::size_t i = 0; - for (; i < size; i++) { - cb_data_state.buffer[id][cb_data_state.counter] = start_base[i]; - cb_data_state.counter++; - } - // Increment the current buffer position. - regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount; -} - -void Maxwell3D::FinishCBData() { +void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { // Write the input value to the current const buffer at the current position. const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); ASSERT(buffer_address != 0); @@ -634,14 +629,16 @@ void Maxwell3D::FinishCBData() { // Don't allow writing past the end of the buffer. ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); - const GPUVAddr address{buffer_address + cb_data_state.start_pos}; - const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; + const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; + const size_t copy_size = amount * sizeof(u32); + memory_manager.WriteBlock(address, start_base, copy_size); - const u32 id = cb_data_state.id; - memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); + // Increment the current buffer position. + regs.const_buffer.cb_pos += static_cast<u32>(copy_size); +} - cb_data_state.id = null_cb_data; - cb_data_state.current = null_cb_data; +void Maxwell3D::ProcessCBData(u32 value) { + ProcessCBMultiData(&value, 1); } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dc9df6c8b..357a74c70 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -367,6 +367,22 @@ public: Patches = 0xe, }; + // Constants as from NVC0_3D_UNK1970_D3D + // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598 + enum class PrimitiveTopologyOverride : u32 { + None = 0x0, + Points = 0x1, + Lines = 0x2, + LineStrip = 0x3, + Triangles = 0x4, + TriangleStrip = 0x5, + LinesAdjacency = 0xa, + LineStripAdjacency = 0xb, + TrianglesAdjacency = 0xc, + TriangleStripAdjacency = 0xd, + Patches = 0xe, + }; + enum class IndexFormat : u32 { UnsignedByte = 0x0, UnsignedShort = 0x1, @@ -1200,7 +1216,12 @@ public: } } index_array; - INSERT_PADDING_WORDS_NOINIT(0x7); + union { + BitField<0, 16, u32> first; + BitField<16, 16, u32> count; + } small_index; + + INSERT_PADDING_WORDS_NOINIT(0x6); INSERT_PADDING_WORDS_NOINIT(0x1F); @@ -1244,7 +1265,11 @@ public: BitField<11, 1, u32> depth_clamp_disabled; } view_volume_clip_control; - INSERT_PADDING_WORDS_NOINIT(0x1F); + INSERT_PADDING_WORDS_NOINIT(0xC); + + PrimitiveTopologyOverride topology_override; + + INSERT_PADDING_WORDS_NOINIT(0x12); u32 depth_bounds_enable; @@ -1520,10 +1545,8 @@ private: void ProcessSyncPoint(); /// Handles a write to the CB_DATA[i] register. - void StartCBData(u32 method); void ProcessCBData(u32 value); - void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount); - void FinishCBData(); + void ProcessCBMultiData(const u32* start_base, u32 amount); /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); @@ -1531,6 +1554,9 @@ private: /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); + /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) + void ProcessTopologyOverride(); + // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); @@ -1555,20 +1581,10 @@ private: /// Interpreter for the macro codes uploaded to the GPU. std::unique_ptr<MacroEngine> macro_engine; - static constexpr u32 null_cb_data = 0xFFFFFFFF; - struct CBDataState { - static constexpr size_t inline_size = 0x4000; - std::array<std::array<u32, inline_size>, 16> buffer; - u32 current{null_cb_data}; - u32 id{null_cb_data}; - u32 start_pos{}; - u32 counter{}; - }; - CBDataState cb_data_state; - Upload::State upload_state; bool execute_on{true}; + bool use_topology_override{false}; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1685,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); +ASSERT_REG_POSITION(small_index, 0x5F9); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(vp_point_size, 0x644); @@ -1694,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); +ASSERT_REG_POSITION(topology_override, 0x65C); ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 67388d980..1fc1358bc 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -53,7 +53,6 @@ void MaxwellDMA::Launch() { // TODO(Subv): Perform more research and implement all features of this engine. const LaunchDMA& launch = regs.launch_dma; - ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); ASSERT(regs.dst_params.origin.x == 0); @@ -79,6 +78,7 @@ void MaxwellDMA::Launch() { CopyPitchToBlockLinear(); } } + ReleaseSemaphore(); } void MaxwellDMA::CopyPitchToPitch() { @@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } +void MaxwellDMA::ReleaseSemaphore() { + const auto type = regs.launch_dma.semaphore_type; + const GPUVAddr address = regs.semaphore.address; + switch (type) { + case LaunchDMA::SemaphoreType::NONE: + break; + case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: + memory_manager.Write<u32>(address, regs.semaphore.payload); + break; + case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: + memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload)); + memory_manager.Write<u64>(address + 8, system.GPU().GetTicks()); + break; + default: + UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value())); + } +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index a04514425..2692cac8a 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -224,6 +224,8 @@ private: void FastCopyBlockLinearToPitch(); + void ReleaseSemaphore(); + Core::System& system; MemoryManager& memory_manager; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 151290101..293ad7d59 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -31,9 +31,8 @@ bool GLInnerFence::IsSignaled() const { return true; } ASSERT(sync_object.handle != 0); - GLsizei length; GLint sync_status; - glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status); + glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status); return sync_status == GL_SIGNALED; } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index f8495896c..9e6732abd 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); - if (in_parallel) { - // Make sure program is built before continuing when building in parallel - glGetString(GL_PROGRAM_ERROR_STRING_NV); - } } break; case Settings::ShaderBackend::SPIRV: @@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline( break; } } - if (in_parallel && backend != Settings::ShaderBackend::GLASM) { - // Make sure programs have built if we are building shaders in parallel - for (OGLProgram& program : source_programs) { - if (program.handle != 0) { - GLint status{}; - glGetProgramiv(program.handle, GL_LINK_STATUS, &status); - } - } + if (in_parallel) { + std::lock_guard lock{built_mutex}; + built_fence.Create(); + // Flush this context to ensure compilation commands and fence are in the GPU pipe. + glFlush(); + built_condvar.notify_one(); + } else { + is_built = true; } if (shader_notify) { shader_notify->MarkShaderComplete(); } - is_built = true; - built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - if (!is_built.load(std::memory_order::relaxed)) { + if (!IsBuilt()) { WaitForBuild(); } const bool use_assembly{assembly_programs[0].handle != 0}; @@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { } void GraphicsPipeline::WaitForBuild() { - std::unique_lock lock{built_mutex}; - built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + if (built_fence.handle == 0) { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return built_fence.handle != 0; }); + } + ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED); + is_built = true; +} + +bool GraphicsPipeline::IsBuilt() noexcept { + if (is_built) { + return true; + } + if (built_fence.handle == 0) { + return false; + } + // Timeout of zero means this is non-blocking + const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0); + ASSERT(sync_status != GL_WAIT_FAILED); + is_built = sync_status != GL_TIMEOUT_EXPIRED; + return is_built; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 4e28d9a42..311d49f3f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -100,9 +100,7 @@ public: return writes_global_memory; } - [[nodiscard]] bool IsBuilt() const noexcept { - return is_built.load(std::memory_order::relaxed); - } + [[nodiscard]] bool IsBuilt() noexcept; template <typename Spec> static auto MakeConfigureSpecFunc() { @@ -154,7 +152,8 @@ private: std::mutex built_mutex; std::condition_variable built_condvar; - std::atomic_bool is_built{false}; + OGLSync built_fence{}; + bool is_built{false}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 3e96c0f60..4d73427b4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstring> #include <memory> #include <optional> @@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, }; - const std::array push_constants{base_vertex, index_shift}; + const std::array<u32, 2> push_constants{base_vertex, index_shift}; const VkDescriptorSet set = descriptor_allocator.Commit(); device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0f62779de..ca6019a3a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::A8B8G8R8_UNORM: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM || + src_view.format == PixelFormat::D24_UNORM_S8_UINT) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); } break; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 329bf4def..2f2594585 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor gpu->BindRenderer(std::move(renderer)); return gpu; } catch (const std::runtime_error& exception) { + scope.Cancel(); LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); return nullptr; } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index f915bd856..4b943c6ba 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -609,6 +609,7 @@ void Config::ReadCpuValues() { ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); + ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor); if (global) { ReadBasicSetting(Settings::values.cpu_debug_mode); @@ -621,6 +622,8 @@ void Config::ReadCpuValues() { ReadBasicSetting(Settings::values.cpuopt_misc_ir); ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); ReadBasicSetting(Settings::values.cpuopt_fastmem); + ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives); + ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives); } qt_config->endGroup(); @@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() { WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); + WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor); if (global) { WriteBasicSetting(Settings::values.cpu_debug_mode); @@ -1151,6 +1155,8 @@ void Config::SaveCpuValues() { WriteBasicSetting(Settings::values.cpuopt_misc_ir); WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); WriteBasicSetting(Settings::values.cpuopt_fastmem); + WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives); + WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index f66cab5d4..bf74ccc7c 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() { ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); + ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); ui->cpuopt_unsafe_reduce_fp_error->setChecked( @@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() { Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); ui->cpuopt_unsafe_fastmem_check->setChecked( Settings::values.cpuopt_unsafe_fastmem_check.GetValue()); + ui->cpuopt_unsafe_ignore_global_monitor->setChecked( + Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue())); @@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check, ui->cpuopt_unsafe_fastmem_check, cpuopt_unsafe_fastmem_check); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor, + ui->cpuopt_unsafe_ignore_global_monitor, + cpuopt_unsafe_ignore_global_monitor); } void ConfigureCpu::changeEvent(QEvent* event) { @@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check, Settings::values.cpuopt_unsafe_fastmem_check, cpuopt_unsafe_fastmem_check); + ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor, + Settings::values.cpuopt_unsafe_ignore_global_monitor, + cpuopt_unsafe_ignore_global_monitor); } diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index ed9af0e9f..733e38be4 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -45,6 +45,7 @@ private: ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; + ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index d8064db24..5d80a8c91 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -150,6 +150,18 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor"> + <property name="toolTip"> + <string> + <div>This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.</div> + </string> + </property> + <property name="text"> + <string>Ignore global monitor</string> + </property> + </widget> + </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp index 05a90963d..616a0be75 100644 --- a/src/yuzu/configuration/configure_cpu_debug.cpp +++ b/src/yuzu/configuration/configure_cpu_debug.cpp @@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() { Settings::values.cpuopt_reduce_misalign_checks.GetValue()); ui->cpuopt_fastmem->setEnabled(runtime_lock); ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue()); + ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock); + ui->cpuopt_fastmem_exclusives->setChecked( + Settings::values.cpuopt_fastmem_exclusives.GetValue()); + ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock); + ui->cpuopt_recompile_exclusives->setChecked( + Settings::values.cpuopt_recompile_exclusives.GetValue()); } void ConfigureCpuDebug::ApplyConfiguration() { @@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() { Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked(); + Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked(); + Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked(); } void ConfigureCpuDebug::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index 6e635bb2f..2bc268810 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui @@ -144,7 +144,34 @@ </string> </property> <property name="text"> - <string>Enable Host MMU Emulation</string> + <string>Enable Host MMU Emulation (general memory instructions)</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="cpuopt_fastmem_exclusives"> + <property name="toolTip"> + <string> + <div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.</div> + <div style="white-space: nowrap">Disabling this forces all exclusive memory accesses to use Software MMU Emulation.</div> + </string> + </property> + <property name="text"> + <string>Enable Host MMU Emulation (exclusive memory instructions)</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="cpuopt_recompile_exclusives"> + <property name="toolTip"> + <string> + <div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.</div> + </string> + </property> + <property name="text"> + <string>Enable recompilation of exclusive memory instructions</string> </property> </widget> </item> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index d573829be..06774768d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -250,9 +250,9 @@ GMainWindow::GMainWindow() #ifdef ARCHITECTURE_x86_64 const auto& caps = Common::GetCPUCaps(); std::string cpu_string = caps.cpu_string; - if (caps.avx || caps.avx2 || caps.avx512) { + if (caps.avx || caps.avx2 || caps.avx512f) { cpu_string += " | AVX"; - if (caps.avx512) { + if (caps.avx512f) { cpu_string += "512"; } else if (caps.avx2) { cpu_string += '2'; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 30963a8bb..b74411c84 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -280,11 +280,14 @@ void Config::ReadValues() { ReadSetting("Cpu", Settings::values.cpuopt_misc_ir); ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks); ReadSetting("Cpu", Settings::values.cpuopt_fastmem); + ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives); + ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check); + ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor); // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 6d613bf7a..34782c378 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -124,7 +124,11 @@ keyboard_enabled = [Core] # Whether to use multi-core for CPU emulation # 0: Disabled, 1 (default): Enabled -use_multi_core= +use_multi_core = + +# Enable extended guest system memory layout (6GB DRAM) +# 0 (default): Disabled, 1: Enabled +use_extended_memory_layout = [Cpu] # Adjusts various optimizations. @@ -174,6 +178,14 @@ cpuopt_reduce_misalign_checks = # 0: Disabled, 1 (default): Enabled cpuopt_fastmem = +# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_fastmem_exclusives = + +# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_recompile_exclusives = + # Enable unfuse FMA (improve performance on CPUs without FMA) # Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. # 0: Disabled, 1 (default): Enabled @@ -199,6 +211,11 @@ cpuopt_unsafe_inaccurate_nan = # 0: Disabled, 1 (default): Enabled cpuopt_unsafe_fastmem_check = +# Enable faster exclusive instructions +# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. +# 0: Disabled, 1 (default): Enabled +cpuopt_unsafe_ignore_global_monitor = + [Renderer] # Which backend API to use. # 0 (default): OpenGL, 1: Vulkan |