83 files changed, 3415 insertions, 1586 deletions
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index f50d3308a..f37538e06 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -57,4 +57,13 @@ requires std::is_integral_v<T>
     return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U)));
 }
 
+/// Returns true when bit number `bit_index` of `value` is set (bit 0 is the least significant
+/// bit). `bit_index` is validated at compile time against BitSize<T>().
+template <size_t bit_index, typename T>
+requires std::is_integral_v<T>
+[[nodiscard]] constexpr bool Bit(const T value) {
+    static_assert(bit_index < BitSize<T>(), "bit_index must be smaller than size of T");
+    return ((value >> bit_index) & T(1)) == T(1);
+}
+
 } // namespace Common
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 28949fe5e..c465cfc14 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -327,8 +327,10 @@ private:
     bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
         const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
         if (it != placeholders.end() && it->lower() == virtual_offset + length) {
-            const bool is_root = it == placeholders.begin() && virtual_offset == 0;
-            return is_root || std::prev(it)->upper() == virtual_offset;
+            // The first placeholder has no predecessor, so std::prev(it) must not be evaluated
+            // for it; in that case the answer is simply whether the range starts at offset 0.
+            return it == placeholders.begin() ? virtual_offset == 0
+                                              : std::prev(it)->upper() == virtual_offset;
         }
         return false;
     }
diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h
index 3173cc449..b296b639e 100644
--- a/src/common/intrusive_red_black_tree.h
+++ b/src/common/intrusive_red_black_tree.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include "common/alignment.h"
+#include "common/common_funcs.h"
 #include "common/parent_of_member.h"
 #include "common/tree.h"
 
@@ -15,32 +17,33 @@ class IntrusiveRedBlackTreeImpl;
 
 }
 
+#pragma pack(push, 4)
 struct IntrusiveRedBlackTreeNode {
+    YUZU_NON_COPYABLE(IntrusiveRedBlackTreeNode);
+
 public:
-    using EntryType = RBEntry<IntrusiveRedBlackTreeNode>;
+    using RBEntry = freebsd::RBEntry<IntrusiveRedBlackTreeNode>;
 
-    constexpr IntrusiveRedBlackTreeNode() = default;
+private:
+    RBEntry m_entry;
 
-    void SetEntry(const EntryType& new_entry) {
-        entry = new_entry;
-    }
+public:
+    explicit IntrusiveRedBlackTreeNode() = default;
 
-    [[nodiscard]] EntryType& GetEntry() {
-        return entry;
+    [[nodiscard]] constexpr RBEntry& GetRBEntry() {
+        return m_entry;
     }
-
-    [[nodiscard]] const EntryType& GetEntry() const {
-        return entry;
+    [[nodiscard]] constexpr const RBEntry& GetRBEntry() const {
+        return m_entry;
     }
 
-private:
-    EntryType entry{};
-
-    friend class impl::IntrusiveRedBlackTreeImpl;
-
-    template <class, class, class>
-    friend class IntrusiveRedBlackTree;
+    constexpr void SetRBEntry(const RBEntry& entry) {
+        m_entry = entry;
+    }
 };
+static_assert(sizeof(IntrusiveRedBlackTreeNode) ==
+              3 * sizeof(void*) + std::max<size_t>(sizeof(freebsd::RBColor), 4));
+#pragma pack(pop)
 
 template <class T, class Traits, class Comparator>
 class IntrusiveRedBlackTree;
@@ -48,12 +51,17 @@ class IntrusiveRedBlackTree;
 namespace impl {
 
 class IntrusiveRedBlackTreeImpl {
+    YUZU_NON_COPYABLE(IntrusiveRedBlackTreeImpl);
+
 private:
     template <class, class, class>
     friend class ::Common::IntrusiveRedBlackTree;
 
-    using RootType = RBHead<IntrusiveRedBlackTreeNode>;
-    RootType root;
+private:
+    using RootType = freebsd::RBHead<IntrusiveRedBlackTreeNode>;
+
+private:
+    RootType m_root;
 
 public:
     template <bool Const>
@@ -81,149 +89,150 @@ public:
                                              IntrusiveRedBlackTreeImpl::reference>;
 
     private:
-        pointer node;
+        pointer m_node;
 
     public:
-        explicit Iterator(pointer n) : node(n) {}
+        constexpr explicit Iterator(pointer n) : m_node(n) {}
 
-        bool operator==(const Iterator& rhs) const {
-            return this->node == rhs.node;
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_node == rhs.m_node;
         }
 
-        bool operator!=(const Iterator& rhs) const {
+        constexpr bool operator!=(const Iterator& rhs) const {
             return !(*this == rhs);
         }
 
-        pointer operator->() const {
-            return this->node;
+        constexpr pointer operator->() const {
+            return m_node;
         }
 
-        reference operator*() const {
-            return *this->node;
+        constexpr reference operator*() const {
+            return *m_node;
         }
 
-        Iterator& operator++() {
-            this->node = GetNext(this->node);
+        constexpr Iterator& operator++() {
+            m_node = GetNext(m_node);
             return *this;
         }
 
-        Iterator& operator--() {
-            this->node = GetPrev(this->node);
+        constexpr Iterator& operator--() {
+            m_node = GetPrev(m_node);
             return *this;
         }
 
-        Iterator operator++(int) {
+        constexpr Iterator operator++(int) {
             const Iterator it{*this};
             ++(*this);
             return it;
         }
 
-        Iterator operator--(int) {
+        constexpr Iterator operator--(int) {
             const Iterator it{*this};
             --(*this);
             return it;
         }
 
-        operator Iterator<true>() const {
-            return Iterator<true>(this->node);
+        constexpr operator Iterator<true>() const {
+            return Iterator<true>(m_node);
         }
     };
 
 private:
-    // Define accessors using RB_* functions.
-    bool EmptyImpl() const {
-        return root.IsEmpty();
+    constexpr bool EmptyImpl() const {
+        return m_root.IsEmpty();
     }
 
-    IntrusiveRedBlackTreeNode* GetMinImpl() const {
-        return RB_MIN(const_cast<RootType*>(&root));
+    constexpr IntrusiveRedBlackTreeNode* GetMinImpl() const {
+        return freebsd::RB_MIN(const_cast<RootType&>(m_root));
     }
 
-    IntrusiveRedBlackTreeNode* GetMaxImpl() const {
-        return RB_MAX(const_cast<RootType*>(&root));
+    constexpr IntrusiveRedBlackTreeNode* GetMaxImpl() const {
+        return freebsd::RB_MAX(const_cast<RootType&>(m_root));
     }
 
-    IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
-        return RB_REMOVE(&root, node);
+    constexpr IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
+        return freebsd::RB_REMOVE(m_root, node);
     }
 
 public:
-    static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
-        return RB_NEXT(node);
+    static constexpr IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
+        return freebsd::RB_NEXT(node);
     }
 
-    static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
-        return RB_PREV(node);
+    static constexpr IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
+        return freebsd::RB_PREV(node);
     }
 
-    static const IntrusiveRedBlackTreeNode* GetNext(const IntrusiveRedBlackTreeNode* node) {
+    static constexpr IntrusiveRedBlackTreeNode const* GetNext(
+        IntrusiveRedBlackTreeNode const* node) {
         return static_cast<const IntrusiveRedBlackTreeNode*>(
             GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node)));
     }
 
-    static const IntrusiveRedBlackTreeNode* GetPrev(const IntrusiveRedBlackTreeNode* node) {
+    static constexpr IntrusiveRedBlackTreeNode const* GetPrev(
+        IntrusiveRedBlackTreeNode const* node) {
         return static_cast<const IntrusiveRedBlackTreeNode*>(
             GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node)));
     }
 
 public:
-    constexpr IntrusiveRedBlackTreeImpl() {}
+    constexpr IntrusiveRedBlackTreeImpl() = default;
 
     // Iterator accessors.
-    iterator begin() {
+    constexpr iterator begin() {
         return iterator(this->GetMinImpl());
     }
 
-    const_iterator begin() const {
+    constexpr const_iterator begin() const {
         return const_iterator(this->GetMinImpl());
     }
 
-    iterator end() {
+    constexpr iterator end() {
         return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr));
     }
 
-    const_iterator end() const {
+    constexpr const_iterator end() const {
         return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr));
     }
 
-    const_iterator cbegin() const {
+    constexpr const_iterator cbegin() const {
         return this->begin();
     }
 
-    const_iterator cend() const {
+    constexpr const_iterator cend() const {
         return this->end();
     }
 
-    iterator iterator_to(reference ref) {
-        return iterator(&ref);
+    constexpr iterator iterator_to(reference ref) {
+        return iterator(std::addressof(ref));
     }
 
-    const_iterator iterator_to(const_reference ref) const {
-        return const_iterator(&ref);
+    constexpr const_iterator iterator_to(const_reference ref) const {
+        return const_iterator(std::addressof(ref));
     }
 
     // Content management.
-    bool empty() const {
+    constexpr bool empty() const {
         return this->EmptyImpl();
     }
 
-    reference back() {
+    constexpr reference back() {
         return *this->GetMaxImpl();
     }
 
-    const_reference back() const {
+    constexpr const_reference back() const {
         return *this->GetMaxImpl();
     }
 
-    reference front() {
+    constexpr reference front() {
         return *this->GetMinImpl();
     }
 
-    const_reference front() const {
+    constexpr const_reference front() const {
         return *this->GetMinImpl();
     }
 
-    iterator erase(iterator it) {
+    constexpr iterator erase(iterator it) {
         auto cur = std::addressof(*it);
         auto next = GetNext(cur);
         this->RemoveImpl(cur);
@@ -234,16 +243,16 @@ public:
 } // namespace impl
 
 template <typename T>
-concept HasLightCompareType = requires {
-    { std::is_same<typename T::LightCompareType, void>::value } -> std::convertible_to<bool>;
+concept HasRedBlackKeyType = requires {
+    { std::is_same<typename T::RedBlackKeyType, void>::value } -> std::convertible_to<bool>;
 };
 
 namespace impl {
 
     template <typename T, typename Default>
-    consteval auto* GetLightCompareType() {
-        if constexpr (HasLightCompareType<T>) {
-            return static_cast<typename T::LightCompareType*>(nullptr);
+    consteval auto* GetRedBlackKeyType() {
+        if constexpr (HasRedBlackKeyType<T>) {
+            return static_cast<typename T::RedBlackKeyType*>(nullptr);
         } else {
             return static_cast<Default*>(nullptr);
         }
@@ -252,16 +261,17 @@ namespace impl {
 } // namespace impl
 
 template <typename T, typename Default>
-using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>;
+using RedBlackKeyType = std::remove_pointer_t<decltype(impl::GetRedBlackKeyType<T, Default>())>;
 
 template <class T, class Traits, class Comparator>
 class IntrusiveRedBlackTree {
+    YUZU_NON_COPYABLE(IntrusiveRedBlackTree);
 
 public:
     using ImplType = impl::IntrusiveRedBlackTreeImpl;
 
 private:
-    ImplType impl{};
+    ImplType m_impl;
 
 public:
     template <bool Const>
@@ -277,9 +287,9 @@ public:
     using iterator = Iterator<false>;
     using const_iterator = Iterator<true>;
 
-    using light_value_type = LightCompareType<Comparator, value_type>;
-    using const_light_pointer = const light_value_type*;
-    using const_light_reference = const light_value_type&;
+    using key_type = RedBlackKeyType<Comparator, value_type>;
+    using const_key_pointer = const key_type*;
+    using const_key_reference = const key_type&;
 
     template <bool Const>
     class Iterator {
@@ -298,183 +308,201 @@ public:
                                              IntrusiveRedBlackTree::reference>;
 
     private:
-        ImplIterator iterator;
+        ImplIterator m_impl;
 
     private:
-        explicit Iterator(ImplIterator it) : iterator(it) {}
+        constexpr explicit Iterator(ImplIterator it) : m_impl(it) {}
 
-        explicit Iterator(typename std::conditional<Const, ImplType::const_iterator,
-                                                    ImplType::iterator>::type::pointer ptr)
-            : iterator(ptr) {}
+        constexpr explicit Iterator(typename ImplIterator::pointer p) : m_impl(p) {}
 
-        ImplIterator GetImplIterator() const {
-            return this->iterator;
+        constexpr ImplIterator GetImplIterator() const {
+            return m_impl;
         }
 
     public:
-        bool operator==(const Iterator& rhs) const {
-            return this->iterator == rhs.iterator;
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_impl == rhs.m_impl;
         }
 
-        bool operator!=(const Iterator& rhs) const {
+        constexpr bool operator!=(const Iterator& rhs) const {
             return !(*this == rhs);
         }
 
-        pointer operator->() const {
-            return Traits::GetParent(std::addressof(*this->iterator));
+        constexpr pointer operator->() const {
+            return Traits::GetParent(std::addressof(*m_impl));
         }
 
-        reference operator*() const {
-            return *Traits::GetParent(std::addressof(*this->iterator));
+        constexpr reference operator*() const {
+            return *Traits::GetParent(std::addressof(*m_impl));
         }
 
-        Iterator& operator++() {
-            ++this->iterator;
+        constexpr Iterator& operator++() {
+            ++m_impl;
             return *this;
         }
 
-        Iterator& operator--() {
-            --this->iterator;
+        constexpr Iterator& operator--() {
+            --m_impl;
             return *this;
         }
 
-        Iterator operator++(int) {
+        constexpr Iterator operator++(int) {
             const Iterator it{*this};
-            ++this->iterator;
+            ++m_impl;
             return it;
         }
 
-        Iterator operator--(int) {
+        constexpr Iterator operator--(int) {
             const Iterator it{*this};
-            --this->iterator;
+            --m_impl;
             return it;
         }
 
-        operator Iterator<true>() const {
-            return Iterator<true>(this->iterator);
+        constexpr operator Iterator<true>() const {
+            return Iterator<true>(m_impl);
         }
     };
 
 private:
-    static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
-                           const IntrusiveRedBlackTreeNode* rhs) {
+    static constexpr int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
+                                     const IntrusiveRedBlackTreeNode* rhs) {
         return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs));
     }
 
-    static int LightCompareImpl(const void* elm, const IntrusiveRedBlackTreeNode* rhs) {
-        return Comparator::Compare(*static_cast<const_light_pointer>(elm), *Traits::GetParent(rhs));
+    static constexpr int CompareKeyImpl(const_key_reference key,
+                                        const IntrusiveRedBlackTreeNode* rhs) {
+        return Comparator::Compare(key, *Traits::GetParent(rhs));
     }
 
     // Define accessors using RB_* functions.
-    IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
-        return RB_INSERT(&impl.root, node, CompareImpl);
+    constexpr IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
+        return freebsd::RB_INSERT(m_impl.m_root, node, CompareImpl);
     }
 
-    IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const {
-        return RB_FIND(const_cast<ImplType::RootType*>(&impl.root),
-                       const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+    constexpr IntrusiveRedBlackTreeNode* FindImpl(IntrusiveRedBlackTreeNode const* node) const {
+        return freebsd::RB_FIND(const_cast<ImplType::RootType&>(m_impl.m_root),
+                                const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
     }
 
-    IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const {
-        return RB_NFIND(const_cast<ImplType::RootType*>(&impl.root),
-                        const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+    constexpr IntrusiveRedBlackTreeNode* NFindImpl(IntrusiveRedBlackTreeNode const* node) const {
+        return freebsd::RB_NFIND(const_cast<ImplType::RootType&>(m_impl.m_root),
+                                 const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
     }
 
-    IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const {
-        return RB_FIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root),
-                             static_cast<const void*>(lelm), LightCompareImpl);
+    constexpr IntrusiveRedBlackTreeNode* FindKeyImpl(const_key_reference key) const {
+        return freebsd::RB_FIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
+                                    CompareKeyImpl);
     }
 
-    IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const {
-        return RB_NFIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root),
-                              static_cast<const void*>(lelm), LightCompareImpl);
+    constexpr IntrusiveRedBlackTreeNode* NFindKeyImpl(const_key_reference key) const {
+        return freebsd::RB_NFIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
+                                     CompareKeyImpl);
+    }
+
+    constexpr IntrusiveRedBlackTreeNode* FindExistingImpl(
+        IntrusiveRedBlackTreeNode const* node) const {
+        return freebsd::RB_FIND_EXISTING(const_cast<ImplType::RootType&>(m_impl.m_root),
+                                         const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+    }
+
+    constexpr IntrusiveRedBlackTreeNode* FindExistingKeyImpl(const_key_reference key) const {
+        return freebsd::RB_FIND_EXISTING_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
+                                             CompareKeyImpl);
     }
 
 public:
     constexpr IntrusiveRedBlackTree() = default;
 
     // Iterator accessors.
-    iterator begin() {
-        return iterator(this->impl.begin());
+    constexpr iterator begin() {
+        return iterator(m_impl.begin());
     }
 
-    const_iterator begin() const {
-        return const_iterator(this->impl.begin());
+    constexpr const_iterator begin() const {
+        return const_iterator(m_impl.begin());
     }
 
-    iterator end() {
-        return iterator(this->impl.end());
+    constexpr iterator end() {
+        return iterator(m_impl.end());
     }
 
-    const_iterator end() const {
-        return const_iterator(this->impl.end());
+    constexpr const_iterator end() const {
+        return const_iterator(m_impl.end());
     }
 
-    const_iterator cbegin() const {
+    constexpr const_iterator cbegin() const {
         return this->begin();
     }
 
-    const_iterator cend() const {
+    constexpr const_iterator cend() const {
         return this->end();
     }
 
-    iterator iterator_to(reference ref) {
-        return iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+    constexpr iterator iterator_to(reference ref) {
+        return iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
     }
 
-    const_iterator iterator_to(const_reference ref) const {
-        return const_iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+    constexpr const_iterator iterator_to(const_reference ref) const {
+        return const_iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
     }
 
     // Content management.
-    bool empty() const {
-        return this->impl.empty();
+    constexpr bool empty() const {
+        return m_impl.empty();
     }
 
-    reference back() {
-        return *Traits::GetParent(std::addressof(this->impl.back()));
+    constexpr reference back() {
+        return *Traits::GetParent(std::addressof(m_impl.back()));
     }
 
-    const_reference back() const {
-        return *Traits::GetParent(std::addressof(this->impl.back()));
+    constexpr const_reference back() const {
+        return *Traits::GetParent(std::addressof(m_impl.back()));
     }
 
-    reference front() {
-        return *Traits::GetParent(std::addressof(this->impl.front()));
+    constexpr reference front() {
+        return *Traits::GetParent(std::addressof(m_impl.front()));
     }
 
-    const_reference front() const {
-        return *Traits::GetParent(std::addressof(this->impl.front()));
+    constexpr const_reference front() const {
+        return *Traits::GetParent(std::addressof(m_impl.front()));
     }
 
-    iterator erase(iterator it) {
-        return iterator(this->impl.erase(it.GetImplIterator()));
+    constexpr iterator erase(iterator it) {
+        return iterator(m_impl.erase(it.GetImplIterator()));
     }
 
-    iterator insert(reference ref) {
+    constexpr iterator insert(reference ref) {
         ImplType::pointer node = Traits::GetNode(std::addressof(ref));
         this->InsertImpl(node);
         return iterator(node);
     }
 
-    iterator find(const_reference ref) const {
+    constexpr iterator find(const_reference ref) const {
         return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref))));
     }
 
-    iterator nfind(const_reference ref) const {
+    constexpr iterator nfind(const_reference ref) const {
         return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref))));
     }
 
-    iterator find_light(const_light_reference ref) const {
-        return iterator(this->FindLightImpl(std::addressof(ref)));
+    constexpr iterator find_key(const_key_reference ref) const {
+        return iterator(this->FindKeyImpl(ref));
+    }
+
+    constexpr iterator nfind_key(const_key_reference ref) const {
+        return iterator(this->NFindKeyImpl(ref));
+    }
+
+    constexpr iterator find_existing(const_reference ref) const {
+        return iterator(this->FindExistingImpl(Traits::GetNode(std::addressof(ref))));
     }
 
-    iterator nfind_light(const_light_reference ref) const {
-        return iterator(this->NFindLightImpl(std::addressof(ref)));
+    constexpr iterator find_existing_key(const_key_reference ref) const {
+        return iterator(this->FindExistingKeyImpl(ref));
     }
 };
 
-template <auto T, class Derived = impl::GetParentType<T>>
+template <auto T, class Derived = Common::impl::GetParentType<T>>
 class IntrusiveRedBlackTreeMemberTraits;
 
 template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
@@ -498,19 +526,16 @@ private:
         return std::addressof(parent->*Member);
     }
 
-    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
-        return GetParentPointer<Member, Derived>(node);
+    static Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return Common::GetParentPointer<Member, Derived>(node);
     }
 
-    static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
-        return GetParentPointer<Member, Derived>(node);
+    static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
+        return Common::GetParentPointer<Member, Derived>(node);
     }
-
-private:
-    static constexpr TypedStorage<Derived> DerivedStorage = {};
 };
 
-template <auto T, class Derived = impl::GetParentType<T>>
+template <auto T, class Derived = Common::impl::GetParentType<T>>
 class IntrusiveRedBlackTreeMemberTraitsDeferredAssert;
 
 template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
@@ -521,11 +546,6 @@ public:
         IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>;
     using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
 
-    static constexpr bool IsValid() {
-        TypedStorage<Derived> DerivedStorage = {};
-        return GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage);
-    }
-
 private:
     template <class, class, class>
     friend class IntrusiveRedBlackTree;
@@ -540,30 +560,36 @@ private:
         return std::addressof(parent->*Member);
     }
 
-    static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
-        return GetParentPointer<Member, Derived>(node);
+    static Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+        return Common::GetParentPointer<Member, Derived>(node);
     }
 
-    static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
-        return GetParentPointer<Member, Derived>(node);
+    static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
+        return Common::GetParentPointer<Member, Derived>(node);
     }
 };
 
 template <class Derived>
-class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
+class alignas(void*) IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
 public:
+    using IntrusiveRedBlackTreeNode::IntrusiveRedBlackTreeNode;
+
     constexpr Derived* GetPrev() {
-        return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+        return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>(
+            impl::IntrusiveRedBlackTreeImpl::GetPrev(this)));
     }
     constexpr const Derived* GetPrev() const {
-        return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+        return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>(
+            impl::IntrusiveRedBlackTreeImpl::GetPrev(this)));
     }
 
     constexpr Derived* GetNext() {
-        return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+        return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>(
+            impl::IntrusiveRedBlackTreeImpl::GetNext(this)));
     }
     constexpr const Derived* GetNext() const {
-        return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+        return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>(
+            impl::IntrusiveRedBlackTreeImpl::GetNext(this)));
     }
 };
 
@@ -581,19 +607,22 @@ private:
     friend class impl::IntrusiveRedBlackTreeImpl;
 
     static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
-        return static_cast<IntrusiveRedBlackTreeNode*>(parent);
+        return static_cast<IntrusiveRedBlackTreeNode*>(
+            static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(parent));
     }
 
     static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) {
-        return static_cast<const IntrusiveRedBlackTreeNode*>(parent);
+        return static_cast<const IntrusiveRedBlackTreeNode*>(
+            static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(parent));
     }
 
     static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
-        return static_cast<Derived*>(node);
+        return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(node));
     }
 
-    static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
-        return static_cast<const Derived*>(node);
+    static constexpr Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
+        return static_cast<const Derived*>(
+            static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(node));
     }
 };
 
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index c51c05b28..4a2462ec4 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -218,19 +218,17 @@ private:
     Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
         : filter{filter_}, file_backend{file_backend_filename} {}
 
-    ~Impl() {
-        StopBackendThread();
-    }
+    ~Impl() = default;
 
     void StartBackendThread() {
-        backend_thread = std::thread([this] {
+        backend_thread = std::jthread([this](std::stop_token stop_token) {
             Common::SetCurrentThreadName("yuzu:Log");
             Entry entry;
             const auto write_logs = [this, &entry]() {
                 ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
             };
-            while (!stop.stop_requested()) {
-                entry = message_queue.PopWait(stop.get_token());
+            while (!stop_token.stop_requested()) {
+                entry = message_queue.PopWait(stop_token);
                 if (entry.filename != nullptr) {
                     write_logs();
                 }
@@ -244,11 +242,6 @@ private:
         });
     }
 
-    void StopBackendThread() {
-        stop.request_stop();
-        backend_thread.join();
-    }
-
     Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
                       const char* function, std::string&& message) const {
         using std::chrono::duration_cast;
@@ -283,10 +276,11 @@ private:
     ColorConsoleBackend color_console_backend{};
     FileBackend file_backend;
 
-    std::stop_source stop;
-    std::thread backend_thread;
     MPSCQueue<Entry, true> message_queue{};
     std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
+    // Declared last on purpose: members are destroyed in reverse declaration order, so the
+    // jthread is stopped and joined before message_queue and the backends are torn down.
+    std::jthread backend_thread;
 };
 } // namespace
 
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 2810cec15..877e0faa4 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) {
     values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
     values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
     values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
+    values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true);
 
     // Renderer
     values.renderer_backend.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index d06b23a14..a37d83fb3 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -484,12 +484,15 @@ struct Values {
     BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
     BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
     BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
+    BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"};
+    BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"};
 
     Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
     Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
     Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
     Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
     Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
+    Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"};
 
     // Renderer
     RangedSetting<RendererBackend> renderer_backend{
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 6241d08b3..98c82cd17 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -55,22 +55,50 @@ void AppendBuildInfo(FieldCollection& fc) {
 
 void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
-    fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
-    fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2);
+
+    const auto& caps = Common::GetCPUCaps();
+    const auto add_field = [&fc](std::string_view field_name, const auto& field_value) {
+        fc.AddField(FieldType::UserSystem, field_name, field_value);
+    };
+    add_field("CPU_Model", caps.cpu_string);
+    add_field("CPU_BrandString", caps.brand_string);
+
+    add_field("CPU_Extension_x64_SSE", caps.sse);
+    add_field("CPU_Extension_x64_SSE2", caps.sse2);
+    add_field("CPU_Extension_x64_SSE3", caps.sse3);
+    add_field("CPU_Extension_x64_SSSE3", caps.ssse3);
+    add_field("CPU_Extension_x64_SSE41", caps.sse4_1);
+    add_field("CPU_Extension_x64_SSE42", caps.sse4_2);
+
+    add_field("CPU_Extension_x64_AVX", caps.avx);
+    add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni);
+    add_field("CPU_Extension_x64_AVX2", caps.avx2);
+
+    // Skylake-X/SP level AVX512, for compatibility with the previous telemetry field
+    add_field("CPU_Extension_x64_AVX512",
+              caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw);
+
+    add_field("CPU_Extension_x64_AVX512F", caps.avx512f);
+    add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd);
+    add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl);
+    add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq);
+    add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw);
+    add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg);
+    add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi);
+
+    add_field("CPU_Extension_x64_AES", caps.aes);
+    add_field("CPU_Extension_x64_BMI1", caps.bmi1);
+    add_field("CPU_Extension_x64_BMI2", caps.bmi2);
+    add_field("CPU_Extension_x64_F16C", caps.f16c);
+    add_field("CPU_Extension_x64_FMA", caps.fma);
+    add_field("CPU_Extension_x64_FMA4", caps.fma4);
+    add_field("CPU_Extension_x64_GFNI", caps.gfni);
+    add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
+    add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
+    add_field("CPU_Extension_x64_MOVBE", caps.movbe);
+    add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
+    add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
+    add_field("CPU_Extension_x64_SHA", caps.sha);
 #else
     fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
 #endif
diff --git a/src/common/telemetry.h b/src/common/telemetry.h
index 4d632f7eb..3524c857e 100644
--- a/src/common/telemetry.h
+++ b/src/common/telemetry.h
@@ -8,6 +8,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <string_view>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 
@@ -55,8 +56,8 @@ class Field : public FieldInterface {
 public:
     YUZU_NON_COPYABLE(Field);
 
-    Field(FieldType type_, std::string name_, T value_)
-        : name(std::move(name_)), type(type_), value(std::move(value_)) {}
+    Field(FieldType type_, std::string_view name_, T value_)
+        : name(name_), type(type_), value(std::move(value_)) {}
 
     ~Field() override = default;
 
@@ -123,7 +124,7 @@ public:
      * @param value Value for the field to add.
      */
     template <typename T>
-    void AddField(FieldType type, const char* name, T value) {
+    void AddField(FieldType type, std::string_view name, T value) {
         return AddField(std::make_unique<Field<T>>(type, name, std::move(value)));
     }
 
diff --git a/src/common/tree.h b/src/common/tree.h
index 18faa4a48..28370e343 100644
--- a/src/common/tree.h
+++ b/src/common/tree.h
@@ -43,294 +43,265 @@
  * The maximum height of a red-black tree is 2lg (n+1).
  */
 
-#include "common/assert.h"
+namespace Common::freebsd {
 
-namespace Common {
+enum class RBColor {
+    RB_BLACK = 0,
+    RB_RED = 1,
+};
+
+#pragma pack(push, 4)
 template <typename T>
-class RBHead {
+class RBEntry {
 public:
-    [[nodiscard]] T* Root() {
-        return rbh_root;
-    }
+    constexpr RBEntry() = default;
 
-    [[nodiscard]] const T* Root() const {
-        return rbh_root;
+    [[nodiscard]] constexpr T* Left() {
+        return m_rbe_left;
     }
-
-    void SetRoot(T* root) {
-        rbh_root = root;
+    [[nodiscard]] constexpr const T* Left() const {
+        return m_rbe_left;
     }
 
-    [[nodiscard]] bool IsEmpty() const {
-        return Root() == nullptr;
+    constexpr void SetLeft(T* e) {
+        m_rbe_left = e;
     }
 
-private:
-    T* rbh_root = nullptr;
-};
-
-enum class EntryColor {
-    Black,
-    Red,
-};
-
-template <typename T>
-class RBEntry {
-public:
-    [[nodiscard]] T* Left() {
-        return rbe_left;
+    [[nodiscard]] constexpr T* Right() {
+        return m_rbe_right;
     }
-
-    [[nodiscard]] const T* Left() const {
-        return rbe_left;
+    [[nodiscard]] constexpr const T* Right() const {
+        return m_rbe_right;
     }
 
-    void SetLeft(T* left) {
-        rbe_left = left;
+    constexpr void SetRight(T* e) {
+        m_rbe_right = e;
     }
 
-    [[nodiscard]] T* Right() {
-        return rbe_right;
+    [[nodiscard]] constexpr T* Parent() {
+        return m_rbe_parent;
     }
-
-    [[nodiscard]] const T* Right() const {
-        return rbe_right;
+    [[nodiscard]] constexpr const T* Parent() const {
+        return m_rbe_parent;
     }
 
-    void SetRight(T* right) {
-        rbe_right = right;
+    constexpr void SetParent(T* e) {
+        m_rbe_parent = e;
     }
 
-    [[nodiscard]] T* Parent() {
-        return rbe_parent;
+    [[nodiscard]] constexpr bool IsBlack() const {
+        return m_rbe_color == RBColor::RB_BLACK;
     }
-
-    [[nodiscard]] const T* Parent() const {
-        return rbe_parent;
+    [[nodiscard]] constexpr bool IsRed() const {
+        return m_rbe_color == RBColor::RB_RED;
     }
-
-    void SetParent(T* parent) {
-        rbe_parent = parent;
+    [[nodiscard]] constexpr RBColor Color() const {
+        return m_rbe_color;
     }
 
-    [[nodiscard]] bool IsBlack() const {
-        return rbe_color == EntryColor::Black;
+    constexpr void SetColor(RBColor c) {
+        m_rbe_color = c;
     }
 
-    [[nodiscard]] bool IsRed() const {
-        return rbe_color == EntryColor::Red;
-    }
+private:
+    T* m_rbe_left{};
+    T* m_rbe_right{};
+    T* m_rbe_parent{};
+    RBColor m_rbe_color{RBColor::RB_BLACK};
+};
+#pragma pack(pop)
 
-    [[nodiscard]] EntryColor Color() const {
-        return rbe_color;
-    }
+template <typename T>
+struct CheckRBEntry {
+    static constexpr bool value = false;
+};
+template <typename T>
+struct CheckRBEntry<RBEntry<T>> {
+    static constexpr bool value = true;
+};
 
-    void SetColor(EntryColor color) {
-        rbe_color = color;
-    }
+template <typename T>
+concept IsRBEntry = CheckRBEntry<T>::value;
 
+template <typename T>
+concept HasRBEntry = requires(T& t, const T& ct) {
+    { t.GetRBEntry() } -> std::same_as<RBEntry<T>&>;
+    { ct.GetRBEntry() } -> std::same_as<const RBEntry<T>&>;
+};
+
+template <typename T>
+requires HasRBEntry<T>
+class RBHead {
 private:
-    T* rbe_left = nullptr;
-    T* rbe_right = nullptr;
-    T* rbe_parent = nullptr;
-    EntryColor rbe_color{};
+    T* m_rbh_root = nullptr;
+
+public:
+    [[nodiscard]] constexpr T* Root() {
+        return m_rbh_root;
+    }
+    [[nodiscard]] constexpr const T* Root() const {
+        return m_rbh_root;
+    }
+    constexpr void SetRoot(T* root) {
+        m_rbh_root = root;
+    }
+
+    [[nodiscard]] constexpr bool IsEmpty() const {
+        return this->Root() == nullptr;
+    }
 };
 
-template <typename Node>
-[[nodiscard]] RBEntry<Node>& RB_ENTRY(Node* node) {
-    return node->GetEntry();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr RBEntry<T>& RB_ENTRY(T* t) {
+    return t->GetRBEntry();
 }
-
-template <typename Node>
-[[nodiscard]] const RBEntry<Node>& RB_ENTRY(const Node* node) {
-    return node->GetEntry();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const RBEntry<T>& RB_ENTRY(const T* t) {
+    return t->GetRBEntry();
 }
 
-template <typename Node>
-[[nodiscard]] Node* RB_PARENT(Node* node) {
-    return RB_ENTRY(node).Parent();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr T* RB_LEFT(T* t) {
+    return RB_ENTRY(t).Left();
 }
-
-template <typename Node>
-[[nodiscard]] const Node* RB_PARENT(const Node* node) {
-    return RB_ENTRY(node).Parent();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const T* RB_LEFT(const T* t) {
+    return RB_ENTRY(t).Left();
 }
 
-template <typename Node>
-void RB_SET_PARENT(Node* node, Node* parent) {
-    return RB_ENTRY(node).SetParent(parent);
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr T* RB_RIGHT(T* t) {
+    return RB_ENTRY(t).Right();
 }
-
-template <typename Node>
-[[nodiscard]] Node* RB_LEFT(Node* node) {
-    return RB_ENTRY(node).Left();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const T* RB_RIGHT(const T* t) {
+    return RB_ENTRY(t).Right();
 }
 
-template <typename Node>
-[[nodiscard]] const Node* RB_LEFT(const Node* node) {
-    return RB_ENTRY(node).Left();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr T* RB_PARENT(T* t) {
+    return RB_ENTRY(t).Parent();
 }
-
-template <typename Node>
-void RB_SET_LEFT(Node* node, Node* left) {
-    return RB_ENTRY(node).SetLeft(left);
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const T* RB_PARENT(const T* t) {
+    return RB_ENTRY(t).Parent();
 }
 
-template <typename Node>
-[[nodiscard]] Node* RB_RIGHT(Node* node) {
-    return RB_ENTRY(node).Right();
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_LEFT(T* t, T* e) {
+    RB_ENTRY(t).SetLeft(e);
 }
-
-template <typename Node>
-[[nodiscard]] const Node* RB_RIGHT(const Node* node) {
-    return RB_ENTRY(node).Right();
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_RIGHT(T* t, T* e) {
+    RB_ENTRY(t).SetRight(e);
 }
-
-template <typename Node>
-void RB_SET_RIGHT(Node* node, Node* right) {
-    return RB_ENTRY(node).SetRight(right);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_PARENT(T* t, T* e) {
+    RB_ENTRY(t).SetParent(e);
 }
 
-template <typename Node>
-[[nodiscard]] bool RB_IS_BLACK(const Node* node) {
-    return RB_ENTRY(node).IsBlack();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr bool RB_IS_BLACK(const T* t) {
+    return RB_ENTRY(t).IsBlack();
 }
-
-template <typename Node>
-[[nodiscard]] bool RB_IS_RED(const Node* node) {
-    return RB_ENTRY(node).IsRed();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr bool RB_IS_RED(const T* t) {
+    return RB_ENTRY(t).IsRed();
 }
 
-template <typename Node>
-[[nodiscard]] EntryColor RB_COLOR(const Node* node) {
-    return RB_ENTRY(node).Color();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr RBColor RB_COLOR(const T* t) {
+    return RB_ENTRY(t).Color();
 }
 
-template <typename Node>
-void RB_SET_COLOR(Node* node, EntryColor color) {
-    return RB_ENTRY(node).SetColor(color);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_COLOR(T* t, RBColor c) {
+    RB_ENTRY(t).SetColor(c);
 }
 
-template <typename Node>
-void RB_SET(Node* node, Node* parent) {
-    auto& entry = RB_ENTRY(node);
-    entry.SetParent(parent);
-    entry.SetLeft(nullptr);
-    entry.SetRight(nullptr);
-    entry.SetColor(EntryColor::Red);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET(T* elm, T* parent) {
+    auto& rb_entry = RB_ENTRY(elm);
+    rb_entry.SetParent(parent);
+    rb_entry.SetLeft(nullptr);
+    rb_entry.SetRight(nullptr);
+    rb_entry.SetColor(RBColor::RB_RED);
 }
 
-template <typename Node>
-void RB_SET_BLACKRED(Node* black, Node* red) {
-    RB_SET_COLOR(black, EntryColor::Black);
-    RB_SET_COLOR(red, EntryColor::Red);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_BLACKRED(T* black, T* red) {
+    RB_SET_COLOR(black, RBColor::RB_BLACK);
+    RB_SET_COLOR(red, RBColor::RB_RED);
 }
 
-template <typename Node>
-void RB_ROTATE_LEFT(RBHead<Node>* head, Node* elm, Node*& tmp) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_ROTATE_LEFT(RBHead<T>& head, T* elm, T*& tmp) {
     tmp = RB_RIGHT(elm);
-    RB_SET_RIGHT(elm, RB_LEFT(tmp));
-    if (RB_RIGHT(elm) != nullptr) {
+    if (RB_SET_RIGHT(elm, RB_LEFT(tmp)); RB_RIGHT(elm) != nullptr) {
         RB_SET_PARENT(RB_LEFT(tmp), elm);
     }
 
-    RB_SET_PARENT(tmp, RB_PARENT(elm));
-    if (RB_PARENT(tmp) != nullptr) {
+    if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) {
         if (elm == RB_LEFT(RB_PARENT(elm))) {
             RB_SET_LEFT(RB_PARENT(elm), tmp);
         } else {
             RB_SET_RIGHT(RB_PARENT(elm), tmp);
         }
     } else {
-        head->SetRoot(tmp);
+        head.SetRoot(tmp);
     }
 
     RB_SET_LEFT(tmp, elm);
     RB_SET_PARENT(elm, tmp);
 }
 
-template <typename Node>
-void RB_ROTATE_RIGHT(RBHead<Node>* head, Node* elm, Node*& tmp) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_ROTATE_RIGHT(RBHead<T>& head, T* elm, T*& tmp) {
     tmp = RB_LEFT(elm);
-    RB_SET_LEFT(elm, RB_RIGHT(tmp));
-    if (RB_LEFT(elm) != nullptr) {
+    if (RB_SET_LEFT(elm, RB_RIGHT(tmp)); RB_LEFT(elm) != nullptr) {
         RB_SET_PARENT(RB_RIGHT(tmp), elm);
     }
 
-    RB_SET_PARENT(tmp, RB_PARENT(elm));
-    if (RB_PARENT(tmp) != nullptr) {
+    if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) {
         if (elm == RB_LEFT(RB_PARENT(elm))) {
             RB_SET_LEFT(RB_PARENT(elm), tmp);
         } else {
             RB_SET_RIGHT(RB_PARENT(elm), tmp);
         }
     } else {
-        head->SetRoot(tmp);
+        head.SetRoot(tmp);
     }
 
     RB_SET_RIGHT(tmp, elm);
     RB_SET_PARENT(elm, tmp);
 }
 
-template <typename Node>
-void RB_INSERT_COLOR(RBHead<Node>* head, Node* elm) {
-    Node* parent = nullptr;
-    Node* tmp = nullptr;
-
-    while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
-        Node* gparent = RB_PARENT(parent);
-        if (parent == RB_LEFT(gparent)) {
-            tmp = RB_RIGHT(gparent);
-            if (tmp && RB_IS_RED(tmp)) {
-                RB_SET_COLOR(tmp, EntryColor::Black);
-                RB_SET_BLACKRED(parent, gparent);
-                elm = gparent;
-                continue;
-            }
-
-            if (RB_RIGHT(parent) == elm) {
-                RB_ROTATE_LEFT(head, parent, tmp);
-                tmp = parent;
-                parent = elm;
-                elm = tmp;
-            }
-
-            RB_SET_BLACKRED(parent, gparent);
-            RB_ROTATE_RIGHT(head, gparent, tmp);
-        } else {
-            tmp = RB_LEFT(gparent);
-            if (tmp && RB_IS_RED(tmp)) {
-                RB_SET_COLOR(tmp, EntryColor::Black);
-                RB_SET_BLACKRED(parent, gparent);
-                elm = gparent;
-                continue;
-            }
-
-            if (RB_LEFT(parent) == elm) {
-                RB_ROTATE_RIGHT(head, parent, tmp);
-                tmp = parent;
-                parent = elm;
-                elm = tmp;
-            }
-
-            RB_SET_BLACKRED(parent, gparent);
-            RB_ROTATE_LEFT(head, gparent, tmp);
-        }
-    }
-
-    RB_SET_COLOR(head->Root(), EntryColor::Black);
-}
-
-template <typename Node>
-void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
-    Node* tmp;
-    while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head->Root() && parent != nullptr) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_REMOVE_COLOR(RBHead<T>& head, T* parent, T* elm) {
+    T* tmp;
+    while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head.Root()) {
         if (RB_LEFT(parent) == elm) {
             tmp = RB_RIGHT(parent);
-            if (!tmp) {
-                ASSERT_MSG(false, "tmp is invalid!");
-                break;
-            }
             if (RB_IS_RED(tmp)) {
                 RB_SET_BLACKRED(tmp, parent);
                 RB_ROTATE_LEFT(head, parent, tmp);
@@ -339,29 +310,29 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
 
             if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
                 (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
-                RB_SET_COLOR(tmp, EntryColor::Red);
+                RB_SET_COLOR(tmp, RBColor::RB_RED);
                 elm = parent;
                 parent = RB_PARENT(elm);
             } else {
                 if (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp))) {
-                    Node* oleft;
+                    T* oleft;
                     if ((oleft = RB_LEFT(tmp)) != nullptr) {
-                        RB_SET_COLOR(oleft, EntryColor::Black);
+                        RB_SET_COLOR(oleft, RBColor::RB_BLACK);
                     }
 
-                    RB_SET_COLOR(tmp, EntryColor::Red);
+                    RB_SET_COLOR(tmp, RBColor::RB_RED);
                     RB_ROTATE_RIGHT(head, tmp, oleft);
                     tmp = RB_RIGHT(parent);
                 }
 
                 RB_SET_COLOR(tmp, RB_COLOR(parent));
-                RB_SET_COLOR(parent, EntryColor::Black);
+                RB_SET_COLOR(parent, RBColor::RB_BLACK);
                 if (RB_RIGHT(tmp)) {
-                    RB_SET_COLOR(RB_RIGHT(tmp), EntryColor::Black);
+                    RB_SET_COLOR(RB_RIGHT(tmp), RBColor::RB_BLACK);
                 }
 
                 RB_ROTATE_LEFT(head, parent, tmp);
-                elm = head->Root();
+                elm = head.Root();
                 break;
             }
         } else {
@@ -372,68 +343,56 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
                 tmp = RB_LEFT(parent);
             }
 
-            if (!tmp) {
-                ASSERT_MSG(false, "tmp is invalid!");
-                break;
-            }
-
             if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
                 (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
-                RB_SET_COLOR(tmp, EntryColor::Red);
+                RB_SET_COLOR(tmp, RBColor::RB_RED);
                 elm = parent;
                 parent = RB_PARENT(elm);
             } else {
                 if (RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) {
-                    Node* oright;
+                    T* oright;
                     if ((oright = RB_RIGHT(tmp)) != nullptr) {
-                        RB_SET_COLOR(oright, EntryColor::Black);
+                        RB_SET_COLOR(oright, RBColor::RB_BLACK);
                     }
 
-                    RB_SET_COLOR(tmp, EntryColor::Red);
+                    RB_SET_COLOR(tmp, RBColor::RB_RED);
                     RB_ROTATE_LEFT(head, tmp, oright);
                     tmp = RB_LEFT(parent);
                 }
 
                 RB_SET_COLOR(tmp, RB_COLOR(parent));
-                RB_SET_COLOR(parent, EntryColor::Black);
+                RB_SET_COLOR(parent, RBColor::RB_BLACK);
 
                 if (RB_LEFT(tmp)) {
-                    RB_SET_COLOR(RB_LEFT(tmp), EntryColor::Black);
+                    RB_SET_COLOR(RB_LEFT(tmp), RBColor::RB_BLACK);
                 }
 
                 RB_ROTATE_RIGHT(head, parent, tmp);
-                elm = head->Root();
+                elm = head.Root();
                 break;
             }
         }
     }
 
     if (elm) {
-        RB_SET_COLOR(elm, EntryColor::Black);
+        RB_SET_COLOR(elm, RBColor::RB_BLACK);
     }
 }
 
-template <typename Node>
-Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
-    Node* child = nullptr;
-    Node* parent = nullptr;
-    Node* old = elm;
-    EntryColor color{};
-
-    const auto finalize = [&] {
-        if (color == EntryColor::Black) {
-            RB_REMOVE_COLOR(head, parent, child);
-        }
-
-        return old;
-    };
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_REMOVE(RBHead<T>& head, T* elm) {
+    T* child = nullptr;
+    T* parent = nullptr;
+    T* old = elm;
+    RBColor color = RBColor::RB_BLACK;
 
     if (RB_LEFT(elm) == nullptr) {
         child = RB_RIGHT(elm);
     } else if (RB_RIGHT(elm) == nullptr) {
         child = RB_LEFT(elm);
     } else {
-        Node* left;
+        T* left;
         elm = RB_RIGHT(elm);
         while ((left = RB_LEFT(elm)) != nullptr) {
             elm = left;
@@ -446,6 +405,7 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
         if (child) {
             RB_SET_PARENT(child, parent);
         }
+
         if (parent) {
             if (RB_LEFT(parent) == elm) {
                 RB_SET_LEFT(parent, child);
@@ -453,14 +413,14 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
                 RB_SET_RIGHT(parent, child);
             }
         } else {
-            head->SetRoot(child);
+            head.SetRoot(child);
         }
 
         if (RB_PARENT(elm) == old) {
             parent = elm;
         }
 
-        elm->SetEntry(old->GetEntry());
+        elm->SetRBEntry(old->GetRBEntry());
 
         if (RB_PARENT(old)) {
             if (RB_LEFT(RB_PARENT(old)) == old) {
@@ -469,17 +429,24 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
                 RB_SET_RIGHT(RB_PARENT(old), elm);
             }
         } else {
-            head->SetRoot(elm);
+            head.SetRoot(elm);
         }
+
         RB_SET_PARENT(RB_LEFT(old), elm);
+
         if (RB_RIGHT(old)) {
             RB_SET_PARENT(RB_RIGHT(old), elm);
         }
+
         if (parent) {
             left = parent;
         }
 
-        return finalize();
+        if (color == RBColor::RB_BLACK) {
+            RB_REMOVE_COLOR(head, parent, child);
+        }
+
+        return old;
     }
 
     parent = RB_PARENT(elm);
@@ -495,17 +462,69 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
             RB_SET_RIGHT(parent, child);
         }
     } else {
-        head->SetRoot(child);
+        head.SetRoot(child);
+    }
+
+    if (color == RBColor::RB_BLACK) {
+        RB_REMOVE_COLOR(head, parent, child);
+    }
+
+    return old;
+}
+
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_INSERT_COLOR(RBHead<T>& head, T* elm) {
+    T *parent = nullptr, *tmp = nullptr;
+    while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
+        T* gparent = RB_PARENT(parent);
+        if (parent == RB_LEFT(gparent)) {
+            tmp = RB_RIGHT(gparent);
+            if (tmp && RB_IS_RED(tmp)) {
+                RB_SET_COLOR(tmp, RBColor::RB_BLACK);
+                RB_SET_BLACKRED(parent, gparent);
+                elm = gparent;
+                continue;
+            }
+
+            if (RB_RIGHT(parent) == elm) {
+                RB_ROTATE_LEFT(head, parent, tmp);
+                tmp = parent;
+                parent = elm;
+                elm = tmp;
+            }
+
+            RB_SET_BLACKRED(parent, gparent);
+            RB_ROTATE_RIGHT(head, gparent, tmp);
+        } else {
+            tmp = RB_LEFT(gparent);
+            if (tmp && RB_IS_RED(tmp)) {
+                RB_SET_COLOR(tmp, RBColor::RB_BLACK);
+                RB_SET_BLACKRED(parent, gparent);
+                elm = gparent;
+                continue;
+            }
+
+            if (RB_LEFT(parent) == elm) {
+                RB_ROTATE_RIGHT(head, parent, tmp);
+                tmp = parent;
+                parent = elm;
+                elm = tmp;
+            }
+
+            RB_SET_BLACKRED(parent, gparent);
+            RB_ROTATE_LEFT(head, gparent, tmp);
+        }
     }
 
-    return finalize();
+    RB_SET_COLOR(head.Root(), RBColor::RB_BLACK);
 }
 
-// Inserts a node into the RB tree
-template <typename Node, typename CompareFunction>
-Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
-    Node* parent = nullptr;
-    Node* tmp = head->Root();
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_INSERT(RBHead<T>& head, T* elm, Compare cmp) {
+    T* parent = nullptr;
+    T* tmp = head.Root();
     int comp = 0;
 
     while (tmp) {
@@ -529,17 +548,17 @@ Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
             RB_SET_RIGHT(parent, elm);
         }
     } else {
-        head->SetRoot(elm);
+        head.SetRoot(elm);
     }
 
     RB_INSERT_COLOR(head, elm);
     return nullptr;
 }
 
-// Finds the node with the same key as elm
-template <typename Node, typename CompareFunction>
-Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
-    Node* tmp = head->Root();
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND(RBHead<T>& head, T* elm, Compare cmp) {
+    T* tmp = head.Root();
 
     while (tmp) {
         const int comp = cmp(elm, tmp);
@@ -555,11 +574,11 @@ Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
     return nullptr;
 }
 
-// Finds the first node greater than or equal to the search key
-template <typename Node, typename CompareFunction>
-Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
-    Node* tmp = head->Root();
-    Node* res = nullptr;
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_NFIND(RBHead<T>& head, T* elm, Compare cmp) {
+    T* tmp = head.Root();
+    T* res = nullptr;
 
     while (tmp) {
         const int comp = cmp(elm, tmp);
@@ -576,13 +595,13 @@ Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
     return res;
 }
 
-// Finds the node with the same key as lelm
-template <typename Node, typename CompareFunction>
-Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) {
-    Node* tmp = head->Root();
+template <typename T, typename U, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND_KEY(RBHead<T>& head, const U& key, Compare cmp) {
+    T* tmp = head.Root();
 
     while (tmp) {
-        const int comp = lcmp(lelm, tmp);
+        const int comp = cmp(key, tmp);
         if (comp < 0) {
             tmp = RB_LEFT(tmp);
         } else if (comp > 0) {
@@ -595,14 +614,14 @@ Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp)
     return nullptr;
 }
 
-// Finds the first node greater than or equal to the search key
-template <typename Node, typename CompareFunction>
-Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) {
-    Node* tmp = head->Root();
-    Node* res = nullptr;
+template <typename T, typename U, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_NFIND_KEY(RBHead<T>& head, const U& key, Compare cmp) {
+    T* tmp = head.Root();
+    T* res = nullptr;
 
     while (tmp) {
-        const int comp = lcmp(lelm, tmp);
+        const int comp = cmp(key, tmp);
         if (comp < 0) {
             res = tmp;
             tmp = RB_LEFT(tmp);
@@ -616,8 +635,43 @@ Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp)
     return res;
 }
 
-template <typename Node>
-Node* RB_NEXT(Node* elm) {
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND_EXISTING(RBHead<T>& head, T* elm, Compare cmp) {
+    T* tmp = head.Root();
+
+    while (true) {
+        const int comp = cmp(elm, tmp);
+        if (comp < 0) {
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+}
+
+template <typename T, typename U, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND_EXISTING_KEY(RBHead<T>& head, const U& key, Compare cmp) {
+    T* tmp = head.Root();
+
+    while (true) {
+        const int comp = cmp(key, tmp);
+        if (comp < 0) {
+            tmp = RB_LEFT(tmp);
+        } else if (comp > 0) {
+            tmp = RB_RIGHT(tmp);
+        } else {
+            return tmp;
+        }
+    }
+}
+
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_NEXT(T* elm) {
     if (RB_RIGHT(elm)) {
         elm = RB_RIGHT(elm);
         while (RB_LEFT(elm)) {
@@ -636,8 +690,9 @@ Node* RB_NEXT(Node* elm) {
     return elm;
 }
 
-template <typename Node>
-Node* RB_PREV(Node* elm) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_PREV(T* elm) {
     if (RB_LEFT(elm)) {
         elm = RB_LEFT(elm);
         while (RB_RIGHT(elm)) {
@@ -656,30 +711,32 @@ Node* RB_PREV(Node* elm) {
     return elm;
 }
 
-template <typename Node>
-Node* RB_MINMAX(RBHead<Node>* head, bool is_min) {
-    Node* tmp = head->Root();
-    Node* parent = nullptr;
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_MIN(RBHead<T>& head) {
+    T* tmp = head.Root();
+    T* parent = nullptr;
 
     while (tmp) {
         parent = tmp;
-        if (is_min) {
-            tmp = RB_LEFT(tmp);
-        } else {
-            tmp = RB_RIGHT(tmp);
-        }
+        tmp = RB_LEFT(tmp);
     }
 
     return parent;
 }
 
-template <typename Node>
-Node* RB_MIN(RBHead<Node>* head) {
-    return RB_MINMAX(head, true);
-}
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_MAX(RBHead<T>& head) {
+    T* tmp = head.Root();
+    T* parent = nullptr;
 
-template <typename Node>
-Node* RB_MAX(RBHead<Node>* head) {
-    return RB_MINMAX(head, false);
+    while (tmp) {
+        parent = tmp;
+        tmp = RB_RIGHT(tmp);
+    }
+
+    return parent;
 }
-} // namespace Common
+
+} // namespace Common::freebsd
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index fbeacc7e2..d81edb140 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -1,8 +1,12 @@
-// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
+// Project. Licensed under GPLv2 or any later version. Refer to the license.txt file included.
 
+#include <array>
 #include <cstring>
+#include <iterator>
+#include <span>
+#include <string_view>
+#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/x64/cpu_detect.h"
 
@@ -17,7 +21,7 @@
 // clang-format on
 #endif
 
-static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
+static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
 #if defined(__DragonFly__) || defined(__FreeBSD__)
     // Despite the name, this is just do_cpuid() with ECX as second input.
     cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
@@ -30,7 +34,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
 #endif
 }
 
-static inline void __cpuid(int info[4], int function_id) {
+static inline void __cpuid(int info[4], u32 function_id) {
     return __cpuidex(info, function_id, 0);
 }
 
@@ -45,6 +49,17 @@ static inline u64 _xgetbv(u32 index) {
 
 namespace Common {
 
+CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) {
+    if (brand_string == "GenuineIntel") {
+        return Manufacturer::Intel;
+    } else if (brand_string == "AuthenticAMD") {
+        return Manufacturer::AMD;
+    } else if (brand_string == "HygonGenuine") {
+        return Manufacturer::Hygon;
+    }
+    return Manufacturer::Unknown;
+}
+
 // Detects the various CPU features
 static CPUCaps Detect() {
     CPUCaps caps = {};
@@ -53,75 +68,74 @@ static CPUCaps Detect() {
     // yuzu at all anyway
 
     int cpu_id[4];
-    memset(caps.brand_string, 0, sizeof(caps.brand_string));
 
-    // Detect CPU's CPUID capabilities and grab CPU string
+    // Detect CPU's CPUID capabilities and grab manufacturer string
     __cpuid(cpu_id, 0x00000000);
-    u32 max_std_fn = cpu_id[0]; // EAX
-
-    std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
-    std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
-    std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
-    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
-        caps.manufacturer = Manufacturer::Intel;
-    else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
-        caps.manufacturer = Manufacturer::AMD;
-    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
-        caps.manufacturer = Manufacturer::Hygon;
-    else
-        caps.manufacturer = Manufacturer::Unknown;
+    const u32 max_std_fn = cpu_id[0]; // EAX
 
-    __cpuid(cpu_id, 0x80000000);
+    std::memset(caps.brand_string, 0, std::size(caps.brand_string));
+    std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32));
+    std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32));
+    std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32));
+
+    caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string);
 
-    u32 max_ex_fn = cpu_id[0];
+    // Set reasonable default cpu string even if brand string not available
+    std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string));
 
-    // Set reasonable default brand string even if brand string not available
-    strcpy(caps.cpu_string, caps.brand_string);
+    __cpuid(cpu_id, 0x80000000);
+
+    const u32 max_ex_fn = cpu_id[0];
 
     // Detect family and other miscellaneous features
     if (max_std_fn >= 1) {
         __cpuid(cpu_id, 0x00000001);
-        if ((cpu_id[3] >> 25) & 1)
-            caps.sse = true;
-        if ((cpu_id[3] >> 26) & 1)
-            caps.sse2 = true;
-        if ((cpu_id[2]) & 1)
-            caps.sse3 = true;
-        if ((cpu_id[2] >> 9) & 1)
-            caps.ssse3 = true;
-        if ((cpu_id[2] >> 19) & 1)
-            caps.sse4_1 = true;
-        if ((cpu_id[2] >> 20) & 1)
-            caps.sse4_2 = true;
-        if ((cpu_id[2] >> 25) & 1)
-            caps.aes = true;
+        caps.sse = Common::Bit<25>(cpu_id[3]);
+        caps.sse2 = Common::Bit<26>(cpu_id[3]);
+        caps.sse3 = Common::Bit<0>(cpu_id[2]);
+        caps.pclmulqdq = Common::Bit<1>(cpu_id[2]);
+        caps.ssse3 = Common::Bit<9>(cpu_id[2]);
+        caps.sse4_1 = Common::Bit<19>(cpu_id[2]);
+        caps.sse4_2 = Common::Bit<20>(cpu_id[2]);
+        caps.movbe = Common::Bit<22>(cpu_id[2]);
+        caps.popcnt = Common::Bit<23>(cpu_id[2]);
+        caps.aes = Common::Bit<25>(cpu_id[2]);
+        caps.f16c = Common::Bit<29>(cpu_id[2]);
 
         // AVX support requires 3 separate checks:
         //  - Is the AVX bit set in CPUID?
         //  - Is the XSAVE bit set in CPUID?
         //  - XGETBV result has the XCR bit set.
-        if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
+        if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) {
             if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
                 caps.avx = true;
-                if ((cpu_id[2] >> 12) & 1)
+                if (Common::Bit<12>(cpu_id[2]))
                     caps.fma = true;
             }
         }
 
         if (max_std_fn >= 7) {
             __cpuidex(cpu_id, 0x00000007, 0x00000000);
-            // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
-            if ((cpu_id[1] >> 5) & 1)
-                caps.avx2 = caps.avx;
-            if ((cpu_id[1] >> 3) & 1)
-                caps.bmi1 = true;
-            if ((cpu_id[1] >> 8) & 1)
-                caps.bmi2 = true;
-            // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
-            if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
-                (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
-                caps.avx512 = caps.avx2;
+            // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed
+            if (caps.avx) {
+                caps.avx2 = Common::Bit<5>(cpu_id[1]);
+                caps.avx512f = Common::Bit<16>(cpu_id[1]);
+                caps.avx512dq = Common::Bit<17>(cpu_id[1]);
+                caps.avx512cd = Common::Bit<28>(cpu_id[1]);
+                caps.avx512bw = Common::Bit<30>(cpu_id[1]);
+                caps.avx512vl = Common::Bit<31>(cpu_id[1]);
+                caps.avx512vbmi = Common::Bit<1>(cpu_id[2]);
+                caps.avx512bitalg = Common::Bit<12>(cpu_id[2]);
             }
+
+            caps.bmi1 = Common::Bit<3>(cpu_id[1]);
+            caps.bmi2 = Common::Bit<8>(cpu_id[1]);
+            caps.sha = Common::Bit<29>(cpu_id[1]);
+
+            caps.gfni = Common::Bit<8>(cpu_id[2]);
+
+            __cpuidex(cpu_id, 0x00000007, 0x00000001);
+            caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]);
         }
     }
 
@@ -138,15 +152,13 @@ static CPUCaps Detect() {
     if (max_ex_fn >= 0x80000001) {
         // Check for more features
         __cpuid(cpu_id, 0x80000001);
-        if ((cpu_id[2] >> 16) & 1)
-            caps.fma4 = true;
+        caps.lzcnt = Common::Bit<5>(cpu_id[2]);
+        caps.fma4 = Common::Bit<16>(cpu_id[2]);
     }
 
     if (max_ex_fn >= 0x80000007) {
         __cpuid(cpu_id, 0x80000007);
-        if (cpu_id[3] & (1 << 8)) {
-            caps.invariant_tsc = true;
-        }
+        caps.invariant_tsc = Common::Bit<8>(cpu_id[3]);
     }
 
     if (max_std_fn >= 0x16) {
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index e3b63302e..40c48b132 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -1,42 +1,65 @@
-// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
+// Project. Licensed under GPLv2 or any later version. Refer to the license.txt file included.
 
 #pragma once
 
-namespace Common {
+#include <string_view>
+#include "common/common_types.h"
 
-enum class Manufacturer : u32 {
-    Intel = 0,
-    AMD = 1,
-    Hygon = 2,
-    Unknown = 3,
-};
+namespace Common {
 
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
+
+    enum class Manufacturer : u8 {
+        Unknown = 0,
+        Intel = 1,
+        AMD = 2,
+        Hygon = 3,
+    };
+
+    static Manufacturer ParseManufacturer(std::string_view brand_string);
+
     Manufacturer manufacturer;
-    char cpu_string[0x21];
-    char brand_string[0x41];
-    bool sse;
-    bool sse2;
-    bool sse3;
-    bool ssse3;
-    bool sse4_1;
-    bool sse4_2;
-    bool lzcnt;
-    bool avx;
-    bool avx2;
-    bool avx512;
-    bool bmi1;
-    bool bmi2;
-    bool fma;
-    bool fma4;
-    bool aes;
-    bool invariant_tsc;
+    char brand_string[13];
+
+    char cpu_string[48];
+
     u32 base_frequency;
     u32 max_frequency;
     u32 bus_frequency;
+
+    bool sse : 1;
+    bool sse2 : 1;
+    bool sse3 : 1;
+    bool ssse3 : 1;
+    bool sse4_1 : 1;
+    bool sse4_2 : 1;
+
+    bool avx : 1;
+    bool avx_vnni : 1;
+    bool avx2 : 1;
+    bool avx512f : 1;
+    bool avx512dq : 1;
+    bool avx512cd : 1;
+    bool avx512bw : 1;
+    bool avx512vl : 1;
+    bool avx512vbmi : 1;
+    bool avx512bitalg : 1;
+
+    bool aes : 1;
+    bool bmi1 : 1;
+    bool bmi2 : 1;
+    bool f16c : 1;
+    bool fma : 1;
+    bool fma4 : 1;
+    bool gfni : 1;
+    bool invariant_tsc : 1;
+    bool lzcnt : 1;
+    bool movbe : 1;
+    bool pclmulqdq : 1;
+    bool popcnt : 1;
+    bool sha : 1;
 };
 
 /**
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 7ed43bfb1..1f234c822 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -154,6 +154,7 @@ add_library(core STATIC
     hle/api_version.h
     hle/ipc.h
     hle/ipc_helpers.h
+    hle/kernel/board/nintendo/nx/k_memory_layout.h
     hle/kernel/board/nintendo/nx/k_system_control.cpp
     hle/kernel/board/nintendo/nx/k_system_control.h
     hle/kernel/board/nintendo/nx/secure_monitor.h
@@ -166,6 +167,7 @@ add_library(core STATIC
     hle/kernel/hle_ipc.h
     hle/kernel/init/init_slab_setup.cpp
     hle/kernel/init/init_slab_setup.h
+    hle/kernel/initial_process.h
     hle/kernel/k_address_arbiter.cpp
     hle/kernel/k_address_arbiter.h
     hle/kernel/k_address_space_info.cpp
@@ -207,6 +209,7 @@ add_library(core STATIC
     hle/kernel/k_memory_region.h
     hle/kernel/k_memory_region_type.h
     hle/kernel/k_page_bitmap.h
+    hle/kernel/k_page_buffer.h
     hle/kernel/k_page_heap.cpp
     hle/kernel/k_page_heap.h
     hle/kernel/k_page_linked_list.h
@@ -244,6 +247,8 @@ add_library(core STATIC
     hle/kernel/k_system_control.h
     hle/kernel/k_thread.cpp
     hle/kernel/k_thread.h
+    hle/kernel/k_thread_local_page.cpp
+    hle/kernel/k_thread_local_page.h
     hle/kernel/k_thread_queue.cpp
     hle/kernel/k_thread_queue.h
     hle/kernel/k_trace.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index b0d89c539..c1c843b8f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
     config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
     config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
     config.only_detect_misalignment_via_page_table_on_page_boundary = true;
+    config.fastmem_exclusive_access = true;
+    config.recompile_on_exclusive_fastmem_failure = true;
 
     // Multi-process state
     config.processor_id = core_index;
@@ -146,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
     config.wall_clock_cntpct = uses_wall_clock;
 
     // Code cache size
-    config.code_cache_size = 512_MiB;
-    config.far_code_offset = 400_MiB;
+    config.code_cache_size = 128_MiB;
+    config.far_code_offset = 100_MiB;
 
     // Safe optimizations
     if (Settings::values.cpu_debug_mode) {
@@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
         if (!Settings::values.cpuopt_fastmem) {
             config.fastmem_pointer = nullptr;
         }
+        if (!Settings::values.cpuopt_fastmem_exclusives) {
+            config.fastmem_exclusive_access = false;
+        }
+        if (!Settings::values.cpuopt_recompile_exclusives) {
+            config.recompile_on_exclusive_fastmem_failure = false;
+        }
     }
 
     // Unsafe optimizations
@@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
         if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
         }
+        if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
+        }
     }
 
     // Curated optimizations
@@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+        config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
     }
 
     return std::make_unique<Dynarmic::A32::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 56836bd05..aa74fce4d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         config.fastmem_pointer = page_table->fastmem_arena;
         config.fastmem_address_space_bits = address_space_bits;
         config.silently_mirror_fastmem = false;
+
+        config.fastmem_exclusive_access = true;
+        config.recompile_on_exclusive_fastmem_failure = true;
     }
 
     // Multi-process state
@@ -205,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
     config.wall_clock_cntpct = uses_wall_clock;
 
     // Code cache size
-    config.code_cache_size = 512_MiB;
-    config.far_code_offset = 400_MiB;
+    config.code_cache_size = 128_MiB;
+    config.far_code_offset = 100_MiB;
 
     // Safe optimizations
     if (Settings::values.cpu_debug_mode) {
@@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         if (!Settings::values.cpuopt_fastmem) {
             config.fastmem_pointer = nullptr;
         }
+        if (!Settings::values.cpuopt_fastmem_exclusives) {
+            config.fastmem_exclusive_access = false;
+        }
+        if (!Settings::values.cpuopt_recompile_exclusives) {
+            config.recompile_on_exclusive_fastmem_failure = false;
+        }
     }
 
     // Unsafe optimizations
@@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         if (Settings::values.cpuopt_unsafe_fastmem_check) {
             config.fastmem_address_space_bits = 64;
         }
+        if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
+        }
     }
 
     // Curated optimizations
@@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
         config.fastmem_address_space_bits = 64;
+        config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
     }
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
index 397d054a8..ea6b224e0 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
@@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad
     });
 }
 
-void DynarmicExclusiveMonitor::ClearExclusive() {
-    monitor.Clear();
+void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
+    monitor.ClearProcessor(core_index);
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
index 265c4ecef..5a15b43ef 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.h
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -29,7 +29,7 @@ public:
     u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
     u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
     u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
-    void ClearExclusive() override;
+    void ClearExclusive(std::size_t core_index) override;
 
     bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
     bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index 62f6e6023..9914ca3da 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -23,7 +23,7 @@ public:
     virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
     virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
     virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
-    virtual void ClearExclusive() = 0;
+    virtual void ClearExclusive(std::size_t core_index) = 0;
 
     virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
     virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b0cfee3ee..c60a784c3 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -326,7 +326,9 @@ struct System::Impl {
         is_powered_on = false;
         exit_lock = false;
 
-        gpu_core->NotifyShutdown();
+        if (gpu_core != nullptr) {
+            gpu_core->NotifyShutdown();
+        }
 
         services.reset();
         service_manager.reset();
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index e413a520a..b3bffecb2 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -42,11 +42,20 @@ public:
             context.MakeCurrent();
         }
         ~Scoped() {
-            context.DoneCurrent();
+            if (active) {
+                context.DoneCurrent();
+            }
+        }
+
+        /// In the event that context was destroyed before the Scoped is destroyed, this provides a
+        /// mechanism to prevent calling a destroyed object's method during the destructor
+        void Cancel() {
+            active = false;
         }
 
     private:
         GraphicsContext& context;
+        bool active{true};
     };
 
     /// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 026257115..3c4e45fcd 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -385,7 +385,7 @@ public:
     T PopRaw();
 
     template <class T>
-    std::shared_ptr<T> PopIpcInterface() {
+    std::weak_ptr<T> PopIpcInterface() {
         ASSERT(context->Session()->IsDomain());
         ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
         return context->GetDomainHandler<T>(Pop<u32>() - 1);
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h
new file mode 100644
index 000000000..01e225088
--- /dev/null
+++ b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h
@@ -0,0 +1,13 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel {
+
+constexpr inline PAddr MainMemoryAddress = 0x80000000;
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 702cacffc..8027bec00 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -39,6 +39,10 @@ Smc::MemoryArrangement GetMemoryArrangeForInit() {
 }
 } // namespace
 
+size_t KSystemControl::Init::GetRealMemorySize() {
+    return GetIntendedMemorySize();
+}
+
 // Initialization.
 size_t KSystemControl::Init::GetIntendedMemorySize() {
     switch (GetMemorySizeForInit()) {
@@ -53,7 +57,13 @@ size_t KSystemControl::Init::GetIntendedMemorySize() {
 }
 
 PAddr KSystemControl::Init::GetKernelPhysicalBaseAddress(u64 base_address) {
-    return base_address;
+    const size_t real_dram_size = KSystemControl::Init::GetRealMemorySize();
+    const size_t intended_dram_size = KSystemControl::Init::GetIntendedMemorySize();
+    if (intended_dram_size * 2 < real_dram_size) {
+        return base_address;
+    } else {
+        return base_address + ((real_dram_size - intended_dram_size) / 2);
+    }
 }
 
 bool KSystemControl::Init::ShouldIncreaseThreadResourceLimit() {
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
index 52f230ced..df2a17f2a 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
@@ -13,6 +13,7 @@ public:
     class Init {
     public:
         // Initialization.
+        static std::size_t GetRealMemorySize();
         static std::size_t GetIntendedMemorySize();
         static PAddr GetKernelPhysicalBaseAddress(u64 base_address);
         static bool ShouldIncreaseThreadResourceLimit();
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index e19544c54..9f2175f82 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -45,7 +45,7 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
             LOG_CRITICAL(IPC, "object_id {} is too big!", object_id);
             return false;
         }
-        return DomainHandler(object_id - 1) != nullptr;
+        return DomainHandler(object_id - 1).lock() != nullptr;
     } else {
         return session_handler != nullptr;
     }
@@ -53,9 +53,6 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
 
 void SessionRequestHandler::ClientConnected(KServerSession* session) {
     session->ClientConnected(shared_from_this());
-
-    // Ensure our server session is tracked globally.
-    kernel.RegisterServerSession(session);
 }
 
 void SessionRequestHandler::ClientDisconnected(KServerSession* session) {
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index 754b41ff6..670cc741c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -94,6 +94,7 @@ protected:
     std::weak_ptr<ServiceThread> service_thread;
 };
 
+using SessionRequestHandlerWeakPtr = std::weak_ptr<SessionRequestHandler>;
 using SessionRequestHandlerPtr = std::shared_ptr<SessionRequestHandler>;
 
 /**
@@ -139,7 +140,7 @@ public:
         }
     }
 
-    SessionRequestHandlerPtr DomainHandler(std::size_t index) const {
+    SessionRequestHandlerWeakPtr DomainHandler(std::size_t index) const {
         ASSERT_MSG(index < DomainHandlerCount(), "Unexpected handler index {}", index);
         return domain_handlers.at(index);
     }
@@ -328,10 +329,10 @@ public:
 
     template <typename T>
     std::shared_ptr<T> GetDomainHandler(std::size_t index) const {
-        return std::static_pointer_cast<T>(manager->DomainHandler(index));
+        return std::static_pointer_cast<T>(manager.lock()->DomainHandler(index).lock());
     }
 
-    void SetSessionRequestManager(std::shared_ptr<SessionRequestManager> manager_) {
+    void SetSessionRequestManager(std::weak_ptr<SessionRequestManager> manager_) {
         manager = std::move(manager_);
     }
 
@@ -374,7 +375,7 @@ private:
     u32 handles_offset{};
     u32 domain_offset{};
 
-    std::shared_ptr<SessionRequestManager> manager;
+    std::weak_ptr<SessionRequestManager> manager;
 
     KernelCore& kernel;
     Core::Memory::Memory& memory;
diff --git a/src/core/hle/kernel/init/init_slab_setup.cpp b/src/core/hle/kernel/init/init_slab_setup.cpp
index 36fc0944a..b0f773ee0 100644
--- a/src/core/hle/kernel/init/init_slab_setup.cpp
+++ b/src/core/hle/kernel/init/init_slab_setup.cpp
@@ -7,19 +7,23 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/device_memory.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/init/init_slab_setup.h"
 #include "core/hle/kernel/k_code_memory.h"
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_memory_layout.h"
 #include "core/hle/kernel/k_memory_manager.h"
+#include "core/hle/kernel/k_page_buffer.h"
 #include "core/hle/kernel/k_port.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_session.h"
 #include "core/hle/kernel/k_shared_memory.h"
+#include "core/hle/kernel/k_shared_memory_info.h"
 #include "core/hle/kernel/k_system_control.h"
 #include "core/hle/kernel/k_thread.h"
+#include "core/hle/kernel/k_thread_local_page.h"
 #include "core/hle/kernel/k_transfer_memory.h"
 
 namespace Kernel::Init {
@@ -32,9 +36,13 @@ namespace Kernel::Init {
     HANDLER(KEvent, (SLAB_COUNT(KEvent)), ##__VA_ARGS__)                                           \
     HANDLER(KPort, (SLAB_COUNT(KPort)), ##__VA_ARGS__)                                             \
     HANDLER(KSharedMemory, (SLAB_COUNT(KSharedMemory)), ##__VA_ARGS__)                             \
+    HANDLER(KSharedMemoryInfo, (SLAB_COUNT(KSharedMemory) * 8), ##__VA_ARGS__)                     \
     HANDLER(KTransferMemory, (SLAB_COUNT(KTransferMemory)), ##__VA_ARGS__)                         \
     HANDLER(KCodeMemory, (SLAB_COUNT(KCodeMemory)), ##__VA_ARGS__)                                 \
     HANDLER(KSession, (SLAB_COUNT(KSession)), ##__VA_ARGS__)                                       \
+    HANDLER(KThreadLocalPage,                                                                      \
+            (SLAB_COUNT(KProcess) + (SLAB_COUNT(KProcess) + SLAB_COUNT(KThread)) / 8),             \
+            ##__VA_ARGS__)                                                                         \
     HANDLER(KResourceLimit, (SLAB_COUNT(KResourceLimit)), ##__VA_ARGS__)
 
 namespace {
@@ -50,38 +58,46 @@ enum KSlabType : u32 {
 // Constexpr counts.
 constexpr size_t SlabCountKProcess = 80;
 constexpr size_t SlabCountKThread = 800;
-constexpr size_t SlabCountKEvent = 700;
+constexpr size_t SlabCountKEvent = 900;
 constexpr size_t SlabCountKInterruptEvent = 100;
-constexpr size_t SlabCountKPort = 256 + 0x20; // Extra 0x20 ports over Nintendo for homebrew.
+constexpr size_t SlabCountKPort = 384;
 constexpr size_t SlabCountKSharedMemory = 80;
 constexpr size_t SlabCountKTransferMemory = 200;
 constexpr size_t SlabCountKCodeMemory = 10;
 constexpr size_t SlabCountKDeviceAddressSpace = 300;
-constexpr size_t SlabCountKSession = 933;
+constexpr size_t SlabCountKSession = 1133;
 constexpr size_t SlabCountKLightSession = 100;
 constexpr size_t SlabCountKObjectName = 7;
 constexpr size_t SlabCountKResourceLimit = 5;
 constexpr size_t SlabCountKDebug = Core::Hardware::NUM_CPU_CORES;
-constexpr size_t SlabCountKAlpha = 1;
-constexpr size_t SlabCountKBeta = 6;
+constexpr size_t SlabCountKIoPool = 1;
+constexpr size_t SlabCountKIoRegion = 6;
 
 constexpr size_t SlabCountExtraKThread = 160;
 
+/// Helper function to translate from the slab virtual address to the reserved location in physical
+/// memory.
+static PAddr TranslateSlabAddrToPhysical(KMemoryLayout& memory_layout, VAddr slab_addr) {
+    slab_addr -= memory_layout.GetSlabRegionAddress();
+    return slab_addr + Core::DramMemoryMap::SlabHeapBase;
+}
+
 template <typename T>
 VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAddr address,
                          size_t num_objects) {
-    // TODO(bunnei): This is just a place holder. We should initialize the appropriate KSlabHeap for
-    // kernel object type T with the backing kernel memory pointer once we emulate kernel memory.
 
     const size_t size = Common::AlignUp(sizeof(T) * num_objects, alignof(void*));
     VAddr start = Common::AlignUp(address, alignof(T));
 
-    // This is intentionally empty. Once KSlabHeap is fully implemented, we can replace this with
-    // the pointer to emulated memory to pass along. Until then, KSlabHeap will just allocate/free
-    // host memory.
-    void* backing_kernel_memory{};
+    // This should use the virtual memory address passed in, but currently, we do not setup the
+    // kernel virtual memory layout. Instead, we simply map these at a region of physical memory
+    // that we reserve for the slab heaps.
+    // TODO(bunnei): Fix this once we support the kernel virtual memory layout.
 
     if (size > 0) {
+        void* backing_kernel_memory{
+            system.DeviceMemory().GetPointer(TranslateSlabAddrToPhysical(memory_layout, start))};
+
         const KMemoryRegion* region = memory_layout.FindVirtual(start + size - 1);
         ASSERT(region != nullptr);
         ASSERT(region->IsDerivedFrom(KMemoryRegionType_KernelSlab));
@@ -91,6 +107,12 @@ VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAd
     return start + size;
 }
 
+size_t CalculateSlabHeapGapSize() {
+    constexpr size_t KernelSlabHeapGapSize = 2_MiB - 296_KiB;
+    static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax);
+    return KernelSlabHeapGapSize;
+}
+
 } // namespace
 
 KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
@@ -109,8 +131,8 @@ KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
         .num_KObjectName = SlabCountKObjectName,
         .num_KResourceLimit = SlabCountKResourceLimit,
         .num_KDebug = SlabCountKDebug,
-        .num_KAlpha = SlabCountKAlpha,
-        .num_KBeta = SlabCountKBeta,
+        .num_KIoPool = SlabCountKIoPool,
+        .num_KIoRegion = SlabCountKIoRegion,
     };
 }
 
@@ -136,11 +158,34 @@ size_t CalculateTotalSlabHeapSize(const KernelCore& kernel) {
 #undef ADD_SLAB_SIZE
 
     // Add the reserved size.
-    size += KernelSlabHeapGapsSize;
+    size += CalculateSlabHeapGapSize();
 
     return size;
 }
 
+void InitializeKPageBufferSlabHeap(Core::System& system) {
+    auto& kernel = system.Kernel();
+
+    const auto& counts = kernel.SlabResourceCounts();
+    const size_t num_pages =
+        counts.num_KProcess + counts.num_KThread + (counts.num_KProcess + counts.num_KThread) / 8;
+    const size_t slab_size = num_pages * PageSize;
+
+    // Reserve memory from the system resource limit.
+    ASSERT(kernel.GetSystemResourceLimit()->Reserve(LimitableResource::PhysicalMemory, slab_size));
+
+    // Allocate memory for the slab.
+    constexpr auto AllocateOption = KMemoryManager::EncodeOption(
+        KMemoryManager::Pool::System, KMemoryManager::Direction::FromFront);
+    const PAddr slab_address =
+        kernel.MemoryManager().AllocateAndOpenContinuous(num_pages, 1, AllocateOption);
+    ASSERT(slab_address != 0);
+
+    // Initialize the slabheap.
+    KPageBuffer::InitializeSlabHeap(kernel, system.DeviceMemory().GetPointer(slab_address),
+                                    slab_size);
+}
+
 void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
     auto& kernel = system.Kernel();
 
@@ -160,13 +205,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
     }
 
     // Create an array to represent the gaps between the slabs.
-    const size_t total_gap_size = KernelSlabHeapGapsSize;
+    const size_t total_gap_size = CalculateSlabHeapGapSize();
     std::array<size_t, slab_types.size()> slab_gaps;
-    for (size_t i = 0; i < slab_gaps.size(); i++) {
+    for (auto& slab_gap : slab_gaps) {
         // Note: This is an off-by-one error from Nintendo's intention, because GenerateRandomRange
         // is inclusive. However, Nintendo also has the off-by-one error, and it's "harmless", so we
         // will include it ourselves.
-        slab_gaps[i] = KSystemControl::GenerateRandomRange(0, total_gap_size);
+        slab_gap = KSystemControl::GenerateRandomRange(0, total_gap_size);
     }
 
     // Sort the array, so that we can treat differences between values as offsets to the starts of
@@ -177,13 +222,21 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
         }
     }
 
-    for (size_t i = 0; i < slab_types.size(); i++) {
+    // Track the gaps, so that we can free them to the unused slab tree.
+    VAddr gap_start = address;
+    size_t gap_size = 0;
+
+    for (size_t i = 0; i < slab_gaps.size(); i++) {
         // Add the random gap to the address.
-        address += (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1];
+        const auto cur_gap = (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1];
+        address += cur_gap;
+        gap_size += cur_gap;
 
 #define INITIALIZE_SLAB_HEAP(NAME, COUNT, ...)                                                     \
     case KSlabType_##NAME:                                                                         \
-        address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT);                 \
+        if (COUNT > 0) {                                                                           \
+            address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT);             \
+        }                                                                                          \
         break;
 
         // Initialize the slabheap.
@@ -192,7 +245,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
             FOREACH_SLAB_TYPE(INITIALIZE_SLAB_HEAP)
             // If we somehow get an invalid type, abort.
         default:
-            UNREACHABLE();
+            UNREACHABLE_MSG("Unknown slab type: {}", slab_types[i]);
+        }
+
+        // If we've hit the end of a gap, start tracking a new one (the gap is not freed here).
+        if (gap_start + gap_size != address) {
+            gap_start = address;
+            gap_size = 0;
         }
     }
 }
diff --git a/src/core/hle/kernel/init/init_slab_setup.h b/src/core/hle/kernel/init/init_slab_setup.h
index a8f7e0918..f54b67d02 100644
--- a/src/core/hle/kernel/init/init_slab_setup.h
+++ b/src/core/hle/kernel/init/init_slab_setup.h
@@ -32,12 +32,13 @@ struct KSlabResourceCounts {
     size_t num_KObjectName;
     size_t num_KResourceLimit;
     size_t num_KDebug;
-    size_t num_KAlpha;
-    size_t num_KBeta;
+    size_t num_KIoPool;
+    size_t num_KIoRegion;
 };
 
 void InitializeSlabResourceCounts(KernelCore& kernel);
 size_t CalculateTotalSlabHeapSize(const KernelCore& kernel);
+void InitializeKPageBufferSlabHeap(Core::System& system);
 void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout);
 
 } // namespace Kernel::Init
diff --git a/src/core/hle/kernel/initial_process.h b/src/core/hle/kernel/initial_process.h
new file mode 100644
index 000000000..25b27909c
--- /dev/null
+++ b/src/core/hle/kernel/initial_process.h
@@ -0,0 +1,23 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/literals.h"
+#include "core/hle/kernel/board/nintendo/nx/k_memory_layout.h"
+#include "core/hle/kernel/board/nintendo/nx/k_system_control.h"
+
+namespace Kernel {
+
+using namespace Common::Literals;
+
+constexpr std::size_t InitialProcessBinarySizeMax = 12_MiB;
+
+static inline PAddr GetInitialProcessBinaryPhysicalAddress() {
+    using Kernel::Board::Nintendo::Nx::KSystemControl;
+    return KSystemControl::Init::GetKernelPhysicalBaseAddress(MainMemoryAddress);
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index 783c69858..8cdd0490f 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu
         }
     } else {
         // Otherwise, clear our exclusive hold and finish
-        monitor.ClearExclusive();
+        monitor.ClearExclusive(current_core);
     }
 
     // We're done.
@@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32
         }
     } else {
         // Otherwise, clear our exclusive hold and finish.
-        monitor.ClearExclusive();
+        monitor.ClearExclusive(current_core);
     }
 
     // We're done.
@@ -115,7 +115,7 @@ ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) {
     {
         KScopedSchedulerLock sl(kernel);
 
-        auto it = thread_tree.nfind_light({addr, -1});
+        auto it = thread_tree.nfind_key({addr, -1});
         while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
                (it->GetAddressArbiterKey() == addr)) {
             // End the thread's wait.
@@ -148,7 +148,7 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
             return ResultInvalidState;
         }
 
-        auto it = thread_tree.nfind_light({addr, -1});
+        auto it = thread_tree.nfind_key({addr, -1});
         while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
                (it->GetAddressArbiterKey() == addr)) {
             // End the thread's wait.
@@ -171,7 +171,7 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
     {
         [[maybe_unused]] const KScopedSchedulerLock sl(kernel);
 
-        auto it = thread_tree.nfind_light({addr, -1});
+        auto it = thread_tree.nfind_key({addr, -1});
         // Determine the updated value.
         s32 new_value{};
         if (count <= 0) {
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index aadcc297a..8e2a9593c 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -244,7 +244,7 @@ void KConditionVariable::Signal(u64 cv_key, s32 count) {
     {
         KScopedSchedulerLock sl(kernel);
 
-        auto it = thread_tree.nfind_light({cv_key, -1});
+        auto it = thread_tree.nfind_key({cv_key, -1});
         while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
                (it->GetConditionVariableKey() == cv_key)) {
             KThread* target_thread = std::addressof(*it);
diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h
index 57ff538cc..0858827b6 100644
--- a/src/core/hle/kernel/k_memory_layout.h
+++ b/src/core/hle/kernel/k_memory_layout.h
@@ -57,11 +57,11 @@ constexpr std::size_t KernelPageTableHeapSize = GetMaximumOverheadSize(MainMemor
 constexpr std::size_t KernelInitialPageHeapSize = 128_KiB;
 
 constexpr std::size_t KernelSlabHeapDataSize = 5_MiB;
-constexpr std::size_t KernelSlabHeapGapsSize = 2_MiB - 64_KiB;
-constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSize;
+constexpr std::size_t KernelSlabHeapGapsSizeMax = 2_MiB - 64_KiB;
+constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSizeMax;
 
 // NOTE: This is calculated from KThread slab counts, assuming KThread size <= 0x860.
-constexpr std::size_t KernelSlabHeapAdditionalSize = 416_KiB;
+constexpr std::size_t KernelSlabHeapAdditionalSize = 0x68000;
 
 constexpr std::size_t KernelResourceSize =
     KernelPageTableHeapSize + KernelInitialPageHeapSize + KernelSlabHeapSize;
@@ -173,6 +173,10 @@ public:
         return Dereference(FindVirtualLinear(address));
     }
 
+    const KMemoryRegion& GetPhysicalLinearRegion(PAddr address) const {
+        return Dereference(FindPhysicalLinear(address));
+    }
+
     const KMemoryRegion* GetPhysicalKernelTraceBufferRegion() const {
         return GetPhysicalMemoryRegionTree().FindFirstDerived(KMemoryRegionType_KernelTraceBuffer);
     }
diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp
index 1b44541b1..a2f18f643 100644
--- a/src/core/hle/kernel/k_memory_manager.cpp
+++ b/src/core/hle/kernel/k_memory_manager.cpp
@@ -10,189 +10,412 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/device_memory.h"
+#include "core/hle/kernel/initial_process.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/kernel/k_page_linked_list.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/svc_results.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
-KMemoryManager::KMemoryManager(Core::System& system_) : system{system_} {}
+namespace {
+
+constexpr KMemoryManager::Pool GetPoolFromMemoryRegionType(u32 type) {
+    if ((type | KMemoryRegionType_DramApplicationPool) == type) {
+        return KMemoryManager::Pool::Application;
+    } else if ((type | KMemoryRegionType_DramAppletPool) == type) {
+        return KMemoryManager::Pool::Applet;
+    } else if ((type | KMemoryRegionType_DramSystemPool) == type) {
+        return KMemoryManager::Pool::System;
+    } else if ((type | KMemoryRegionType_DramSystemNonSecurePool) == type) {
+        return KMemoryManager::Pool::SystemNonSecure;
+    } else {
+        UNREACHABLE_MSG("InvalidMemoryRegionType for conversion to Pool");
+        return {};
+    }
+}
 
-std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) {
-    const auto size{end_address - start_address};
+} // namespace
+
+KMemoryManager::KMemoryManager(Core::System& system_)
+    : system{system_}, pool_locks{
+                           KLightLock{system_.Kernel()},
+                           KLightLock{system_.Kernel()},
+                           KLightLock{system_.Kernel()},
+                           KLightLock{system_.Kernel()},
+                       } {}
+
+void KMemoryManager::Initialize(VAddr management_region, size_t management_region_size) {
+    // Creates one manager per DramUserPool region group, then frees each region to its heap.
+    // Compute the end of the management region, used to bound each manager's metadata.
+    const VAddr management_region_end = management_region + management_region_size;
+
+    // Reset our manager count.
+    num_managers = 0;
+
+    // Traverse the physical memory region tree, initializing each manager as appropriate.
+    while (num_managers != MaxManagerCount) {
+        // Locate the region that should initialize the current manager.
+        PAddr region_address = 0;
+        size_t region_size = 0;
+        Pool region_pool = Pool::Count;
+        for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
+            // We only care about regions that we need to create managers for.
+            if (!it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
+                continue;
+            }
 
-    // Calculate metadata sizes
-    const auto ref_count_size{(size / PageSize) * sizeof(u16)};
-    const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)};
-    const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)};
-    const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)};
-    const auto total_metadata_size{manager_size + page_heap_size};
-    ASSERT(manager_size <= total_metadata_size);
-    ASSERT(Common::IsAligned(total_metadata_size, PageSize));
+            // We want to initialize the managers in order.
+            if (it.GetAttributes() != num_managers) {
+                continue;
+            }
 
-    // Setup region
-    pool = new_pool;
+            const PAddr cur_start = it.GetAddress();
+            const PAddr cur_end = it.GetEndAddress();
+
+            // Validate the region.
+            ASSERT(cur_end != 0);
+            ASSERT(cur_start != 0);
+            ASSERT(it.GetSize() > 0);
+
+            // Update the region's extents.
+            if (region_address == 0) {
+                region_address = cur_start;
+                region_size = it.GetSize();
+                region_pool = GetPoolFromMemoryRegionType(it.GetType());
+            } else {
+                ASSERT(cur_start == region_address + region_size);
+
+                // Update the size.
+                region_size = cur_end - region_address;
+                ASSERT(GetPoolFromMemoryRegionType(it.GetType()) == region_pool);
+            }
+        }
+
+        // If we didn't find a region, we're done.
+        if (region_size == 0) {
+            break;
+        }
 
-    // Initialize the manager's KPageHeap
-    heap.Initialize(start_address, size, page_heap_size);
+        // Initialize a new manager for the region.
+        Impl* manager = std::addressof(managers[num_managers++]);
+        ASSERT(num_managers <= managers.size());
+
+        const size_t cur_size = manager->Initialize(region_address, region_size, management_region,
+                                                    management_region_end, region_pool);
+        management_region += cur_size;
+        ASSERT(management_region <= management_region_end);
+
+        // Insert the manager into the pool list.
+        const auto region_pool_index = static_cast<u32>(region_pool);
+        if (pool_managers_tail[region_pool_index] == nullptr) {
+            pool_managers_head[region_pool_index] = manager;
+        } else {
+            pool_managers_tail[region_pool_index]->SetNext(manager);
+            manager->SetPrev(pool_managers_tail[region_pool_index]);
+        }
+        pool_managers_tail[region_pool_index] = manager;
+    }
 
-    // Free the memory to the heap
-    heap.Free(start_address, size / PageSize);
+    // Free each region to its corresponding heap.
+    size_t reserved_sizes[MaxManagerCount] = {};
+    const PAddr ini_start = GetInitialProcessBinaryPhysicalAddress();
+    const PAddr ini_end = ini_start + InitialProcessBinarySizeMax;
+    const PAddr ini_last = ini_end - 1;
+    for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
+        if (it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
+            // Get the manager for the region.
+            auto index = it.GetAttributes();
+            auto& manager = managers[index];
+
+            const PAddr cur_start = it.GetAddress();
+            const PAddr cur_last = it.GetLastAddress();
+            const PAddr cur_end = it.GetEndAddress();
+
+            if (cur_start <= ini_start && ini_last <= cur_last) {
+                // Free memory before the ini to the heap.
+                if (cur_start != ini_start) {
+                    manager.Free(cur_start, (ini_start - cur_start) / PageSize);
+                }
 
-    // Update the heap's used size
-    heap.UpdateUsedSize();
+                // Open/reserve the ini memory.
+                manager.OpenFirst(ini_start, InitialProcessBinarySizeMax / PageSize);
+                reserved_sizes[it.GetAttributes()] += InitialProcessBinarySizeMax;
 
-    return total_metadata_size;
-}
+                // Free memory after the ini to the heap (Free takes a page count, not bytes).
+                if (ini_last != cur_last) {
+                    ASSERT(cur_end != 0);
+                    manager.Free(ini_end, (cur_end - ini_end) / PageSize);
+                }
+            } else {
+                // Ensure there's no partial overlap with the ini image.
+                if (cur_start <= ini_last) {
+                    ASSERT(cur_last < ini_start);
+                } else {
+                    // Otherwise, check the region for general validity.
+                    ASSERT(cur_end != 0);
+                }
 
-void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) {
-    ASSERT(pool < Pool::Count);
-    managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address);
+                // Free the memory to the heap.
+                manager.Free(cur_start, it.GetSize() / PageSize);
+            }
+        }
+    }
+
+    // Update the used size for all managers.
+    for (size_t i = 0; i < num_managers; ++i) {
+        managers[i].SetInitialUsedHeapSize(reserved_sizes[i]);
+    }
 }
 
-VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages,
-                                                u32 option) {
-    // Early return if we're allocating no pages
+PAddr KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option) {
+    // Early return if we're allocating no pages.
     if (num_pages == 0) {
-        return {};
+        return 0;
     }
 
-    // Lock the pool that we're allocating from
+    // Lock the pool that we're allocating from.
     const auto [pool, dir] = DecodeOption(option);
-    const auto pool_index{static_cast<std::size_t>(pool)};
-    std::lock_guard lock{pool_locks[pool_index]};
-
-    // Choose a heap based on our page size request
-    const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)};
-
-    // Loop, trying to iterate from each block
-    // TODO (bunnei): Support multiple managers
-    Impl& chosen_manager{managers[pool_index]};
-    VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)};
+    KScopedLightLock lk(pool_locks[static_cast<std::size_t>(pool)]);
+
+    // Choose a heap based on our page size request.
+    const s32 heap_index = KPageHeap::GetAlignedBlockIndex(num_pages, align_pages);
+
+    // Walk the pool's managers in the requested direction until one can supply a block.
+    Impl* chosen_manager = nullptr;
+    PAddr allocated_block = 0;
+    for (chosen_manager = this->GetFirstManager(pool, dir); chosen_manager != nullptr;
+         chosen_manager = this->GetNextManager(chosen_manager, dir)) {
+        allocated_block = chosen_manager->AllocateBlock(heap_index, true);
+        if (allocated_block != 0) {
+            break;
+        }
+    }
 
-    // If we failed to allocate, quit now
-    if (!allocated_block) {
-        return {};
+    // If no manager could satisfy the request, report failure by returning 0.
+    if (allocated_block == 0) {
+        return 0;
     }
 
-    // If we allocated more than we need, free some
-    const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)};
+    // The block size depends only on heap_index; give pages beyond num_pages back to the manager.
+    const size_t allocated_pages = KPageHeap::GetBlockNumPages(heap_index);
     if (allocated_pages > num_pages) {
-        chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
+        chosen_manager->Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
     }
 
+    // Open the first reference to the pages.
+    chosen_manager->OpenFirst(allocated_block, num_pages);
+
     return allocated_block;
 }
 
-ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
-                                    Direction dir, u32 heap_fill_value) {
-    ASSERT(page_list.GetNumPages() == 0);
+ResultCode KMemoryManager::AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
+                                                 Direction dir, bool random) {
+    // Choose a heap based on our page size request.
+    const s32 heap_index = KPageHeap::GetBlockIndex(num_pages);
+    R_UNLESS(0 <= heap_index, ResultOutOfMemory);
+
+    // Ensure that we don't leave anything un-freed.
+    auto group_guard = SCOPE_GUARD({
+        for (const auto& it : out->Nodes()) {
+            auto& manager = this->GetManager(system.Kernel().MemoryLayout(), it.GetAddress());
+            const size_t num_pages_to_free =
+                std::min(it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
+            manager.Free(it.GetAddress(), num_pages_to_free);
+        }
+    });
 
-    // Early return if we're allocating no pages
-    if (num_pages == 0) {
-        return ResultSuccess;
-    }
+    // Keep allocating until we've allocated all our pages.
+    for (s32 index = heap_index; index >= 0 && num_pages > 0; index--) {
+        const size_t pages_per_alloc = KPageHeap::GetBlockNumPages(index);
+        for (Impl* cur_manager = this->GetFirstManager(pool, dir); cur_manager != nullptr;
+             cur_manager = this->GetNextManager(cur_manager, dir)) {
+            while (num_pages >= pages_per_alloc) {
+                // Allocate a block.
+                PAddr allocated_block = cur_manager->AllocateBlock(index, random);
+                if (allocated_block == 0) {
+                    break;
+                }
 
-    // Lock the pool that we're allocating from
-    const auto pool_index{static_cast<std::size_t>(pool)};
-    std::lock_guard lock{pool_locks[pool_index]};
+                // Safely add it to our group.
+                {
+                    auto block_guard =
+                        SCOPE_GUARD({ cur_manager->Free(allocated_block, pages_per_alloc); });
+                    R_TRY(out->AddBlock(allocated_block, pages_per_alloc));
+                    block_guard.Cancel();
+                }
 
-    // Choose a heap based on our page size request
-    const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)};
-    if (heap_index < 0) {
-        return ResultOutOfMemory;
+                num_pages -= pages_per_alloc;
+            }
+        }
     }
 
-    // TODO (bunnei): Support multiple managers
-    Impl& chosen_manager{managers[pool_index]};
+    // Only succeed if we allocated as many pages as we wanted.
+    R_UNLESS(num_pages == 0, ResultOutOfMemory);
 
-    // Ensure that we don't leave anything un-freed
-    auto group_guard = detail::ScopeExit([&] {
-        for (const auto& it : page_list.Nodes()) {
-            const auto min_num_pages{std::min<size_t>(
-                it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
-            chosen_manager.Free(it.GetAddress(), min_num_pages);
-        }
-    });
+    // We succeeded!
+    group_guard.Cancel();
+    return ResultSuccess;
+}
 
-    // Keep allocating until we've allocated all our pages
-    for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) {
-        const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)};
+ResultCode KMemoryManager::AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option) {
+    ASSERT(out != nullptr);
+    ASSERT(out->GetNumPages() == 0);
 
-        while (num_pages >= pages_per_alloc) {
-            // Allocate a block
-            VAddr allocated_block{chosen_manager.AllocateBlock(index, false)};
-            if (!allocated_block) {
-                break;
-            }
+    // Early return if we're allocating no pages.
+    R_SUCCEED_IF(num_pages == 0);
 
-            // Safely add it to our group
-            {
-                auto block_guard = detail::ScopeExit(
-                    [&] { chosen_manager.Free(allocated_block, pages_per_alloc); });
+    // Lock the pool that we're allocating from.
+    const auto [pool, dir] = DecodeOption(option);
+    KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
+
+    // Allocate the page group.
+    R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
+
+    // Open the first reference to the pages.
+    for (const auto& block : out->Nodes()) {
+        PAddr cur_address = block.GetAddress();
+        size_t remaining_pages = block.GetNumPages();
+        while (remaining_pages > 0) {
+            // Get the manager for the current address.
+            auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
+
+            // Process part or all of the block.
+            const size_t cur_pages =
+                std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
+            manager.OpenFirst(cur_address, cur_pages);
+
+            // Advance.
+            cur_address += cur_pages * PageSize;
+            remaining_pages -= cur_pages;
+        }
+    }
 
-                if (const ResultCode result{page_list.AddBlock(allocated_block, pages_per_alloc)};
-                    result.IsError()) {
-                    return result;
-                }
+    return ResultSuccess;
+}
 
-                block_guard.Cancel();
-            }
+ResultCode KMemoryManager::AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages,
+                                                     u32 option, u64 process_id, u8 fill_pattern) {
+    ASSERT(out != nullptr);
+    ASSERT(out->GetNumPages() == 0);
 
-            num_pages -= pages_per_alloc;
-        }
-    }
+    // Decode the option.
+    const auto [pool, dir] = DecodeOption(option);
 
-    // Clear allocated memory.
-    for (const auto& it : page_list.Nodes()) {
-        std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
-                    it.GetSize());
+    // Allocate the memory.
+    {
+        // Lock the pool that we're allocating from.
+        KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
+
+        // Allocate the page group.
+        R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
+
+        // Open the first reference to the pages.
+        for (const auto& block : out->Nodes()) {
+            PAddr cur_address = block.GetAddress();
+            size_t remaining_pages = block.GetNumPages();
+            while (remaining_pages > 0) {
+                // Get the manager for the current address.
+                auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
+
+                // Process part or all of the block.
+                const size_t cur_pages =
+                    std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
+                manager.OpenFirst(cur_address, cur_pages);
+
+                // Advance.
+                cur_address += cur_pages * PageSize;
+                remaining_pages -= cur_pages;
+            }
+        }
     }
 
-    // Only succeed if we allocated as many pages as we wanted
-    if (num_pages) {
-        return ResultOutOfMemory;
+    // Set all the allocated memory.
+    for (const auto& block : out->Nodes()) {
+        std::memset(system.DeviceMemory().GetPointer(block.GetAddress()), fill_pattern,
+                    block.GetSize());
     }
 
-    // We succeeded!
-    group_guard.Cancel();
-
     return ResultSuccess;
 }
 
-ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
-                                Direction dir, u32 heap_fill_value) {
-    // Early return if we're freeing no pages
-    if (!num_pages) {
-        return ResultSuccess;
+void KMemoryManager::Open(PAddr address, size_t num_pages) {
+    // Repeatedly open references until we've done so for all pages.
+    while (num_pages) {
+        auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
+        const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
+
+        {
+            KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
+            manager.Open(address, cur_pages);
+        }
+
+        num_pages -= cur_pages;
+        address += cur_pages * PageSize;
     }
+}
 
-    // Lock the pool that we're freeing from
-    const auto pool_index{static_cast<std::size_t>(pool)};
-    std::lock_guard lock{pool_locks[pool_index]};
+void KMemoryManager::Close(PAddr address, size_t num_pages) {
+    // Repeatedly close references until we've done so for all pages.
+    while (num_pages) {
+        auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
+        const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
 
-    // TODO (bunnei): Support multiple managers
-    Impl& chosen_manager{managers[pool_index]};
+        {
+            KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
+            manager.Close(address, cur_pages);
+        }
 
-    // Free all of the pages
-    for (const auto& it : page_list.Nodes()) {
-        const auto min_num_pages{std::min<size_t>(
-            it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
-        chosen_manager.Free(it.GetAddress(), min_num_pages);
+        num_pages -= cur_pages;
+        address += cur_pages * PageSize;
     }
+}
 
-    return ResultSuccess;
+void KMemoryManager::Close(const KPageLinkedList& pg) {
+    for (const auto& node : pg.Nodes()) {
+        Close(node.GetAddress(), node.GetNumPages());
+    }
+}
+void KMemoryManager::Open(const KPageLinkedList& pg) {
+    for (const auto& node : pg.Nodes()) {
+        Open(node.GetAddress(), node.GetNumPages());
+    }
+}
+
+size_t KMemoryManager::Impl::Initialize(PAddr address, size_t size, VAddr management,
+                                        VAddr management_end, Pool p) {
+    // Calculate management sizes; the total must fit before management_end and is returned.
+    const size_t ref_count_size = (size / PageSize) * sizeof(u16);
+    const size_t optimize_map_size = CalculateOptimizedProcessOverheadSize(size);
+    const size_t manager_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
+    const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(size);
+    const size_t total_management_size = manager_size + page_heap_size;
+    ASSERT(manager_size <= total_management_size);
+    ASSERT(management + total_management_size <= management_end);
+    ASSERT(Common::IsAligned(total_management_size, PageSize));
+
+    // Set up the region. Reference counts are sized from the intended memory size, not size.
+    pool = p;
+    management_region = management;
+    page_reference_counts.resize(
+        Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize() / PageSize);
+    ASSERT(Common::IsAligned(management_region, PageSize));
+
+    // Initialize the manager's KPageHeap.
+    heap.Initialize(address, size, management + manager_size, page_heap_size);
+
+    return total_management_size;
 }
 
-std::size_t KMemoryManager::Impl::CalculateManagementOverheadSize(std::size_t region_size) {
-    const std::size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
-    const std::size_t optimize_map_size =
+size_t KMemoryManager::Impl::CalculateManagementOverheadSize(size_t region_size) {
+    const size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
+    const size_t optimize_map_size =
         (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
          Common::BitSize<u64>()) *
         sizeof(u64);
-    const std::size_t manager_meta_size =
-        Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
-    const std::size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
+    const size_t manager_meta_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
+    const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
     return manager_meta_size + page_heap_size;
 }
 
diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h
index 17c7690f1..18775b262 100644
--- a/src/core/hle/kernel/k_memory_manager.h
+++ b/src/core/hle/kernel/k_memory_manager.h
@@ -5,11 +5,12 @@
 #pragma once
 
 #include <array>
-#include <mutex>
 #include <tuple>
 
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "core/hle/kernel/k_light_lock.h"
+#include "core/hle/kernel/k_memory_layout.h"
 #include "core/hle/kernel/k_page_heap.h"
 #include "core/hle/result.h"
 
@@ -52,22 +53,33 @@ public:
 
     explicit KMemoryManager(Core::System& system_);
 
-    constexpr std::size_t GetSize(Pool pool) const {
-        return managers[static_cast<std::size_t>(pool)].GetSize();
+    void Initialize(VAddr management_region, size_t management_region_size);
+
+    constexpr size_t GetSize(Pool pool) const {
+        constexpr Direction GetSizeDirection = Direction::FromFront;
+        size_t total = 0;
+        for (auto* manager = this->GetFirstManager(pool, GetSizeDirection); manager != nullptr;
+             manager = this->GetNextManager(manager, GetSizeDirection)) {
+            total += manager->GetSize();
+        }
+        return total;
     }
 
-    void InitializeManager(Pool pool, u64 start_address, u64 end_address);
+    PAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
+    ResultCode AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option);
+    ResultCode AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages, u32 option,
+                                         u64 process_id, u8 fill_pattern);
+
+    static constexpr size_t MaxManagerCount = 10;
 
-    VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
-    ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
-                        u32 heap_fill_value = 0);
-    ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
-                    u32 heap_fill_value = 0);
+    void Close(PAddr address, size_t num_pages);
+    void Close(const KPageLinkedList& pg);
 
-    static constexpr std::size_t MaxManagerCount = 10;
+    void Open(PAddr address, size_t num_pages);
+    void Open(const KPageLinkedList& pg);
 
 public:
-    static std::size_t CalculateManagementOverheadSize(std::size_t region_size) {
+    static size_t CalculateManagementOverheadSize(size_t region_size) {
         return Impl::CalculateManagementOverheadSize(region_size);
     }
 
@@ -100,17 +112,26 @@ private:
         Impl() = default;
         ~Impl() = default;
 
-        std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address);
+        size_t Initialize(PAddr address, size_t size, VAddr management, VAddr management_end,
+                          Pool p);
 
         VAddr AllocateBlock(s32 index, bool random) {
             return heap.AllocateBlock(index, random);
         }
 
-        void Free(VAddr addr, std::size_t num_pages) {
+        void Free(VAddr addr, size_t num_pages) {
             heap.Free(addr, num_pages);
         }
 
-        constexpr std::size_t GetSize() const {
+        void SetInitialUsedHeapSize(size_t reserved_size) {
+            heap.SetInitialUsedSize(reserved_size);
+        }
+
+        constexpr Pool GetPool() const {
+            return pool;
+        }
+
+        constexpr size_t GetSize() const {
             return heap.GetSize();
         }
 
@@ -122,10 +143,88 @@ private:
             return heap.GetEndAddress();
         }
 
-        static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
+        constexpr size_t GetPageOffset(PAddr address) const {
+            return heap.GetPageOffset(address);
+        }
+
+        constexpr size_t GetPageOffsetToEnd(PAddr address) const {
+            return heap.GetPageOffsetToEnd(address);
+        }
+
+        constexpr void SetNext(Impl* n) {
+            next = n;
+        }
+
+        constexpr void SetPrev(Impl* n) {
+            prev = n;
+        }
+
+        constexpr Impl* GetNext() const {
+            return next;
+        }
+
+        constexpr Impl* GetPrev() const {
+            return prev;
+        }
+
+        void OpenFirst(PAddr address, size_t num_pages) {
+            size_t index = this->GetPageOffset(address);
+            const size_t end = index + num_pages;
+            while (index < end) {
+                const RefCount ref_count = (++page_reference_counts[index]);
+                ASSERT(ref_count == 1);
 
-        static constexpr std::size_t CalculateOptimizedProcessOverheadSize(
-            std::size_t region_size) {
+                index++;
+            }
+        }
+
+        void Open(PAddr address, size_t num_pages) {
+            size_t index = this->GetPageOffset(address);
+            const size_t end = index + num_pages;
+            while (index < end) {
+                const RefCount ref_count = (++page_reference_counts[index]);
+                ASSERT(ref_count > 1);
+
+                index++;
+            }
+        }
+
+        void Close(PAddr address, size_t num_pages) {
+            size_t index = this->GetPageOffset(address);
+            const size_t end = index + num_pages;
+
+            size_t free_start = 0;
+            size_t free_count = 0;
+            while (index < end) {
+                ASSERT(page_reference_counts[index] > 0);
+                const RefCount ref_count = (--page_reference_counts[index]);
+
+                // Keep track of how many zero refcounts we see in a row, to minimize calls to free.
+                if (ref_count == 0) {
+                    if (free_count > 0) {
+                        free_count++;
+                    } else {
+                        free_start = index;
+                        free_count = 1;
+                    }
+                } else {
+                    if (free_count > 0) {
+                        this->Free(heap.GetAddress() + free_start * PageSize, free_count);
+                        free_count = 0;
+                    }
+                }
+
+                index++;
+            }
+
+            if (free_count > 0) {
+                this->Free(heap.GetAddress() + free_start * PageSize, free_count);
+            }
+        }
+
+        static size_t CalculateManagementOverheadSize(size_t region_size);
+
+        static constexpr size_t CalculateOptimizedProcessOverheadSize(size_t region_size) {
             return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
                     Common::BitSize<u64>()) *
                    sizeof(u64);
@@ -135,13 +234,45 @@ private:
         using RefCount = u16;
 
         KPageHeap heap;
+        std::vector<RefCount> page_reference_counts;
+        VAddr management_region{};
         Pool pool{};
+        Impl* next{};
+        Impl* prev{};
     };
 
 private:
+    Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) {
+        return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
+    }
+
+    const Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) const {
+        return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
+    }
+
+    constexpr Impl* GetFirstManager(Pool pool, Direction dir) const {
+        return dir == Direction::FromBack ? pool_managers_tail[static_cast<size_t>(pool)]
+                                          : pool_managers_head[static_cast<size_t>(pool)];
+    }
+
+    constexpr Impl* GetNextManager(Impl* cur, Direction dir) const {
+        if (dir == Direction::FromBack) {
+            return cur->GetPrev();
+        } else {
+            return cur->GetNext();
+        }
+    }
+
+    ResultCode AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
+                                     Direction dir, bool random);
+
+private:
     Core::System& system;
-    std::array<std::mutex, static_cast<std::size_t>(Pool::Count)> pool_locks;
+    std::array<KLightLock, static_cast<size_t>(Pool::Count)> pool_locks;
+    std::array<Impl*, MaxManagerCount> pool_managers_head{};
+    std::array<Impl*, MaxManagerCount> pool_managers_tail{};
     std::array<Impl, MaxManagerCount> managers;
+    size_t num_managers{};
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_memory_region_type.h b/src/core/hle/kernel/k_memory_region_type.h
index a05e66677..0baeddf51 100644
--- a/src/core/hle/kernel/k_memory_region_type.h
+++ b/src/core/hle/kernel/k_memory_region_type.h
@@ -14,7 +14,8 @@
 namespace Kernel {
 
 enum KMemoryRegionType : u32 {
-    KMemoryRegionAttr_CarveoutProtected = 0x04000000,
+    KMemoryRegionAttr_CarveoutProtected = 0x02000000,
+    KMemoryRegionAttr_Uncached = 0x04000000,
     KMemoryRegionAttr_DidKernelMap = 0x08000000,
     KMemoryRegionAttr_ShouldKernelMap = 0x10000000,
     KMemoryRegionAttr_UserReadOnly = 0x20000000,
@@ -239,6 +240,11 @@ static_assert(KMemoryRegionType_VirtualDramHeapBase.GetValue() == 0x1A);
 static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A);
 static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A);
 
+// UNUSED: .DeriveSparse(2, 2, 0);
+constexpr auto KMemoryRegionType_VirtualDramUnknownDebug =
+    KMemoryRegionType_Dram.DeriveSparse(2, 2, 1);
+static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x52));
+
 constexpr auto KMemoryRegionType_VirtualDramKernelInitPt =
     KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0);
 constexpr auto KMemoryRegionType_VirtualDramPoolManagement =
@@ -330,6 +336,8 @@ constexpr KMemoryRegionType GetTypeForVirtualLinearMapping(u32 type_id) {
         return KMemoryRegionType_VirtualDramKernelTraceBuffer;
     } else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) {
         return KMemoryRegionType_VirtualDramKernelPtHeap;
+    } else if ((type_id | KMemoryRegionAttr_ShouldKernelMap) == type_id) {
+        return KMemoryRegionType_VirtualDramUnknownDebug;
     } else {
         return KMemoryRegionType_Dram;
     }
diff --git a/src/core/hle/kernel/k_page_buffer.h b/src/core/hle/kernel/k_page_buffer.h
new file mode 100644
index 000000000..0a9451228
--- /dev/null
+++ b/src/core/hle/kernel/k_page_buffer.h
@@ -0,0 +1,34 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/device_memory.h"
+#include "core/hle/kernel/memory_types.h"
+
+namespace Kernel {
+
+class KPageBuffer final : public KSlabAllocated<KPageBuffer> {
+public:
+    KPageBuffer() = default;
+
+    static KPageBuffer* FromPhysicalAddress(Core::System& system, PAddr phys_addr) {
+        ASSERT(Common::IsAligned(phys_addr, PageSize));
+        return reinterpret_cast<KPageBuffer*>(system.DeviceMemory().GetPointer(phys_addr));
+    }
+
+private:
+    [[maybe_unused]] alignas(PageSize) std::array<u8, PageSize> m_buffer{};
+};
+
+static_assert(sizeof(KPageBuffer) == PageSize);
+static_assert(alignof(KPageBuffer) == PageSize);
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp
index 29d996d62..97a5890a0 100644
--- a/src/core/hle/kernel/k_page_heap.cpp
+++ b/src/core/hle/kernel/k_page_heap.cpp
@@ -7,35 +7,51 @@
 
 namespace Kernel {
 
-void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) {
-    // Check our assumptions
-    ASSERT(Common::IsAligned((address), PageSize));
+void KPageHeap::Initialize(PAddr address, size_t size, VAddr management_address,
+                           size_t management_size, const size_t* block_shifts,
+                           size_t num_block_shifts) {
+    // Check our assumptions.
+    ASSERT(Common::IsAligned(address, PageSize));
     ASSERT(Common::IsAligned(size, PageSize));
+    ASSERT(0 < num_block_shifts && num_block_shifts <= NumMemoryBlockPageShifts);
+    const VAddr management_end = management_address + management_size;
 
-    // Set our members
-    heap_address = address;
-    heap_size = size;
-
-    // Setup bitmaps
-    metadata.resize(metadata_size / sizeof(u64));
-    u64* cur_bitmap_storage{metadata.data()};
-    for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
-        const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
-        const std::size_t next_block_shift{
-            (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
-        cur_bitmap_storage = blocks[i].Initialize(heap_address, heap_size, cur_block_shift,
-                                                  next_block_shift, cur_bitmap_storage);
+    // Set our members.
+    m_heap_address = address;
+    m_heap_size = size;
+    m_num_blocks = num_block_shifts;
+
+    // Set up bitmaps.
+    m_management_data.resize(management_size / sizeof(u64));
+    u64* cur_bitmap_storage{m_management_data.data()};
+    for (size_t i = 0; i < num_block_shifts; i++) {
+        const size_t cur_block_shift = block_shifts[i];
+        const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
+        cur_bitmap_storage = m_blocks[i].Initialize(m_heap_address, m_heap_size, cur_block_shift,
+                                                    next_block_shift, cur_bitmap_storage);
     }
+
+    // Ensure we didn't overextend our bounds. NOTE(review): compares a host pointer with a VAddr.
+    ASSERT(VAddr(cur_bitmap_storage) <= management_end);
+}
+
+size_t KPageHeap::GetNumFreePages() const {
+    size_t num_free = 0;
+
+    for (size_t i = 0; i < m_num_blocks; i++) {
+        num_free += m_blocks[i].GetNumFreePages();
+    }
+
+    return num_free;
 }
 
-VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
-    const std::size_t needed_size{blocks[index].GetSize()};
+PAddr KPageHeap::AllocateBlock(s32 index, bool random) {
+    const size_t needed_size = m_blocks[index].GetSize();
 
-    for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) {
-        if (const VAddr addr{blocks[i].PopBlock(random)}; addr) {
-            if (const std::size_t allocated_size{blocks[i].GetSize()};
-                allocated_size > needed_size) {
-                Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
+    for (s32 i = index; i < static_cast<s32>(m_num_blocks); i++) {
+        if (const PAddr addr = m_blocks[i].PopBlock(random); addr != 0) {
+            if (const size_t allocated_size = m_blocks[i].GetSize(); allocated_size > needed_size) {
+                this->Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
             }
             return addr;
         }
@@ -44,34 +60,34 @@ VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
     return 0;
 }
 
-void KPageHeap::FreeBlock(VAddr block, s32 index) {
+void KPageHeap::FreeBlock(PAddr block, s32 index) {
     do {
-        block = blocks[index++].PushBlock(block);
+        block = m_blocks[index++].PushBlock(block);
     } while (block != 0);
 }
 
-void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
-    // Freeing no pages is a no-op
+void KPageHeap::Free(PAddr addr, size_t num_pages) {
+    // Freeing no pages is a no-op.
     if (num_pages == 0) {
         return;
     }
 
-    // Find the largest block size that we can free, and free as many as possible
-    s32 big_index{static_cast<s32>(MemoryBlockPageShifts.size()) - 1};
-    const VAddr start{addr};
-    const VAddr end{(num_pages * PageSize) + addr};
-    VAddr before_start{start};
-    VAddr before_end{start};
-    VAddr after_start{end};
-    VAddr after_end{end};
+    // Find the largest block size that we can free, and free as many as possible.
+    s32 big_index = static_cast<s32>(m_num_blocks) - 1;
+    const PAddr start = addr;
+    const PAddr end = addr + num_pages * PageSize;
+    PAddr before_start = start;
+    PAddr before_end = start;
+    PAddr after_start = end;
+    PAddr after_end = end;
     while (big_index >= 0) {
-        const std::size_t block_size{blocks[big_index].GetSize()};
-        const VAddr big_start{Common::AlignUp((start), block_size)};
-        const VAddr big_end{Common::AlignDown((end), block_size)};
+        const size_t block_size = m_blocks[big_index].GetSize();
+        const PAddr big_start = Common::AlignUp(start, block_size);
+        const PAddr big_end = Common::AlignDown(end, block_size);
         if (big_start < big_end) {
-            // Free as many big blocks as we can
-            for (auto block{big_start}; block < big_end; block += block_size) {
-                FreeBlock(block, big_index);
+            // Free as many big blocks as we can.
+            for (auto block = big_start; block < big_end; block += block_size) {
+                this->FreeBlock(block, big_index);
             }
             before_end = big_start;
             after_start = big_end;
@@ -81,31 +97,31 @@ void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
     }
     ASSERT(big_index >= 0);
 
-    // Free space before the big blocks
-    for (s32 i{big_index - 1}; i >= 0; i--) {
-        const std::size_t block_size{blocks[i].GetSize()};
+    // Free space before the big blocks.
+    for (s32 i = big_index - 1; i >= 0; i--) {
+        const size_t block_size = m_blocks[i].GetSize();
         while (before_start + block_size <= before_end) {
             before_end -= block_size;
-            FreeBlock(before_end, i);
+            this->FreeBlock(before_end, i);
         }
     }
 
-    // Free space after the big blocks
-    for (s32 i{big_index - 1}; i >= 0; i--) {
-        const std::size_t block_size{blocks[i].GetSize()};
+    // Free space after the big blocks.
+    for (s32 i = big_index - 1; i >= 0; i--) {
+        const size_t block_size = m_blocks[i].GetSize();
         while (after_start + block_size <= after_end) {
-            FreeBlock(after_start, i);
+            this->FreeBlock(after_start, i);
             after_start += block_size;
         }
     }
 }
 
-std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) {
-    std::size_t overhead_size = 0;
-    for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
-        const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
-        const std::size_t next_block_shift{
-            (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
+size_t KPageHeap::CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
+                                                  size_t num_block_shifts) {
+    size_t overhead_size = 0;
+    for (size_t i = 0; i < num_block_shifts; i++) {
+        const size_t cur_block_shift = block_shifts[i];
+        const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
         overhead_size += KPageHeap::Block::CalculateManagementOverheadSize(
             region_size, cur_block_shift, next_block_shift);
     }
diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h
index a65aa28a0..60fff766b 100644
--- a/src/core/hle/kernel/k_page_heap.h
+++ b/src/core/hle/kernel/k_page_heap.h
@@ -23,54 +23,73 @@ public:
     KPageHeap() = default;
     ~KPageHeap() = default;
 
-    constexpr VAddr GetAddress() const {
-        return heap_address;
+    constexpr PAddr GetAddress() const {
+        return m_heap_address;
     }
-    constexpr std::size_t GetSize() const {
-        return heap_size;
+    constexpr size_t GetSize() const {
+        return m_heap_size;
     }
-    constexpr VAddr GetEndAddress() const {
-        return GetAddress() + GetSize();
+    constexpr PAddr GetEndAddress() const {
+        return this->GetAddress() + this->GetSize();
     }
-    constexpr std::size_t GetPageOffset(VAddr block) const {
-        return (block - GetAddress()) / PageSize;
+    constexpr size_t GetPageOffset(PAddr block) const {
+        return (block - this->GetAddress()) / PageSize;
+    }
+    constexpr size_t GetPageOffsetToEnd(PAddr block) const {
+        return (this->GetEndAddress() - block) / PageSize;
+    }
+
+    void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
+                    size_t management_size) {
+        return this->Initialize(heap_address, heap_size, management_address, management_size,
+                                MemoryBlockPageShifts.data(), NumMemoryBlockPageShifts);
+    }
+
+    size_t GetFreeSize() const {
+        return this->GetNumFreePages() * PageSize;
     }
 
-    void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size);
-    VAddr AllocateBlock(s32 index, bool random);
-    void Free(VAddr addr, std::size_t num_pages);
+    void SetInitialUsedSize(size_t reserved_size) {
+        // Check that the free space plus the reserved size fits within the heap.
+        const size_t free_size = this->GetNumFreePages() * PageSize;
+        ASSERT(m_heap_size >= free_size + reserved_size);
 
-    void UpdateUsedSize() {
-        used_size = heap_size - (GetNumFreePages() * PageSize);
+        // Record the heap space that is neither free nor reserved as the initial used size.
+        m_initial_used_size = m_heap_size - free_size - reserved_size;
     }
 
-    static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
+    PAddr AllocateBlock(s32 index, bool random);
+    void Free(PAddr addr, size_t num_pages);
+
+    static size_t CalculateManagementOverheadSize(size_t region_size) {
+        return CalculateManagementOverheadSize(region_size, MemoryBlockPageShifts.data(),
+                                               NumMemoryBlockPageShifts);
+    }
 
-    static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
-        const auto target_pages{std::max(num_pages, align_pages)};
-        for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
-            if (target_pages <=
-                (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+    static constexpr s32 GetAlignedBlockIndex(size_t num_pages, size_t align_pages) {
+        const size_t target_pages = std::max(num_pages, align_pages);
+        for (size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
+            if (target_pages <= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
                 return static_cast<s32>(i);
             }
         }
         return -1;
     }
 
-    static constexpr s32 GetBlockIndex(std::size_t num_pages) {
-        for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) {
-            if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+    static constexpr s32 GetBlockIndex(size_t num_pages) {
+        for (s32 i = static_cast<s32>(NumMemoryBlockPageShifts) - 1; i >= 0; i--) {
+            if (num_pages >= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
                 return i;
             }
         }
         return -1;
     }
 
-    static constexpr std::size_t GetBlockSize(std::size_t index) {
-        return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index];
+    static constexpr size_t GetBlockSize(size_t index) {
+        return size_t(1) << MemoryBlockPageShifts[index];
     }
 
-    static constexpr std::size_t GetBlockNumPages(std::size_t index) {
+    static constexpr size_t GetBlockNumPages(size_t index) {
         return GetBlockSize(index) / PageSize;
     }
 
@@ -83,114 +102,116 @@ private:
         Block() = default;
         ~Block() = default;
 
-        constexpr std::size_t GetShift() const {
-            return block_shift;
+        constexpr size_t GetShift() const {
+            return m_block_shift;
         }
-        constexpr std::size_t GetNextShift() const {
-            return next_block_shift;
+        constexpr size_t GetNextShift() const {
+            return m_next_block_shift;
         }
-        constexpr std::size_t GetSize() const {
-            return static_cast<std::size_t>(1) << GetShift();
+        constexpr size_t GetSize() const {
+            return u64(1) << this->GetShift();
         }
-        constexpr std::size_t GetNumPages() const {
-            return GetSize() / PageSize;
+        constexpr size_t GetNumPages() const {
+            return this->GetSize() / PageSize;
         }
-        constexpr std::size_t GetNumFreeBlocks() const {
-            return bitmap.GetNumBits();
+        constexpr size_t GetNumFreeBlocks() const {
+            return m_bitmap.GetNumBits();
         }
-        constexpr std::size_t GetNumFreePages() const {
-            return GetNumFreeBlocks() * GetNumPages();
+        constexpr size_t GetNumFreePages() const {
+            return this->GetNumFreeBlocks() * this->GetNumPages();
         }
 
-        u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs,
-                        u64* bit_storage) {
-            // Set shifts
-            block_shift = bs;
-            next_block_shift = nbs;
-
-            // Align up the address
-            VAddr end{addr + size};
-            const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift)
-                                                     : (1ULL << block_shift)};
-            addr = Common::AlignDown((addr), align);
-            end = Common::AlignUp((end), align);
-
-            heap_address = addr;
-            end_offset = (end - addr) / (1ULL << block_shift);
-            return bitmap.Initialize(bit_storage, end_offset);
+        u64* Initialize(PAddr addr, size_t size, size_t bs, size_t nbs, u64* bit_storage) {
+            // Set shifts.
+            m_block_shift = bs;
+            m_next_block_shift = nbs;
+
+            // Align the start address down and the end address up.
+            PAddr end = addr + size;
+            const size_t align = (m_next_block_shift != 0) ? (u64(1) << m_next_block_shift)
+                                                           : (u64(1) << m_block_shift);
+            addr = Common::AlignDown(addr, align);
+            end = Common::AlignUp(end, align);
+
+            m_heap_address = addr;
+            m_end_offset = (end - addr) / (u64(1) << m_block_shift);
+            return m_bitmap.Initialize(bit_storage, m_end_offset);
         }
 
-        VAddr PushBlock(VAddr address) {
-            // Set the bit for the free block
-            std::size_t offset{(address - heap_address) >> GetShift()};
-            bitmap.SetBit(offset);
+        PAddr PushBlock(PAddr address) {
+            // Set the bit for the free block.
+            size_t offset = (address - m_heap_address) >> this->GetShift();
+            m_bitmap.SetBit(offset);
 
-            // If we have a next shift, try to clear the blocks below and return the address
-            if (GetNextShift()) {
-                const auto diff{1ULL << (GetNextShift() - GetShift())};
+            // If we have a next shift, try to clear the blocks below this one and return the new
+            // address.
+            if (this->GetNextShift()) {
+                const size_t diff = u64(1) << (this->GetNextShift() - this->GetShift());
                 offset = Common::AlignDown(offset, diff);
-                if (bitmap.ClearRange(offset, diff)) {
-                    return heap_address + (offset << GetShift());
+                if (m_bitmap.ClearRange(offset, diff)) {
+                    return m_heap_address + (offset << this->GetShift());
                 }
             }
 
-            // We couldn't coalesce, or we're already as big as possible
-            return 0;
+            // We couldn't coalesce, or we're already as big as possible.
+            return {};
         }
 
-        VAddr PopBlock(bool random) {
-            // Find a free block
-            const s64 soffset{bitmap.FindFreeBlock(random)};
+        PAddr PopBlock(bool random) {
+            // Find a free block.
+            s64 soffset = m_bitmap.FindFreeBlock(random);
             if (soffset < 0) {
-                return 0;
+                return {};
             }
-            const auto offset{static_cast<std::size_t>(soffset)};
+            const size_t offset = static_cast<size_t>(soffset);
 
-            // Update our tracking and return it
-            bitmap.ClearBit(offset);
-            return heap_address + (offset << GetShift());
+            // Update our tracking and return it.
+            m_bitmap.ClearBit(offset);
+            return m_heap_address + (offset << this->GetShift());
         }
 
-        static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size,
-                                                                     std::size_t cur_block_shift,
-                                                                     std::size_t next_block_shift) {
-            const auto cur_block_size{(1ULL << cur_block_shift)};
-            const auto next_block_size{(1ULL << next_block_shift)};
-            const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size};
+    public:
+        static constexpr size_t CalculateManagementOverheadSize(size_t region_size,
+                                                                size_t cur_block_shift,
+                                                                size_t next_block_shift) {
+            const size_t cur_block_size = (u64(1) << cur_block_shift);
+            const size_t next_block_size = (u64(1) << next_block_shift);
+            const size_t align = (next_block_shift != 0) ? next_block_size : cur_block_size;
             return KPageBitmap::CalculateManagementOverheadSize(
                 (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size);
         }
 
     private:
-        KPageBitmap bitmap;
-        VAddr heap_address{};
-        uintptr_t end_offset{};
-        std::size_t block_shift{};
-        std::size_t next_block_shift{};
+        KPageBitmap m_bitmap;
+        PAddr m_heap_address{};
+        uintptr_t m_end_offset{};
+        size_t m_block_shift{};
+        size_t m_next_block_shift{};
     };
 
-    constexpr std::size_t GetNumFreePages() const {
-        std::size_t num_free{};
-
-        for (const auto& block : blocks) {
-            num_free += block.GetNumFreePages();
-        }
-
-        return num_free;
-    }
+private:
+    void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
+                    size_t management_size, const size_t* block_shifts, size_t num_block_shifts);
+    size_t GetNumFreePages() const;
 
-    void FreeBlock(VAddr block, s32 index);
+    void FreeBlock(PAddr block, s32 index);
 
-    static constexpr std::size_t NumMemoryBlockPageShifts{7};
-    static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
+    static constexpr size_t NumMemoryBlockPageShifts{7};
+    static constexpr std::array<size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
         0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
     };
 
-    VAddr heap_address{};
-    std::size_t heap_size{};
-    std::size_t used_size{};
-    std::array<Block, NumMemoryBlockPageShifts> blocks{};
-    std::vector<u64> metadata;
+private:
+    static size_t CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
+                                                  size_t num_block_shifts);
+
+private:
+    PAddr m_heap_address{};
+    size_t m_heap_size{};
+    size_t m_initial_used_size{};
+    size_t m_num_blocks{};
+    std::array<Block, NumMemoryBlockPageShifts> m_blocks{};
+    std::vector<u64> m_management_data;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 88aa2a152..02d93b12e 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -273,87 +273,219 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
     R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free,
                                  KMemoryPermission::None, KMemoryPermission::None,
                                  KMemoryAttribute::None, KMemoryAttribute::None));
+    KPageLinkedList pg;
+    R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
+        &pg, num_pages,
+        KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, allocation_option)));
 
-    KPageLinkedList page_linked_list;
-    R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
-                                                   allocation_option));
-    R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
+    R_TRY(Operate(addr, num_pages, pg, OperationType::MapGroup));
 
     block_manager->Update(addr, num_pages, state, perm);
 
     return ResultSuccess;
 }
 
-ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
+    // Validate that the destination range can hold alias-code memory.
+    R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
+             ResultInvalidMemoryRegion);
+
+    // Lock the table.
     KScopedLightLock lk(general_lock);
 
-    const std::size_t num_pages{size / PageSize};
+    // Verify that the source is attribute-free, user-RW Normal memory; capture its state/perm.
+    KMemoryState src_state{};
+    KMemoryPermission src_perm{};
+    std::size_t num_src_allocator_blocks{};
+    R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
+                                 src_address, size, KMemoryState::All, KMemoryState::Normal,
+                                 KMemoryPermission::All, KMemoryPermission::UserReadWrite,
+                                 KMemoryAttribute::All, KMemoryAttribute::None));
 
-    KMemoryState state{};
-    KMemoryPermission perm{};
-    CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size,
-                                  KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All,
-                                  KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask,
-                                  KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
+    // Verify that the destination memory is unmapped.
+    std::size_t num_dst_allocator_blocks{};
+    R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
+                                 KMemoryState::Free, KMemoryPermission::None,
+                                 KMemoryPermission::None, KMemoryAttribute::None,
+                                 KMemoryAttribute::None));
 
-    if (IsRegionMapped(dst_addr, size)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Map the code memory.
+    {
+        // Determine the number of pages being operated on.
+        const std::size_t num_pages = size / PageSize;
 
-    KPageLinkedList page_linked_list;
-    AddRegionToPages(src_addr, num_pages, page_linked_list);
+        // Create page groups for the memory being mapped.
+        KPageLinkedList pg;
+        AddRegionToPages(src_address, num_pages, pg);
 
-    {
-        auto block_guard = detail::ScopeExit(
-            [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
+        // Reprotect the source as kernel-read/not mapped.
+        const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
+                                                             KMemoryPermission::NotMapped);
+        R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
 
-        CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
-                             OperationType::ChangePermissions));
-        CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
+        // Ensure that we restore the source's original permissions on failure.
+        auto unprot_guard = SCOPE_GUARD({
+            ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
+                       .IsSuccess());
+        });
 
-        block_guard.Cancel();
-    }
+        // Map the alias pages.
+        R_TRY(MapPages(dst_address, pg, new_perm));
 
-    block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
-                          KMemoryAttribute::Locked);
-    block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
+        // The alias pages are mapped, so the source must stay reprotected; disarm the guard that
+        // would have restored its permissions.
+        unprot_guard.Cancel();
+
+        // Record the result: the source becomes Locked, the destination becomes AliasCode.
+        block_manager->Update(src_address, num_pages, src_state, new_perm,
+                              KMemoryAttribute::Locked);
+        block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
+                              KMemoryAttribute::None);
+    }
 
     return ResultSuccess;
 }
 
-ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
+    // Validate the unmapping request.
+    R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
+             ResultInvalidMemoryRegion);
+
+    // Lock the table.
     KScopedLightLock lk(general_lock);
 
-    if (!size) {
-        return ResultSuccess;
+    // Verify that the source memory is locked normal heap.
+    std::size_t num_src_allocator_blocks{};
+    R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
+                                 KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
+                                 KMemoryPermission::None, KMemoryAttribute::All,
+                                 KMemoryAttribute::Locked));
+
+    // Verify that the destination memory is aliasable code.
+    std::size_t num_dst_allocator_blocks{};
+    R_TRY(this->CheckMemoryStateContiguous(
+        std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
+        KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
+        KMemoryAttribute::All, KMemoryAttribute::None));
+
+    // Determine whether any pages being unmapped are code.
+    bool any_code_pages = false;
+    {
+        KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
+        while (true) {
+            // Get the memory info.
+            const KMemoryInfo info = it->GetMemoryInfo();
+
+            // Check if the memory has code flag.
+            if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
+                any_code_pages = true;
+                break;
+            }
+
+            // Stop once this block covers the last byte of the destination range.
+            if (dst_address + size - 1 <= info.GetLastAddress()) {
+                break;
+            }
+
+            // Advance.
+            ++it;
+        }
     }
 
-    const std::size_t num_pages{size / PageSize};
+    // On exit, invalidate the instruction cache if code pages were unmapped and reprotected.
+    bool reprotected_pages = false;
+    SCOPE_EXIT({
+        if (reprotected_pages && any_code_pages) {
+            system.InvalidateCpuInstructionCacheRange(dst_address, size);
+        }
+    });
 
-    CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size,
-                                  KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
-                                  KMemoryPermission::None, KMemoryAttribute::Mask,
-                                  KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
+    // Unmap.
+    {
+        // Determine the number of pages being operated on.
+        const std::size_t num_pages = size / PageSize;
 
-    KMemoryState state{};
-    CASCADE_CODE(CheckMemoryState(
-        &state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
-        KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
-        KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
-    CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
-                                  KMemoryPermission::None, KMemoryAttribute::Mask,
-                                  KMemoryAttribute::None));
-    CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
+        // Unmap the aliased copy of the pages.
+        R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
 
-    block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
-    block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
-                          KMemoryPermission::UserReadWrite);
+        // Try to set the permissions for the source pages back to what they should be.
+        R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
+                      OperationType::ChangePermissions));
 
-    system.InvalidateCpuInstructionCacheRange(dst_addr, size);
+        // Apply the memory block updates: the alias range becomes free, the source plain user-RW.
+        block_manager->Update(dst_address, num_pages, KMemoryState::Free);
+        block_manager->Update(src_address, num_pages, KMemoryState::Normal,
+                              KMemoryPermission::UserReadWrite);
+
+        // Note that we reprotected pages.
+        reprotected_pages = true;
+    }
 
     return ResultSuccess;
 }
 
+// Finds an address in [region_start, region_start + region_num_pages * PageSize) at which
+// num_pages pages (plus guard_pages guard pages) can be mapped, honoring alignment and offset.
+// With ASLR enabled a random placement is tried first; otherwise, or if that fails, the first
+// free area from the block manager is used. The result is 0 when nothing was found.
+VAddr KPageTable::FindFreeArea(VAddr region_start, std::size_t region_num_pages,
+                               std::size_t num_pages, std::size_t alignment, std::size_t offset,
+                               std::size_t guard_pages) {
+    VAddr address = 0;
+
+    if (num_pages <= region_num_pages) {
+        // Randomize only if the guard pages fit too; else the subtraction below would wrap.
+        if (this->IsAslrEnabled() && guard_pages <= region_num_pages - num_pages) {
+            const std::size_t spare_pages = region_num_pages - num_pages - guard_pages;
+            const VAddr region_last = region_start + region_num_pages * PageSize - 1;
+
+            // Try to directly find a free area up to 8 times.
+            for (std::size_t i = 0; i < 8; i++) {
+                const std::size_t random_offset =
+                    KSystemControl::GenerateRandomRange(0, spare_pages * PageSize / alignment) *
+                    alignment;
+                const VAddr candidate =
+                    Common::AlignDown((region_start + random_offset), alignment) + offset;
+
+                // Must be in a free block and region, with guard room before the candidate.
+                KMemoryInfo info = this->QueryInfoImpl(candidate);
+                if (info.state != KMemoryState::Free || region_start > candidate ||
+                    info.GetAddress() + guard_pages * PageSize > candidate) {
+                    continue;
+                }
+
+                // The mapping plus guard pages must end within both that block and the region.
+                const VAddr candidate_end = candidate + (num_pages + guard_pages) * PageSize - 1;
+                if (candidate_end > info.GetLastAddress() || candidate_end > region_last) {
+                    continue;
+                }
+
+                address = candidate;
+                break;
+            }
+            // Fall back to finding the first free area with a random offset.
+            if (address == 0) {
+                // NOTE: Nintendo does not account for guard pages here.
+                // This may theoretically cause an offset to be chosen that cannot be mapped. We
+                // will account for guard pages.
+                const std::size_t offset_pages =
+                    KSystemControl::GenerateRandomRange(0, spare_pages);
+                address = block_manager->FindFreeArea(region_start + offset_pages * PageSize,
+                                                      region_num_pages - offset_pages, num_pages,
+                                                      alignment, offset, guard_pages);
+            }
+        }
+
+        // Find the first free area.
+        if (address == 0) {
+            address = block_manager->FindFreeArea(region_start, region_num_pages, num_pages,
+                                                  alignment, offset, guard_pages);
+        }
+    }
+
+    return address;
+}
+
 ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size,
                                           KPageTable& src_page_table, VAddr src_addr) {
     KScopedLightLock lk(general_lock);
@@ -443,9 +575,10 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
             R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
 
             // Allocate pages for the new memory.
-            KPageLinkedList page_linked_list;
-            R_TRY(system.Kernel().MemoryManager().Allocate(
-                page_linked_list, (size - mapped_size) / PageSize, memory_pool, allocation_option));
+            KPageLinkedList pg;
+            R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
+                &pg, (size - mapped_size) / PageSize,
+                KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
 
             // Map the memory.
             {
@@ -547,7 +680,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
                 });
 
                 // Iterate over the memory.
-                auto pg_it = page_linked_list.Nodes().begin();
+                auto pg_it = pg.Nodes().begin();
                 PAddr pg_phys_addr = pg_it->GetAddress();
                 size_t pg_pages = pg_it->GetNumPages();
 
@@ -571,7 +704,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
                             // Check if we're at the end of the physical block.
                             if (pg_pages == 0) {
                                 // Ensure there are more pages to map.
-                                ASSERT(pg_it != page_linked_list.Nodes().end());
+                                ASSERT(pg_it != pg.Nodes().end());
 
                                 // Advance our physical block.
                                 ++pg_it;
@@ -841,10 +974,14 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr address, std::size_t size) {
     process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
 
     // Update memory blocks.
-    system.Kernel().MemoryManager().Free(pg, size / PageSize, memory_pool, allocation_option);
     block_manager->Update(address, size / PageSize, KMemoryState::Free, KMemoryPermission::None,
                           KMemoryAttribute::None);
 
+    // TODO(bunnei): This is a workaround until the next set of changes, where we add reference
+    // counting for mapped pages. Until then, we must manually close the reference to the page
+    // group.
+    system.Kernel().MemoryManager().Close(pg);
+
     // We succeeded.
     remap_guard.Cancel();
 
@@ -980,6 +1117,46 @@ ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list
     return ResultSuccess;
 }
 
+ResultCode KPageTable::MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
+                                PAddr phys_addr, bool is_pa_valid, VAddr region_start,
+                                std::size_t region_num_pages, KMemoryState state,
+                                KMemoryPermission perm) {
+    ASSERT(Common::IsAligned(alignment, PageSize) && alignment >= PageSize);
+
+    // Ensure the region can hold memory of this state and is larger than the request.
+    R_UNLESS(this->CanContain(region_start, region_num_pages * PageSize, state),
+             ResultInvalidCurrentMemory);
+    R_UNLESS(num_pages < region_num_pages, ResultOutOfMemory);
+
+    // Lock the table.
+    KScopedLightLock lk(general_lock);
+
+    // Find a free address in the region (randomized only when ASLR is enabled), with guard pages.
+    VAddr addr = this->FindFreeArea(region_start, region_num_pages, num_pages, alignment, 0,
+                                    this->GetNumGuardPages());
+    R_UNLESS(addr != 0, ResultOutOfMemory);
+    ASSERT(Common::IsAligned(addr, alignment));
+    ASSERT(this->CanContain(addr, num_pages * PageSize, state));
+    ASSERT(this->CheckMemoryState(addr, num_pages * PageSize, KMemoryState::All, KMemoryState::Free,
+                                  KMemoryPermission::None, KMemoryPermission::None,
+                                  KMemoryAttribute::None, KMemoryAttribute::None)
+               .IsSuccess());
+
+    // Map the caller-supplied physical address; the path without one is not implemented yet.
+    if (is_pa_valid) {
+        R_TRY(this->Operate(addr, num_pages, perm, OperationType::Map, phys_addr));
+    } else {
+        UNIMPLEMENTED();
+    }
+
+    // Record the new mapping in the block manager.
+    block_manager->Update(addr, num_pages, state, perm);
+
+    // Report the chosen address to the caller.
+    *out_addr = addr;
+    return ResultSuccess;
+}
+
 ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list) {
     ASSERT(this->IsLockedByCurrentThread());
 
@@ -1022,6 +1199,30 @@ ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list,
     return ResultSuccess;
 }
 
+// Unmaps num_pages pages at address, which must all be in `state`, and records them as free.
+ResultCode KPageTable::UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state) {
+    // Reject requests whose byte range this table does not contain.
+    const std::size_t byte_count = num_pages * PageSize;
+    R_UNLESS(Contains(address, byte_count), ResultInvalidCurrentMemory);
+
+    // Hold the table lock for the remainder of the operation.
+    KScopedLightLock lk(general_lock);
+
+    // The whole range must currently be in `state` with no attributes set.
+    std::size_t blocks_needed{};
+    R_TRY(CheckMemoryState(&blocks_needed, address, byte_count, KMemoryState::All, state,
+                           KMemoryPermission::None, KMemoryPermission::None,
+                           KMemoryAttribute::All, KMemoryAttribute::None));
+
+    // Remove the mapping itself.
+    R_TRY(Operate(address, num_pages, KMemoryPermission::None, OperationType::Unmap));
+
+    // Record the range as free in the block manager.
+    block_manager->Update(address, num_pages, KMemoryState::Free, KMemoryPermission::None);
+
+    return ResultSuccess;
+}
+
 ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size,
                                                   Svc::MemoryPermission svc_perm) {
     const size_t num_pages = size / PageSize;
@@ -1270,9 +1471,16 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
     R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
 
     // Allocate pages for the heap extension.
-    KPageLinkedList page_linked_list;
-    R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize,
-                                                   memory_pool, allocation_option));
+    KPageLinkedList pg;
+    R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
+        &pg, allocation_size / PageSize,
+        KMemoryManager::EncodeOption(memory_pool, allocation_option)));
+
+    // Clear all the newly allocated pages.
+    for (const auto& it : pg.Nodes()) {
+        std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
+                    it.GetSize());
+    }
 
     // Map the pages.
     {
@@ -1291,7 +1499,7 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
 
         // Map the pages.
         const auto num_pages = allocation_size / PageSize;
-        R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup));
+        R_TRY(Operate(current_heap_end, num_pages, pg, OperationType::MapGroup));
 
         // Clear all the newly allocated pages.
         for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
@@ -1339,8 +1547,9 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages,
         R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
     } else {
         KPageLinkedList page_group;
-        R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool,
-                                                       allocation_option));
+        R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
+            &page_group, needed_num_pages,
+            KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
         R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
     }
 
@@ -1547,7 +1756,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermiss
     return ResultSuccess;
 }
 
-constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
+VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
     switch (state) {
     case KMemoryState::Free:
     case KMemoryState::Kernel:
@@ -1583,7 +1792,7 @@ constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
     }
 }
 
-constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
+std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
     switch (state) {
     case KMemoryState::Free:
     case KMemoryState::Kernel:
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index c98887d34..54c6adf8d 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -36,8 +36,8 @@ public:
                                     KMemoryManager::Pool pool);
     ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
                               KMemoryPermission perm);
-    ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
-    ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
+    ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
+    ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
     ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
                                   VAddr src_addr);
     ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
@@ -46,7 +46,14 @@ public:
     ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
     ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
                         KMemoryPermission perm);
+    ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
+                        PAddr phys_addr, KMemoryState state, KMemoryPermission perm) {
+        // Map within the whole region for `state`, using the caller-supplied physical address.
+        return MapPages(out_addr, num_pages, alignment, phys_addr, /*is_pa_valid=*/true,
+                        GetRegionAddress(state), GetRegionSize(state) / PageSize, state, perm);
+    }
     ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state);
+    ResultCode UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state);
     ResultCode SetProcessMemoryPermission(VAddr addr, std::size_t size,
                                           Svc::MemoryPermission svc_perm);
     KMemoryInfo QueryInfo(VAddr addr);
@@ -91,6 +98,9 @@ private:
     ResultCode InitializeMemoryLayout(VAddr start, VAddr end);
     ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
                         KMemoryPermission perm);
+    ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
+                        PAddr phys_addr, bool is_pa_valid, VAddr region_start,
+                        std::size_t region_num_pages, KMemoryState state, KMemoryPermission perm);
     ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list);
     bool IsRegionMapped(VAddr address, u64 size);
     bool IsRegionContiguous(VAddr addr, u64 size) const;
@@ -102,8 +112,11 @@ private:
                        OperationType operation);
     ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
                        OperationType operation, PAddr map_addr = 0);
-    constexpr VAddr GetRegionAddress(KMemoryState state) const;
-    constexpr std::size_t GetRegionSize(KMemoryState state) const;
+    VAddr GetRegionAddress(KMemoryState state) const;
+    std::size_t GetRegionSize(KMemoryState state) const;
+
+    VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages,
+                       std::size_t alignment, std::size_t offset, std::size_t guard_pages);
 
     ResultCode CheckMemoryStateContiguous(std::size_t* out_blocks_needed, VAddr addr,
                                           std::size_t size, KMemoryState state_mask,
@@ -137,7 +150,7 @@ private:
         return CheckMemoryState(nullptr, nullptr, nullptr, out_blocks_needed, addr, size,
                                 state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr);
     }
-    ResultCode CheckMemoryState(VAddr addr, size_t size, KMemoryState state_mask,
+    ResultCode CheckMemoryState(VAddr addr, std::size_t size, KMemoryState state_mask,
                                 KMemoryState state, KMemoryPermission perm_mask,
                                 KMemoryPermission perm, KMemoryAttribute attr_mask,
                                 KMemoryAttribute attr,
@@ -210,7 +223,7 @@ public:
     constexpr VAddr GetAliasCodeRegionSize() const {
         return alias_code_region_end - alias_code_region_start;
     }
-    size_t GetNormalMemorySize() {
+    std::size_t GetNormalMemorySize() {
         KScopedLightLock lk(general_lock);
         return GetHeapSize() + mapped_physical_memory_size;
     }
@@ -253,9 +266,10 @@ public:
     constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const {
         return !IsOutsideASLRRegion(address, size);
     }
-
-    PAddr GetPhysicalAddr(VAddr addr) {
-        ASSERT(IsLockedByCurrentThread());
+    constexpr std::size_t GetNumGuardPages() const {
+        return IsKernel() ? 1 : 4;
+    }
+    PAddr GetPhysicalAddr(VAddr addr) const {
         const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
         ASSERT(backing_addr);
         return backing_addr + addr;
@@ -276,10 +290,6 @@ private:
         return is_aslr_enabled;
     }
 
-    constexpr std::size_t GetNumGuardPages() const {
-        return IsKernel() ? 1 : 4;
-    }
-
     constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const {
         return (address_space_start <= addr) &&
                (num_pages <= (address_space_end - address_space_start) / PageSize) &&
@@ -311,6 +321,8 @@ private:
     bool is_kernel{};
     bool is_aslr_enabled{};
 
+    u32 heap_fill_value{};
+
     KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application};
     KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront};
 
diff --git a/src/core/hle/kernel/k_port.cpp b/src/core/hle/kernel/k_port.cpp
index a8ba09c4a..ceb98709f 100644
--- a/src/core/hle/kernel/k_port.cpp
+++ b/src/core/hle/kernel/k_port.cpp
@@ -57,7 +57,12 @@ ResultCode KPort::EnqueueSession(KServerSession* session) {
     R_UNLESS(state == State::Normal, ResultPortClosed);
 
     server.EnqueueSession(session);
-    server.GetSessionRequestHandler()->ClientConnected(server.AcceptSession());
+
+    if (auto session_ptr = server.GetSessionRequestHandler().lock()) {
+        session_ptr->ClientConnected(server.AcceptSession());
+    } else {
+        UNREACHABLE();
+    }
 
     return ResultSuccess;
 }
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 9233261cd..b39405496 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -70,58 +70,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority
 }
 } // Anonymous namespace
 
-// Represents a page used for thread-local storage.
-//
-// Each TLS page contains slots that may be used by processes and threads.
-// Every process and thread is created with a slot in some arbitrary page
-// (whichever page happens to have an available slot).
-class TLSPage {
-public:
-    static constexpr std::size_t num_slot_entries =
-        Core::Memory::PAGE_SIZE / Core::Memory::TLS_ENTRY_SIZE;
-
-    explicit TLSPage(VAddr address) : base_address{address} {}
-
-    bool HasAvailableSlots() const {
-        return !is_slot_used.all();
-    }
-
-    VAddr GetBaseAddress() const {
-        return base_address;
-    }
-
-    std::optional<VAddr> ReserveSlot() {
-        for (std::size_t i = 0; i < is_slot_used.size(); i++) {
-            if (is_slot_used[i]) {
-                continue;
-            }
-
-            is_slot_used[i] = true;
-            return base_address + (i * Core::Memory::TLS_ENTRY_SIZE);
-        }
-
-        return std::nullopt;
-    }
-
-    void ReleaseSlot(VAddr address) {
-        // Ensure that all given addresses are consistent with how TLS pages
-        // are intended to be used when releasing slots.
-        ASSERT(IsWithinPage(address));
-        ASSERT((address % Core::Memory::TLS_ENTRY_SIZE) == 0);
-
-        const std::size_t index = (address - base_address) / Core::Memory::TLS_ENTRY_SIZE;
-        is_slot_used[index] = false;
-    }
-
-private:
-    bool IsWithinPage(VAddr address) const {
-        return base_address <= address && address < base_address + Core::Memory::PAGE_SIZE;
-    }
-
-    VAddr base_address;
-    std::bitset<num_slot_entries> is_slot_used;
-};
-
 ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name,
                                 ProcessType type, KResourceLimit* res_limit) {
     auto& kernel = system.Kernel();
@@ -404,7 +352,7 @@ ResultCode KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
     }
 
     // Create TLS region
-    tls_region_address = CreateTLSRegion();
+    R_TRY(this->CreateThreadLocalRegion(std::addressof(tls_region_address)));
     memory_reservation.Commit();
 
     return handle_table.Initialize(capabilities.GetHandleTableSize());
@@ -444,7 +392,7 @@ void KProcess::PrepareForTermination() {
 
     stop_threads(kernel.System().GlobalSchedulerContext().GetThreadList());
 
-    FreeTLSRegion(tls_region_address);
+    this->DeleteThreadLocalRegion(tls_region_address);
     tls_region_address = 0;
 
     if (resource_limit) {
@@ -456,9 +404,6 @@ void KProcess::PrepareForTermination() {
 }
 
 void KProcess::Finalize() {
-    // Finalize the handle table and close any open handles.
-    handle_table.Finalize();
-
     // Free all shared memory infos.
     {
         auto it = shared_memory_list.begin();
@@ -483,67 +428,110 @@ void KProcess::Finalize() {
         resource_limit = nullptr;
     }
 
+    // Finalize the page table.
+    page_table.reset();
+
     // Perform inherited finalization.
     KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask>::Finalize();
 }
 
-/**
- * Attempts to find a TLS page that contains a free slot for
- * use by a thread.
- *
- * @returns If a page with an available slot is found, then an iterator
- *          pointing to the page is returned. Otherwise the end iterator
- *          is returned instead.
- */
-static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
-    return std::find_if(tls_pages.begin(), tls_pages.end(),
-                        [](const auto& page) { return page.HasAvailableSlots(); });
-}
+ResultCode KProcess::CreateThreadLocalRegion(VAddr* out) {
+    KThreadLocalPage* tlp = nullptr;
+    VAddr tlr = 0;
 
-VAddr KProcess::CreateTLSRegion() {
-    KScopedSchedulerLock lock(kernel);
-    if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};
-        tls_page_iter != tls_pages.cend()) {
-        return *tls_page_iter->ReserveSlot();
-    }
+    // See if we can get a region from a partially used TLP.
+    {
+        KScopedSchedulerLock sl{kernel};
 
-    Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()};
-    ASSERT(tls_page_ptr);
+        if (auto it = partially_used_tlp_tree.begin(); it != partially_used_tlp_tree.end()) {
+            tlr = it->Reserve();
+            ASSERT(tlr != 0);
 
-    const VAddr start{page_table->GetKernelMapRegionStart()};
-    const VAddr size{page_table->GetKernelMapRegionEnd() - start};
-    const PAddr tls_map_addr{kernel.System().DeviceMemory().GetPhysicalAddr(tls_page_ptr)};
-    const VAddr tls_page_addr{page_table
-                                  ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize,
-                                                         KMemoryState::ThreadLocal,
-                                                         KMemoryPermission::UserReadWrite,
-                                                         tls_map_addr)
-                                  .ValueOr(0)};
+            if (it->IsAllUsed()) {
+                tlp = std::addressof(*it);
+                partially_used_tlp_tree.erase(it);
+                fully_used_tlp_tree.insert(*tlp);
+            }
 
-    ASSERT(tls_page_addr);
+            *out = tlr;
+            return ResultSuccess;
+        }
+    }
 
-    std::memset(tls_page_ptr, 0, PageSize);
-    tls_pages.emplace_back(tls_page_addr);
+    // Allocate a new page.
+    tlp = KThreadLocalPage::Allocate(kernel);
+    R_UNLESS(tlp != nullptr, ResultOutOfMemory);
+    auto tlp_guard = SCOPE_GUARD({ KThreadLocalPage::Free(kernel, tlp); });
 
-    const auto reserve_result{tls_pages.back().ReserveSlot()};
-    ASSERT(reserve_result.has_value());
+    // Initialize the new page.
+    R_TRY(tlp->Initialize(kernel, this));
+
+    // Reserve a TLR.
+    tlr = tlp->Reserve();
+    ASSERT(tlr != 0);
+
+    // Insert into our tree.
+    {
+        KScopedSchedulerLock sl{kernel};
+        if (tlp->IsAllUsed()) {
+            fully_used_tlp_tree.insert(*tlp);
+        } else {
+            partially_used_tlp_tree.insert(*tlp);
+        }
+    }
 
-    return *reserve_result;
+    // We succeeded!
+    tlp_guard.Cancel();
+    *out = tlr;
+    return ResultSuccess;
 }
 
-void KProcess::FreeTLSRegion(VAddr tls_address) {
-    KScopedSchedulerLock lock(kernel);
-    const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);
-    auto iter =
-        std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
-            return page.GetBaseAddress() == aligned_address;
-        });
+ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) {
+    KThreadLocalPage* page_to_free = nullptr;
+
+    // Release the region.
+    {
+        KScopedSchedulerLock sl{kernel};
+
+        // Try to find the page in the partially used list.
+        auto it = partially_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize));
+        if (it == partially_used_tlp_tree.end()) {
+            // If we don't find it, it has to be in the fully used list.
+            it = fully_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize));
+            R_UNLESS(it != fully_used_tlp_tree.end(), ResultInvalidAddress);
+
+            // Release the region.
+            it->Release(addr);
+
+            // Move the page out of the fully used list.
+            KThreadLocalPage* tlp = std::addressof(*it);
+            fully_used_tlp_tree.erase(it);
+            if (tlp->IsAllFree()) {
+                page_to_free = tlp;
+            } else {
+                partially_used_tlp_tree.insert(*tlp);
+            }
+        } else {
+            // Release the region.
+            it->Release(addr);
+
+            // Handle the all-free case.
+            KThreadLocalPage* tlp = std::addressof(*it);
+            if (tlp->IsAllFree()) {
+                partially_used_tlp_tree.erase(it);
+                page_to_free = tlp;
+            }
+        }
+    }
+
+    // If we should free the page it was in, do so.
+    if (page_to_free != nullptr) {
+        page_to_free->Finalize();
 
-    // Something has gone very wrong if we're freeing a region
-    // with no actual page available.
-    ASSERT(iter != tls_pages.cend());
+        KThreadLocalPage::Free(kernel, page_to_free);
+    }
 
-    iter->ReleaseSlot(tls_address);
+    return ResultSuccess;
 }
 
 void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) {
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index cf1b67428..5ed0f2d83 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -15,6 +15,7 @@
 #include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_handle_table.h"
 #include "core/hle/kernel/k_synchronization_object.h"
+#include "core/hle/kernel/k_thread_local_page.h"
 #include "core/hle/kernel/k_worker_task.h"
 #include "core/hle/kernel/process_capability.h"
 #include "core/hle/kernel/slab_helpers.h"
@@ -362,10 +363,10 @@ public:
     // Thread-local storage management
 
     // Marks the next available region as used and returns the address of the slot.
-    [[nodiscard]] VAddr CreateTLSRegion();
+    [[nodiscard]] ResultCode CreateThreadLocalRegion(VAddr* out);
 
     // Frees a used TLS slot identified by the given address
-    void FreeTLSRegion(VAddr tls_address);
+    ResultCode DeleteThreadLocalRegion(VAddr addr);
 
 private:
     void PinThread(s32 core_id, KThread* thread) {
@@ -413,13 +414,6 @@ private:
     /// The ideal CPU core for this process, threads are scheduled on this core by default.
     u8 ideal_core = 0;
 
-    /// The Thread Local Storage area is allocated as processes create threads,
-    /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
-    /// holds the TLS for a specific thread. This vector contains which parts are in use for each
-    /// page as a bitmask.
-    /// This vector will grow as more pages are allocated for new threads.
-    std::vector<TLSPage> tls_pages;
-
     /// Contains the parsed process capability descriptors.
     ProcessCapabilities capabilities;
 
@@ -482,6 +476,12 @@ private:
     KThread* exception_thread{};
 
     KLightLock state_lock;
+
+    using TLPTree =
+        Common::IntrusiveRedBlackTreeBaseTraits<KThreadLocalPage>::TreeType<KThreadLocalPage>;
+    using TLPIterator = TLPTree::iterator;
+    TLPTree fully_used_tlp_tree;
+    TLPTree partially_used_tlp_tree;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h
index 6302d5e61..2185736be 100644
--- a/src/core/hle/kernel/k_server_port.h
+++ b/src/core/hle/kernel/k_server_port.h
@@ -30,11 +30,11 @@ public:
 
     /// Whether or not this server port has an HLE handler available.
     bool HasSessionRequestHandler() const {
-        return session_handler != nullptr;
+        return !session_handler.expired();
     }
 
     /// Gets the HLE handler for this port.
-    SessionRequestHandlerPtr GetSessionRequestHandler() const {
+    SessionRequestHandlerWeakPtr GetSessionRequestHandler() const {
         return session_handler;
     }
 
@@ -42,7 +42,7 @@ public:
      * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
      * will inherit a reference to this handler.
      */
-    void SetSessionHandler(SessionRequestHandlerPtr&& handler) {
+    void SetSessionHandler(SessionRequestHandlerWeakPtr&& handler) {
         session_handler = std::move(handler);
     }
 
@@ -66,7 +66,7 @@ private:
     void CleanupSessions();
 
     SessionList session_list;
-    SessionRequestHandlerPtr session_handler;
+    SessionRequestHandlerWeakPtr session_handler;
     KPort* parent{};
 };
 
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index 4d94eb9cf..30c56ff29 100644
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -27,10 +27,7 @@ namespace Kernel {
 
 KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {}
 
-KServerSession::~KServerSession() {
-    // Ensure that the global list tracking server sessions does not hold on to a reference.
-    kernel.UnregisterServerSession(this);
-}
+KServerSession::~KServerSession() = default;
 
 void KServerSession::Initialize(KSession* parent_session_, std::string&& name_,
                                 std::shared_ptr<SessionRequestManager> manager_) {
@@ -49,6 +46,9 @@ void KServerSession::Destroy() {
     parent->OnServerClosed();
 
     parent->Close();
+
+    // Release host emulation members.
+    manager.reset();
 }
 
 void KServerSession::OnClientClosed() {
@@ -98,7 +98,12 @@ ResultCode KServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& co
             UNREACHABLE();
             return ResultSuccess; // Ignore error if asserts are off
         }
-        return manager->DomainHandler(object_id - 1)->HandleSyncRequest(*this, context);
+        if (auto strong_ptr = manager->DomainHandler(object_id - 1).lock()) {
+            return strong_ptr->HandleSyncRequest(*this, context);
+        } else {
+            UNREACHABLE();
+            return ResultSuccess;
+        }
 
     case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
         LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
diff --git a/src/core/hle/kernel/k_slab_heap.h b/src/core/hle/kernel/k_slab_heap.h
index 05c0bec9c..5690cc757 100644
--- a/src/core/hle/kernel/k_slab_heap.h
+++ b/src/core/hle/kernel/k_slab_heap.h
@@ -16,39 +16,34 @@ class KernelCore;
 
 namespace impl {
 
-class KSlabHeapImpl final {
-public:
+class KSlabHeapImpl {
     YUZU_NON_COPYABLE(KSlabHeapImpl);
     YUZU_NON_MOVEABLE(KSlabHeapImpl);
 
+public:
     struct Node {
         Node* next{};
     };
 
+public:
     constexpr KSlabHeapImpl() = default;
-    constexpr ~KSlabHeapImpl() = default;
 
-    void Initialize(std::size_t size) {
-        ASSERT(head == nullptr);
-        obj_size = size;
-    }
-
-    constexpr std::size_t GetObjectSize() const {
-        return obj_size;
+    void Initialize() {
+        ASSERT(m_head == nullptr);
     }
 
     Node* GetHead() const {
-        return head;
+        return m_head;
     }
 
     void* Allocate() {
-        Node* ret = head.load();
+        Node* ret = m_head.load();
 
         do {
             if (ret == nullptr) {
                 break;
             }
-        } while (!head.compare_exchange_weak(ret, ret->next));
+        } while (!m_head.compare_exchange_weak(ret, ret->next));
 
         return ret;
     }
@@ -56,170 +51,157 @@ public:
     void Free(void* obj) {
         Node* node = static_cast<Node*>(obj);
 
-        Node* cur_head = head.load();
+        Node* cur_head = m_head.load();
         do {
             node->next = cur_head;
-        } while (!head.compare_exchange_weak(cur_head, node));
+        } while (!m_head.compare_exchange_weak(cur_head, node));
     }
 
 private:
-    std::atomic<Node*> head{};
-    std::size_t obj_size{};
+    std::atomic<Node*> m_head{};
 };
 
 } // namespace impl
 
-class KSlabHeapBase {
-public:
+template <bool SupportDynamicExpansion>
+class KSlabHeapBase : protected impl::KSlabHeapImpl {
     YUZU_NON_COPYABLE(KSlabHeapBase);
     YUZU_NON_MOVEABLE(KSlabHeapBase);
 
-    constexpr KSlabHeapBase() = default;
-    constexpr ~KSlabHeapBase() = default;
+private:
+    size_t m_obj_size{};
+    uintptr_t m_peak{};
+    uintptr_t m_start{};
+    uintptr_t m_end{};
 
-    constexpr bool Contains(uintptr_t addr) const {
-        return start <= addr && addr < end;
-    }
+private:
+    // Raises m_peak to the end of the object at obj if that lies beyond the recorded peak.
+    void UpdatePeakImpl(uintptr_t obj) {
+        static_assert(std::atomic_ref<uintptr_t>::is_always_lock_free);
 
-    constexpr std::size_t GetSlabHeapSize() const {
-        return (end - start) / GetObjectSize();
+        // While peak_ref exists, m_peak may only be accessed through it, so load atomically.
+        std::atomic_ref<uintptr_t> peak_ref(m_peak);
+        const uintptr_t alloc_peak = obj + this->GetObjectSize();
+        uintptr_t cur_peak = peak_ref.load();
+        // A failed exchange reloads cur_peak, so the loop ends once the peak covers alloc_peak.
+        while (alloc_peak > cur_peak && !peak_ref.compare_exchange_strong(cur_peak, alloc_peak)) {
+        }
     }
 
-    constexpr std::size_t GetObjectSize() const {
-        return impl.GetObjectSize();
-    }
+public:
+    constexpr KSlabHeapBase() = default;
 
-    constexpr uintptr_t GetSlabHeapAddress() const {
-        return start;
+    bool Contains(uintptr_t address) const {
+        return m_start <= address && address < m_end;
     }
 
-    std::size_t GetObjectIndexImpl(const void* obj) const {
-        return (reinterpret_cast<uintptr_t>(obj) - start) / GetObjectSize();
+    void Initialize(size_t obj_size, void* memory, size_t memory_size) {
+        // Ensure we don't initialize a slab using null memory.
+        ASSERT(memory != nullptr);
+
+        // Set our object size.
+        m_obj_size = obj_size;
+
+        // Initialize the base allocator.
+        KSlabHeapImpl::Initialize();
+
+        // Set our tracking variables.
+        const size_t num_obj = (memory_size / obj_size);
+        m_start = reinterpret_cast<uintptr_t>(memory);
+        m_end = m_start + num_obj * obj_size;
+        m_peak = m_start;
+
+        // Free the objects.
+        u8* cur = reinterpret_cast<u8*>(m_end);
+
+        for (size_t i = 0; i < num_obj; i++) {
+            cur -= obj_size;
+            KSlabHeapImpl::Free(cur);
+        }
     }
 
-    std::size_t GetPeakIndex() const {
-        return GetObjectIndexImpl(reinterpret_cast<const void*>(peak));
+    size_t GetSlabHeapSize() const {
+        return (m_end - m_start) / this->GetObjectSize();
     }
 
-    void* AllocateImpl() {
-        return impl.Allocate();
+    size_t GetObjectSize() const {
+        return m_obj_size;
     }
 
-    void FreeImpl(void* obj) {
-        // Don't allow freeing an object that wasn't allocated from this heap
-        ASSERT(Contains(reinterpret_cast<uintptr_t>(obj)));
+    void* Allocate() {
+        void* obj = KSlabHeapImpl::Allocate();
 
-        impl.Free(obj);
+        return obj;
     }
 
-    void InitializeImpl(std::size_t obj_size, void* memory, std::size_t memory_size) {
-        // Ensure we don't initialize a slab using null memory
-        ASSERT(memory != nullptr);
-
-        // Initialize the base allocator
-        impl.Initialize(obj_size);
+    void Free(void* obj) {
+        // Don't allow freeing an object that wasn't allocated from this heap.
+        const bool contained = this->Contains(reinterpret_cast<uintptr_t>(obj));
+        ASSERT(contained);
+        KSlabHeapImpl::Free(obj);
+    }
 
-        // Set our tracking variables
-        const std::size_t num_obj = (memory_size / obj_size);
-        start = reinterpret_cast<uintptr_t>(memory);
-        end = start + num_obj * obj_size;
-        peak = start;
+    size_t GetObjectIndex(const void* obj) const {
+        if constexpr (SupportDynamicExpansion) {
+            if (!this->Contains(reinterpret_cast<uintptr_t>(obj))) {
+                return std::numeric_limits<size_t>::max();
+            }
+        }
 
-        // Free the objects
-        u8* cur = reinterpret_cast<u8*>(end);
+        return (reinterpret_cast<uintptr_t>(obj) - m_start) / this->GetObjectSize();
+    }
 
-        for (std::size_t i{}; i < num_obj; i++) {
-            cur -= obj_size;
-            impl.Free(cur);
-        }
+    size_t GetPeakIndex() const {
+        return this->GetObjectIndex(reinterpret_cast<const void*>(m_peak));
     }
 
-private:
-    using Impl = impl::KSlabHeapImpl;
+    uintptr_t GetSlabHeapAddress() const {
+        return m_start;
+    }
 
-    Impl impl;
-    uintptr_t peak{};
-    uintptr_t start{};
-    uintptr_t end{};
+    size_t GetNumRemaining() const {
+        // Only calculate the number of remaining objects under debug configuration.
+        return 0;
+    }
 };
 
 template <typename T>
-class KSlabHeap final : public KSlabHeapBase {
-public:
-    enum class AllocationType {
-        Host,
-        Guest,
-    };
+class KSlabHeap final : public KSlabHeapBase<false> {
+private:
+    using BaseHeap = KSlabHeapBase<false>;
 
-    explicit constexpr KSlabHeap(AllocationType allocation_type_ = AllocationType::Host)
-        : KSlabHeapBase(), allocation_type{allocation_type_} {}
+public:
+    constexpr KSlabHeap() = default;
 
-    void Initialize(void* memory, std::size_t memory_size) {
-        if (allocation_type == AllocationType::Guest) {
-            InitializeImpl(sizeof(T), memory, memory_size);
-        }
+    void Initialize(void* memory, size_t memory_size) {
+        BaseHeap::Initialize(sizeof(T), memory, memory_size);
     }
 
     T* Allocate() {
-        switch (allocation_type) {
-        case AllocationType::Host:
-            // Fallback for cases where we do not yet support allocating guest memory from the slab
-            // heap, such as for kernel memory regions.
-            return new T;
-
-        case AllocationType::Guest:
-            T* obj = static_cast<T*>(AllocateImpl());
-            if (obj != nullptr) {
-                new (obj) T();
-            }
-            return obj;
-        }
+        T* obj = static_cast<T*>(BaseHeap::Allocate());
 
-        UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
-        return nullptr;
+        if (obj != nullptr) [[likely]] {
+            std::construct_at(obj);
+        }
+        return obj;
     }
 
-    T* AllocateWithKernel(KernelCore& kernel) {
-        switch (allocation_type) {
-        case AllocationType::Host:
-            // Fallback for cases where we do not yet support allocating guest memory from the slab
-            // heap, such as for kernel memory regions.
-            return new T(kernel);
+    T* Allocate(KernelCore& kernel) {
+        T* obj = static_cast<T*>(BaseHeap::Allocate());
 
-        case AllocationType::Guest:
-            T* obj = static_cast<T*>(AllocateImpl());
-            if (obj != nullptr) {
-                new (obj) T(kernel);
-            }
-            return obj;
+        if (obj != nullptr) [[likely]] {
+            std::construct_at(obj, kernel);
         }
-
-        UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
-        return nullptr;
+        return obj;
     }
 
     void Free(T* obj) {
-        switch (allocation_type) {
-        case AllocationType::Host:
-            // Fallback for cases where we do not yet support allocating guest memory from the slab
-            // heap, such as for kernel memory regions.
-            delete obj;
-            return;
-
-        case AllocationType::Guest:
-            FreeImpl(obj);
-            return;
-        }
-
-        UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
+        BaseHeap::Free(obj);
     }
 
-    constexpr std::size_t GetObjectIndex(const T* obj) const {
-        return GetObjectIndexImpl(obj);
+    size_t GetObjectIndex(const T* obj) const {
+        return BaseHeap::GetObjectIndex(obj);
     }
-
-private:
-    const AllocationType allocation_type;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index de3ffe0c7..ba7f72c6b 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -210,7 +210,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
     if (owner != nullptr) {
         // Setup the TLS, if needed.
         if (type == ThreadType::User) {
-            tls_address = owner->CreateTLSRegion();
+            R_TRY(owner->CreateThreadLocalRegion(std::addressof(tls_address)));
         }
 
         parent = owner;
@@ -305,7 +305,7 @@ void KThread::Finalize() {
 
     // If the thread has a local region, delete it.
     if (tls_address != 0) {
-        parent->FreeTLSRegion(tls_address);
+        ASSERT(parent->DeleteThreadLocalRegion(tls_address).IsSuccess());
     }
 
     // Release any waiters.
@@ -326,6 +326,9 @@ void KThread::Finalize() {
         }
     }
 
+    // Release host emulation members.
+    host_context.reset();
+
     // Perform inherited finalization.
     KSynchronizationObject::Finalize();
 }
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index d058db62c..f46db7298 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -656,7 +656,7 @@ private:
     static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles));
 
     struct ConditionVariableComparator {
-        struct LightCompareType {
+        struct RedBlackKeyType {
             u64 cv_key{};
             s32 priority{};
 
@@ -672,8 +672,8 @@ private:
         template <typename T>
         requires(
             std::same_as<T, KThread> ||
-            std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs,
-                                                                            const KThread& rhs) {
+            std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs,
+                                                                           const KThread& rhs) {
             const u64 l_key = lhs.GetConditionVariableKey();
             const u64 r_key = rhs.GetConditionVariableKey();
 
diff --git a/src/core/hle/kernel/k_thread_local_page.cpp b/src/core/hle/kernel/k_thread_local_page.cpp
new file mode 100644
index 000000000..4653c29f6
--- /dev/null
+++ b/src/core/hle/kernel/k_thread_local_page.cpp
@@ -0,0 +1,65 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/scope_exit.h"
+#include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_thread_local_page.h"
+#include "core/hle/kernel/kernel.h"
+
+namespace Kernel {
+
+ResultCode KThreadLocalPage::Initialize(KernelCore& kernel, KProcess* process) {
+    // Remember the kernel instance and the process that owns this page.
+    m_kernel = std::addressof(kernel);
+    m_owner = process;
+
+    // Get a backing page buffer; the guard returns it if a later step fails.
+    KPageBuffer* buffer = KPageBuffer::Allocate(kernel);
+    R_UNLESS(buffer != nullptr, ResultOutOfMemory);
+    auto buffer_guard = SCOPE_GUARD({ KPageBuffer::Free(kernel, buffer); });
+
+    // Map the buffer as one thread-local page; m_virt_addr is passed as the address output.
+    const auto buffer_paddr = kernel.System().DeviceMemory().GetPhysicalAddr(buffer);
+    R_TRY(process->PageTable().MapPages(std::addressof(m_virt_addr), 1, PageSize, buffer_paddr,
+                                        KMemoryState::ThreadLocal,
+                                        KMemoryPermission::UserReadWrite));
+
+    // The mapping is in place, so keep the buffer.
+    buffer_guard.Cancel();
+
+    return ResultSuccess;
+}
+
+ResultCode KThreadLocalPage::Finalize() {
+    // Resolve the backing physical page before the mapping is removed.
+    const PAddr backing_paddr = m_owner->PageTable().GetPhysicalAddr(this->GetAddress());
+    ASSERT(backing_paddr);
+
+    // Remove the thread-local mapping from the owner; bail out without freeing on failure.
+    R_TRY(m_owner->PageTable().UnmapPages(m_virt_addr, 1, KMemoryState::ThreadLocal));
+
+    // Return the page buffer behind that physical address.
+    KPageBuffer::Free(*m_kernel,
+                      KPageBuffer::FromPhysicalAddress(m_kernel->System(), backing_paddr));
+    return ResultSuccess;
+}
+
+VAddr KThreadLocalPage::Reserve() {
+    // Claim the lowest-indexed free region, or report 0 when every region is in use.
+    const auto free_it = std::ranges::find(m_is_region_free, true);
+    if (free_it == m_is_region_free.end()) {
+        return 0;
+    }
+
+    *free_it = false;
+    return this->GetRegionAddress(static_cast<size_t>(free_it - m_is_region_free.begin()));
+}
+
+void KThreadLocalPage::Release(VAddr addr) {
+    m_is_region_free[this->GetRegionIndex(addr)] = true; // Region at addr is free for Reserve().
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_thread_local_page.h b/src/core/hle/kernel/k_thread_local_page.h
new file mode 100644
index 000000000..658c67e94
--- /dev/null
+++ b/src/core/hle/kernel/k_thread_local_page.h
@@ -0,0 +1,137 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <concepts>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/intrusive_red_black_tree.h"
+#include "core/hle/kernel/k_page_buffer.h"
+#include "core/hle/kernel/memory_types.h"
+#include "core/hle/kernel/slab_helpers.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+class KernelCore;
+class KProcess;
+
+/// One page of a process, split into fixed-size thread local regions.
+///
+/// Tracks which regions are free and acts as a red-black tree node ordered by page address.
+class KThreadLocalPage final : public Common::IntrusiveRedBlackTreeBaseNode<KThreadLocalPage>,
+                               public KSlabAllocated<KThreadLocalPage> {
+public:
+    /// Number of thread local regions that fit into a single page.
+    static constexpr size_t RegionsPerPage = PageSize / Svc::ThreadLocalRegionSize;
+    static_assert(RegionsPerPage > 0);
+
+public:
+    /// Creates a page object for the given address with every region marked free.
+    constexpr explicit KThreadLocalPage(VAddr addr = {}) : m_virt_addr(addr) {
+        m_is_region_free.fill(true);
+    }
+
+    /// Returns the virtual address at which the page starts.
+    constexpr VAddr GetAddress() const {
+        return m_virt_addr;
+    }
+
+    /// Allocates a page buffer and maps it into the page table of the given process.
+    ResultCode Initialize(KernelCore& kernel, KProcess* process);
+
+    /// Unmaps the page from its owner and frees the page buffer backing it.
+    ResultCode Finalize();
+
+    /// Marks the first free region as used and returns its address, or 0 if none is free.
+    VAddr Reserve();
+
+    /// Marks the region at addr as free; addr must be region-aligned and inside this page.
+    void Release(VAddr addr);
+
+    /// Returns true when no region is free.
+    bool IsAllUsed() const {
+        return std::ranges::all_of(m_is_region_free, [](bool is_free) { return !is_free; });
+    }
+
+    /// Returns true when every region is free.
+    bool IsAllFree() const {
+        return std::ranges::all_of(m_is_region_free, [](bool is_free) { return is_free; });
+    }
+
+    /// Returns true when at least one region is in use.
+    bool IsAnyUsed() const {
+        return !this->IsAllFree();
+    }
+
+    /// Returns true when at least one region is free.
+    bool IsAnyFree() const {
+        return !this->IsAllUsed();
+    }
+
+public:
+    /// Key used to order and look up pages: the page's virtual address.
+    using RedBlackKeyType = VAddr;
+
+    static constexpr RedBlackKeyType GetRedBlackKey(const RedBlackKeyType& v) {
+        return v;
+    }
+    static constexpr RedBlackKeyType GetRedBlackKey(const KThreadLocalPage& v) {
+        return v.GetAddress();
+    }
+
+    /// Compares a page or key against a page by address.
+    /// Returns -1, 0 or 1 when lhs is lower than, equal to or higher than rhs.
+    template <typename T>
+    requires(std::same_as<T, KThreadLocalPage> ||
+             std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs,
+                                                                            const KThreadLocalPage&
+                                                                                rhs) {
+        const VAddr lval = GetRedBlackKey(lhs);
+        const VAddr rval = GetRedBlackKey(rhs);
+
+        if (lval < rval) {
+            return -1;
+        } else if (lval == rval) {
+            return 0;
+        } else {
+            return 1;
+        }
+    }
+
+private:
+    /// Returns the address of the i-th region of this page.
+    constexpr VAddr GetRegionAddress(size_t i) const {
+        return this->GetAddress() + i * Svc::ThreadLocalRegionSize;
+    }
+
+    /// Returns whether addr lies inside this page.
+    constexpr bool Contains(VAddr addr) const {
+        return this->GetAddress() <= addr && addr < this->GetAddress() + PageSize;
+    }
+
+    /// Returns the index of the region that starts at addr.
+    constexpr size_t GetRegionIndex(VAddr addr) const {
+        ASSERT(Common::IsAligned(addr, Svc::ThreadLocalRegionSize));
+        ASSERT(this->Contains(addr));
+        return (addr - this->GetAddress()) / Svc::ThreadLocalRegionSize;
+    }
+
+private:
+    /// Virtual address of the page.
+    VAddr m_virt_addr{};
+    /// Process the page is mapped into; set by Initialize.
+    KProcess* m_owner{};
+    /// Kernel instance the page was initialized with.
+    KernelCore* m_kernel{};
+    /// One flag per region; true while the region is free.
+    std::array<bool, RegionsPerPage> m_is_region_free{};
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 797f47021..f9828bc43 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -52,7 +52,7 @@ namespace Kernel {
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system_, KernelCore& kernel_)
-        : time_manager{system_}, object_list_container{kernel_},
+        : time_manager{system_},
           service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {}
 
     void SetMulticore(bool is_multi) {
@@ -60,6 +60,7 @@ struct KernelCore::Impl {
     }
 
     void Initialize(KernelCore& kernel) {
+        global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel);
         global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
         global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
         global_handle_table->Initialize(KHandleTable::MaxTableSize);
@@ -70,14 +71,13 @@ struct KernelCore::Impl {
 
         // Derive the initial memory layout from the emulated board
         Init::InitializeSlabResourceCounts(kernel);
-        KMemoryLayout memory_layout;
-        DeriveInitialMemoryLayout(memory_layout);
-        Init::InitializeSlabHeaps(system, memory_layout);
+        DeriveInitialMemoryLayout();
+        Init::InitializeSlabHeaps(system, *memory_layout);
 
         // Initialize kernel memory and resources.
-        InitializeSystemResourceLimit(kernel, system.CoreTiming(), memory_layout);
-        InitializeMemoryLayout(memory_layout);
-        InitializePageSlab();
+        InitializeSystemResourceLimit(kernel, system.CoreTiming());
+        InitializeMemoryLayout();
+        Init::InitializeKPageBufferSlabHeap(system);
         InitializeSchedulers();
         InitializeSuspendThreads();
         InitializePreemption(kernel);
@@ -108,19 +108,6 @@ struct KernelCore::Impl {
         for (auto* server_port : server_ports_) {
             server_port->Close();
         }
-        // Close all open server sessions.
-        std::unordered_set<KServerSession*> server_sessions_;
-        {
-            std::lock_guard lk(server_sessions_lock);
-            server_sessions_ = server_sessions;
-            server_sessions.clear();
-        }
-        for (auto* server_session : server_sessions_) {
-            server_session->Close();
-        }
-
-        // Ensure that the object list container is finalized and properly shutdown.
-        object_list_container.Finalize();
 
         // Ensures all service threads gracefully shutdown.
         ClearServiceThreads();
@@ -195,11 +182,15 @@ struct KernelCore::Impl {
         {
             std::lock_guard lk(registered_objects_lock);
             if (registered_objects.size()) {
-                LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!",
-                            registered_objects.size());
+                LOG_DEBUG(Kernel, "{} kernel objects were dangling on shutdown!",
+                          registered_objects.size());
                 registered_objects.clear();
             }
         }
+
+        // Ensure that the object list container is finalized and properly shutdown.
+        global_object_list_container->Finalize();
+        global_object_list_container.reset();
     }
 
     void InitializePhysicalCores() {
@@ -219,12 +210,11 @@ struct KernelCore::Impl {
 
     // Creates the default system resource limit
     void InitializeSystemResourceLimit(KernelCore& kernel,
-                                       const Core::Timing::CoreTiming& core_timing,
-                                       const KMemoryLayout& memory_layout) {
+                                       const Core::Timing::CoreTiming& core_timing) {
         system_resource_limit = KResourceLimit::Create(system.Kernel());
         system_resource_limit->Initialize(&core_timing);
 
-        const auto [total_size, kernel_size] = memory_layout.GetTotalAndKernelMemorySizes();
+        const auto [total_size, kernel_size] = memory_layout->GetTotalAndKernelMemorySizes();
 
         // If setting the default system values fails, then something seriously wrong has occurred.
         ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemory, total_size)
@@ -293,15 +283,16 @@ struct KernelCore::Impl {
 
     // Gets the dummy KThread for the caller, allocating a new one if this is the first time
     KThread* GetHostDummyThread() {
-        auto make_thread = [this]() {
-            KThread* thread = KThread::Create(system.Kernel());
+        auto initialize = [this](KThread* thread) {
             ASSERT(KThread::InitializeDummyThread(thread).IsSuccess());
             thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId()));
             return thread;
         };
 
-        thread_local KThread* saved_thread = make_thread();
-        return saved_thread;
+        thread_local auto raw_thread = KThread(system.Kernel());
+        thread_local auto thread = initialize(&raw_thread);
+
+        return thread;
     }
 
     /// Registers a CPU core thread by allocating a host thread ID for it
@@ -353,16 +344,18 @@ struct KernelCore::Impl {
         return schedulers[thread_id]->GetCurrentThread();
     }
 
-    void DeriveInitialMemoryLayout(KMemoryLayout& memory_layout) {
+    void DeriveInitialMemoryLayout() {
+        memory_layout = std::make_unique<KMemoryLayout>();
+
         // Insert the root region for the virtual memory tree, from which all other regions will
         // derive.
-        memory_layout.GetVirtualMemoryRegionTree().InsertDirectly(
+        memory_layout->GetVirtualMemoryRegionTree().InsertDirectly(
             KernelVirtualAddressSpaceBase,
             KernelVirtualAddressSpaceBase + KernelVirtualAddressSpaceSize - 1);
 
         // Insert the root region for the physical memory tree, from which all other regions will
         // derive.
-        memory_layout.GetPhysicalMemoryRegionTree().InsertDirectly(
+        memory_layout->GetPhysicalMemoryRegionTree().InsertDirectly(
             KernelPhysicalAddressSpaceBase,
             KernelPhysicalAddressSpaceBase + KernelPhysicalAddressSpaceSize - 1);
 
@@ -379,7 +372,7 @@ struct KernelCore::Impl {
         if (!(kernel_region_start + KernelRegionSize - 1 <= KernelVirtualAddressSpaceLast)) {
             kernel_region_size = KernelVirtualAddressSpaceEnd - kernel_region_start;
         }
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
             kernel_region_start, kernel_region_size, KMemoryRegionType_Kernel));
 
         // Setup the code region.
@@ -388,11 +381,11 @@ struct KernelCore::Impl {
             Common::AlignDown(code_start_virt_addr, CodeRegionAlign);
         constexpr VAddr code_region_end = Common::AlignUp(code_end_virt_addr, CodeRegionAlign);
         constexpr size_t code_region_size = code_region_end - code_region_start;
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
             code_region_start, code_region_size, KMemoryRegionType_KernelCode));
 
         // Setup board-specific device physical regions.
-        Init::SetupDevicePhysicalMemoryRegions(memory_layout);
+        Init::SetupDevicePhysicalMemoryRegions(*memory_layout);
 
         // Determine the amount of space needed for the misc region.
         size_t misc_region_needed_size;
@@ -401,7 +394,7 @@ struct KernelCore::Impl {
             misc_region_needed_size = Core::Hardware::NUM_CPU_CORES * (3 * (PageSize + PageSize));
 
             // Account for each auto-map device.
-            for (const auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+            for (const auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
                 if (region.HasTypeAttribute(KMemoryRegionAttr_ShouldKernelMap)) {
                     // Check that the region is valid.
                     ASSERT(region.GetEndAddress() != 0);
@@ -426,22 +419,22 @@ struct KernelCore::Impl {
 
         // Setup the misc region.
         const VAddr misc_region_start =
-            memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+            memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
                 misc_region_size, MiscRegionAlign, KMemoryRegionType_Kernel);
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
             misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc));
 
         // Setup the stack region.
         constexpr size_t StackRegionSize = 14_MiB;
         constexpr size_t StackRegionAlign = KernelAslrAlignment;
         const VAddr stack_region_start =
-            memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+            memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
                 StackRegionSize, StackRegionAlign, KMemoryRegionType_Kernel);
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
             stack_region_start, StackRegionSize, KMemoryRegionType_KernelStack));
 
         // Determine the size of the resource region.
-        const size_t resource_region_size = memory_layout.GetResourceRegionSizeForInit();
+        const size_t resource_region_size = memory_layout->GetResourceRegionSizeForInit();
 
         // Determine the size of the slab region.
         const size_t slab_region_size =
@@ -458,23 +451,23 @@ struct KernelCore::Impl {
             Common::AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) -
             Common::AlignDown(code_end_phys_addr, SlabRegionAlign);
         const VAddr slab_region_start =
-            memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+            memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
                 slab_region_needed_size, SlabRegionAlign, KMemoryRegionType_Kernel) +
             (code_end_phys_addr % SlabRegionAlign);
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
             slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab));
 
         // Setup the temp region.
         constexpr size_t TempRegionSize = 128_MiB;
         constexpr size_t TempRegionAlign = KernelAslrAlignment;
         const VAddr temp_region_start =
-            memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+            memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
                 TempRegionSize, TempRegionAlign, KMemoryRegionType_Kernel);
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
-                                                                 KMemoryRegionType_KernelTemp));
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
+                                                                  KMemoryRegionType_KernelTemp));
 
         // Automatically map in devices that have auto-map attributes.
-        for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+        for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
             // We only care about kernel regions.
             if (!region.IsDerivedFrom(KMemoryRegionType_Kernel)) {
                 continue;
@@ -501,21 +494,21 @@ struct KernelCore::Impl {
             const size_t map_size =
                 Common::AlignUp(region.GetEndAddress(), PageSize) - map_phys_addr;
             const VAddr map_virt_addr =
-                memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
+                memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
                     map_size, PageSize, KMemoryRegionType_KernelMisc, PageSize);
-            ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+            ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
                 map_virt_addr, map_size, KMemoryRegionType_KernelMiscMappedDevice));
             region.SetPairAddress(map_virt_addr + region.GetAddress() - map_phys_addr);
         }
 
-        Init::SetupDramPhysicalMemoryRegions(memory_layout);
+        Init::SetupDramPhysicalMemoryRegions(*memory_layout);
 
         // Insert a physical region for the kernel code region.
-        ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
             code_start_phys_addr, code_region_size, KMemoryRegionType_DramKernelCode));
 
         // Insert a physical region for the kernel slab region.
-        ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
             slab_start_phys_addr, slab_region_size, KMemoryRegionType_DramKernelSlab));
 
         // Determine size available for kernel page table heaps, requiring > 8 MB.
@@ -524,12 +517,12 @@ struct KernelCore::Impl {
         ASSERT(page_table_heap_size / 4_MiB > 2);
 
         // Insert a physical region for the kernel page table heap region
-        ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
             slab_end_phys_addr, page_table_heap_size, KMemoryRegionType_DramKernelPtHeap));
 
         // All DRAM regions that we haven't tagged by this point will be mapped under the linear
         // mapping. Tag them.
-        for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+        for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
             if (region.GetType() == KMemoryRegionType_Dram) {
                 // Check that the region is valid.
                 ASSERT(region.GetEndAddress() != 0);
@@ -541,7 +534,7 @@ struct KernelCore::Impl {
 
         // Get the linear region extents.
         const auto linear_extents =
-            memory_layout.GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
+            memory_layout->GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
                 KMemoryRegionAttr_LinearMapped);
         ASSERT(linear_extents.GetEndAddress() != 0);
 
@@ -553,7 +546,7 @@ struct KernelCore::Impl {
             Common::AlignUp(linear_extents.GetEndAddress(), LinearRegionAlign) -
             aligned_linear_phys_start;
         const VAddr linear_region_start =
-            memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
+            memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
                 linear_region_size, LinearRegionAlign, KMemoryRegionType_None, LinearRegionAlign);
 
         const u64 linear_region_phys_to_virt_diff = linear_region_start - aligned_linear_phys_start;
@@ -562,7 +555,7 @@ struct KernelCore::Impl {
         {
             PAddr cur_phys_addr = 0;
             u64 cur_size = 0;
-            for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+            for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
                 if (!region.HasTypeAttribute(KMemoryRegionAttr_LinearMapped)) {
                     continue;
                 }
@@ -581,55 +574,49 @@ struct KernelCore::Impl {
 
                 const VAddr region_virt_addr =
                     region.GetAddress() + linear_region_phys_to_virt_diff;
-                ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+                ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
                     region_virt_addr, region.GetSize(),
                     GetTypeForVirtualLinearMapping(region.GetType())));
                 region.SetPairAddress(region_virt_addr);
 
                 KMemoryRegion* virt_region =
-                    memory_layout.GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
+                    memory_layout->GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
                 ASSERT(virt_region != nullptr);
                 virt_region->SetPairAddress(region.GetAddress());
             }
         }
 
         // Insert regions for the initial page table region.
-        ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
             resource_end_phys_addr, KernelPageTableHeapSize, KMemoryRegionType_DramKernelInitPt));
-        ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+        ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
             resource_end_phys_addr + linear_region_phys_to_virt_diff, KernelPageTableHeapSize,
             KMemoryRegionType_VirtualDramKernelInitPt));
 
         // All linear-mapped DRAM regions that we haven't tagged by this point will be allocated to
         // some pool partition. Tag them.
-        for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+        for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
             if (region.GetType() == (KMemoryRegionType_Dram | KMemoryRegionAttr_LinearMapped)) {
                 region.SetType(KMemoryRegionType_DramPoolPartition);
             }
         }
 
         // Setup all other memory regions needed to arrange the pool partitions.
-        Init::SetupPoolPartitionMemoryRegions(memory_layout);
+        Init::SetupPoolPartitionMemoryRegions(*memory_layout);
 
         // Cache all linear regions in their own trees for faster access, later.
-        memory_layout.InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
-                                                        linear_region_start);
+        memory_layout->InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
+                                                         linear_region_start);
     }
 
-    void InitializeMemoryLayout(const KMemoryLayout& memory_layout) {
-        const auto system_pool = memory_layout.GetKernelSystemPoolRegionPhysicalExtents();
-        const auto applet_pool = memory_layout.GetKernelAppletPoolRegionPhysicalExtents();
-        const auto application_pool = memory_layout.GetKernelApplicationPoolRegionPhysicalExtents();
+    void InitializeMemoryLayout() {
+        const auto system_pool = memory_layout->GetKernelSystemPoolRegionPhysicalExtents();
 
-        // Initialize memory managers
+        // Initialize the memory manager.
         memory_manager = std::make_unique<KMemoryManager>(system);
-        memory_manager->InitializeManager(KMemoryManager::Pool::Application,
-                                          application_pool.GetAddress(),
-                                          application_pool.GetEndAddress());
-        memory_manager->InitializeManager(KMemoryManager::Pool::Applet, applet_pool.GetAddress(),
-                                          applet_pool.GetEndAddress());
-        memory_manager->InitializeManager(KMemoryManager::Pool::System, system_pool.GetAddress(),
-                                          system_pool.GetEndAddress());
+        const auto& management_region = memory_layout->GetPoolManagementRegion();
+        ASSERT(management_region.GetEndAddress() != 0);
+        memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
 
         // Setup memory regions for emulated processes
         // TODO(bunnei): These should not be hardcoded regions initialized within the kernel
@@ -666,22 +653,6 @@ struct KernelCore::Impl {
                                     time_phys_addr, time_size, "Time:SharedMemory");
     }
 
-    void InitializePageSlab() {
-        // Allocate slab heaps
-        user_slab_heap_pages =
-            std::make_unique<KSlabHeap<Page>>(KSlabHeap<Page>::AllocationType::Guest);
-
-        // TODO(ameerj): This should be derived, not hardcoded within the kernel
-        constexpr u64 user_slab_heap_size{0x3de000};
-        // Reserve slab heaps
-        ASSERT(
-            system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
-        // Initialize slab heap
-        user_slab_heap_pages->Initialize(
-            system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
-            user_slab_heap_size);
-    }
-
     KClientPort* CreateNamedServicePort(std::string name) {
         auto search = service_interface_factory.find(name);
         if (search == service_interface_factory.end()) {
@@ -719,7 +690,6 @@ struct KernelCore::Impl {
     }
 
     std::mutex server_ports_lock;
-    std::mutex server_sessions_lock;
     std::mutex registered_objects_lock;
     std::mutex registered_in_use_objects_lock;
 
@@ -743,14 +713,13 @@ struct KernelCore::Impl {
     // stores all the objects in place.
     std::unique_ptr<KHandleTable> global_handle_table;
 
-    KAutoObjectWithListContainer object_list_container;
+    std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container;
 
     /// Map of named ports managed by the kernel, which can be retrieved using
     /// the ConnectToPort SVC.
     std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory;
     NamedPortTable named_ports;
     std::unordered_set<KServerPort*> server_ports;
-    std::unordered_set<KServerSession*> server_sessions;
     std::unordered_set<KAutoObject*> registered_objects;
     std::unordered_set<KAutoObject*> registered_in_use_objects;
 
@@ -762,7 +731,6 @@ struct KernelCore::Impl {
 
     // Kernel memory management
     std::unique_ptr<KMemoryManager> memory_manager;
-    std::unique_ptr<KSlabHeap<Page>> user_slab_heap_pages;
 
     // Shared memory for services
     Kernel::KSharedMemory* hid_shared_mem{};
@@ -770,6 +738,9 @@ struct KernelCore::Impl {
     Kernel::KSharedMemory* irs_shared_mem{};
     Kernel::KSharedMemory* time_shared_mem{};
 
+    // Memory layout
+    std::unique_ptr<KMemoryLayout> memory_layout;
+
     // Threads used for services
     std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
     Common::ThreadWorker service_threads_manager;
@@ -918,11 +889,11 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
 }
 
 KAutoObjectWithListContainer& KernelCore::ObjectListContainer() {
-    return impl->object_list_container;
+    return *impl->global_object_list_container;
 }
 
 const KAutoObjectWithListContainer& KernelCore::ObjectListContainer() const {
-    return impl->object_list_container;
+    return *impl->global_object_list_container;
 }
 
 void KernelCore::InvalidateAllInstructionCaches() {
@@ -952,16 +923,6 @@ KClientPort* KernelCore::CreateNamedServicePort(std::string name) {
     return impl->CreateNamedServicePort(std::move(name));
 }
 
-void KernelCore::RegisterServerSession(KServerSession* server_session) {
-    std::lock_guard lk(impl->server_sessions_lock);
-    impl->server_sessions.insert(server_session);
-}
-
-void KernelCore::UnregisterServerSession(KServerSession* server_session) {
-    std::lock_guard lk(impl->server_sessions_lock);
-    impl->server_sessions.erase(server_session);
-}
-
 void KernelCore::RegisterKernelObject(KAutoObject* object) {
     std::lock_guard lk(impl->registered_objects_lock);
     impl->registered_objects.insert(object);
@@ -1034,14 +995,6 @@ const KMemoryManager& KernelCore::MemoryManager() const {
     return *impl->memory_manager;
 }
 
-KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() {
-    return *impl->user_slab_heap_pages;
-}
-
-const KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() const {
-    return *impl->user_slab_heap_pages;
-}
-
 Kernel::KSharedMemory& KernelCore::GetHidSharedMem() {
     return *impl->hid_shared_mem;
 }
@@ -1135,6 +1088,10 @@ const KWorkerTaskManager& KernelCore::WorkerTaskManager() const {
     return impl->worker_task_manager;
 }
 
+const KMemoryLayout& KernelCore::MemoryLayout() const {
+    return *impl->memory_layout;
+}
+
 bool KernelCore::IsPhantomModeForSingleCore() const {
     return impl->IsPhantomModeForSingleCore();
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 0e04fc3bb..7087bbda6 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -41,7 +41,9 @@ class KClientSession;
 class KEvent;
 class KHandleTable;
 class KLinkedListNode;
+class KMemoryLayout;
 class KMemoryManager;
+class KPageBuffer;
 class KPort;
 class KProcess;
 class KResourceLimit;
@@ -51,6 +53,7 @@ class KSession;
 class KSharedMemory;
 class KSharedMemoryInfo;
 class KThread;
+class KThreadLocalPage;
 class KTransferMemory;
 class KWorkerTaskManager;
 class KWritableEvent;
@@ -193,14 +196,6 @@ public:
     /// Opens a port to a service previously registered with RegisterNamedService.
     KClientPort* CreateNamedServicePort(std::string name);
 
-    /// Registers a server session with the gobal emulation state, to be freed on shutdown. This is
-    /// necessary because we do not emulate processes for HLE sessions.
-    void RegisterServerSession(KServerSession* server_session);
-
-    /// Unregisters a server session previously registered with RegisterServerSession when it was
-    /// destroyed during the current emulation session.
-    void UnregisterServerSession(KServerSession* server_session);
-
     /// Registers all kernel objects with the global emulation state, this is purely for tracking
     /// leaks after emulation has been shutdown.
     void RegisterKernelObject(KAutoObject* object);
@@ -238,12 +233,6 @@ public:
     /// Gets the virtual memory manager for the kernel.
     const KMemoryManager& MemoryManager() const;
 
-    /// Gets the slab heap allocated for user space pages.
-    KSlabHeap<Page>& GetUserSlabHeapPages();
-
-    /// Gets the slab heap allocated for user space pages.
-    const KSlabHeap<Page>& GetUserSlabHeapPages() const;
-
     /// Gets the shared memory object for HID services.
     Kernel::KSharedMemory& GetHidSharedMem();
 
@@ -335,6 +324,10 @@ public:
             return slab_heap_container->writeable_event;
         } else if constexpr (std::is_same_v<T, KCodeMemory>) {
             return slab_heap_container->code_memory;
+        } else if constexpr (std::is_same_v<T, KPageBuffer>) {
+            return slab_heap_container->page_buffer;
+        } else if constexpr (std::is_same_v<T, KThreadLocalPage>) {
+            return slab_heap_container->thread_local_page;
         }
     }
 
@@ -350,6 +343,9 @@ public:
     /// Gets the current worker task manager, used for dispatching KThread/KProcess tasks.
     const KWorkerTaskManager& WorkerTaskManager() const;
 
+    /// Gets the memory layout.
+    const KMemoryLayout& MemoryLayout() const;
+
 private:
     friend class KProcess;
     friend class KThread;
@@ -393,6 +389,8 @@ private:
         KSlabHeap<KTransferMemory> transfer_memory;
         KSlabHeap<KWritableEvent> writeable_event;
         KSlabHeap<KCodeMemory> code_memory;
+        KSlabHeap<KPageBuffer> page_buffer;
+        KSlabHeap<KThreadLocalPage> thread_local_page;
     };
 
     std::unique_ptr<SlabHeapContainer> slab_heap_container;
diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp
index 4eb3a5988..52d25b837 100644
--- a/src/core/hle/kernel/service_thread.cpp
+++ b/src/core/hle/kernel/service_thread.cpp
@@ -49,12 +49,9 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std
                 return;
             }
 
+            // Allocate a dummy guest thread for this host thread.
             kernel.RegisterHostThread();
 
-            // Ensure the dummy thread allocated for this host thread is closed on exit.
-            auto* dummy_thread = kernel.GetCurrentEmuThread();
-            SCOPE_EXIT({ dummy_thread->Close(); });
-
             while (true) {
                 std::function<void()> task;
 
diff --git a/src/core/hle/kernel/slab_helpers.h b/src/core/hle/kernel/slab_helpers.h
index f1c11256e..dc1e48fc9 100644
--- a/src/core/hle/kernel/slab_helpers.h
+++ b/src/core/hle/kernel/slab_helpers.h
@@ -59,7 +59,7 @@ class KAutoObjectWithSlabHeapAndContainer : public Base {
 
 private:
     static Derived* Allocate(KernelCore& kernel) {
-        return kernel.SlabHeap<Derived>().AllocateWithKernel(kernel);
+        return kernel.SlabHeap<Derived>().Allocate(kernel);
     }
 
     static void Free(KernelCore& kernel, Derived* obj) {
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h
index 365e22e4e..b2e9ec092 100644
--- a/src/core/hle/kernel/svc_types.h
+++ b/src/core/hle/kernel/svc_types.h
@@ -96,4 +96,6 @@ constexpr inline s32 IdealCoreNoUpdate = -3;
 constexpr inline s32 LowestThreadPriority = 63;
 constexpr inline s32 HighestThreadPriority = 0;
 
+constexpr inline size_t ThreadLocalRegionSize = 0x200;
+
 } // namespace Kernel::Svc
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 2f8e21568..420de3c54 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -980,7 +980,7 @@ private:
         LOG_DEBUG(Service_AM, "called");
 
         IPC::RequestParser rp{ctx};
-        applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>());
+        applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>().lock());
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ResultSuccess);
@@ -1007,7 +1007,7 @@ private:
         LOG_DEBUG(Service_AM, "called");
 
         IPC::RequestParser rp{ctx};
-        applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>());
+        applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>().lock());
 
         ASSERT(applet->IsInitialized());
         applet->ExecuteInteractive();
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
index b8c2c6e51..ff0bbb788 100644
--- a/src/core/hle/service/kernel_helpers.cpp
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -17,21 +17,12 @@ namespace Service::KernelHelpers {
 
 ServiceContext::ServiceContext(Core::System& system_, std::string name_)
     : kernel(system_.Kernel()) {
-
-    // Create a resource limit for the process.
-    const auto physical_memory_size =
-        kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::System);
-    auto* resource_limit = Kernel::CreateResourceLimitForProcess(system_, physical_memory_size);
-
     // Create the process.
     process = Kernel::KProcess::Create(kernel);
     ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_),
                                         Kernel::KProcess::ProcessType::KernelInternal,
-                                        resource_limit)
+                                        kernel.GetSystemResourceLimit())
                .IsSuccess());
-
-    // Close reference to our resource limit, as the process opens one.
-    resource_limit->Close();
 }
 
 ServiceContext::~ServiceContext() {
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9fc7bb1b1..099276420 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -288,7 +288,7 @@ public:
     }
 
     bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const {
-        constexpr std::size_t padding_size{4 * Kernel::PageSize};
+        const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize};
         const auto start_info{page_table.QueryInfo(start - 1)};
 
         if (start_info.state != Kernel::KMemoryState::Free) {
@@ -308,31 +308,69 @@ public:
         return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize());
     }
 
-    VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const {
-        VAddr addr{};
-        const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >>
-                                    Kernel::PageBits};
-        do {
-            addr = page_table.GetAliasCodeRegionStart() +
-                   (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits);
-        } while (!page_table.IsInsideAddressSpace(addr, size) ||
-                 page_table.IsInsideHeapRegion(addr, size) ||
-                 page_table.IsInsideAliasRegion(addr, size));
-        return addr;
+    /// Finds the next free range in the alias code region for a mapping of size bytes (plus guard
+    /// padding), resuming the scan at current_map_addr. On success, writes the range start to
+    /// out_addr and advances current_map_addr; otherwise returns Kernel::ResultOutOfMemory.
+    ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) {
+        size = Common::AlignUp(size, Kernel::PageSize);
+        size += page_table.GetNumGuardPages() * Kernel::PageSize * 4;
+
+        const auto is_region_available = [&](VAddr addr) {
+            // Validate the candidate range itself: out_addr is only an output and still holds
+            // whatever the caller passed in until a range has been chosen.
+            if (!page_table.IsInsideAddressSpace(addr, size) ||
+                page_table.IsInsideHeapRegion(addr, size) ||
+                page_table.IsInsideAliasRegion(addr, size)) {
+                return false;
+            }
+
+            // Every page of the candidate range must currently be unmapped.
+            const auto end_addr = addr + size;
+            while (addr < end_addr) {
+                if (system.Memory().IsValidVirtualAddress(addr)) {
+                    return false;
+                }
+                addr += Kernel::PageSize;
+            }
+            return true;
+        };
+
+        // Probe candidates at 1 MiB intervals until one is free or the region is exhausted.
+        bool succeeded = false;
+        const auto map_region_end =
+            page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize();
+        while (current_map_addr < map_region_end) {
+            if (is_region_available(current_map_addr)) {
+                succeeded = true;
+                break;
+            }
+            current_map_addr += 0x100000;
+        }
+
+        if (!succeeded) {
+            UNREACHABLE_MSG("Out of address space!");
+            return Kernel::ResultOutOfMemory;
+        }
+
+        out_addr = current_map_addr;
+        current_map_addr += size;
+
+        return ResultSuccess;
     }
 
-    ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress,
-                                          u64 size) const {
+    ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) {
+        auto& page_table{process->PageTable()};
+        VAddr addr{};
+
         for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
-            auto& page_table{process->PageTable()};
-            const VAddr addr{GetRandomMapRegion(page_table, size)};
-            const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)};
+            R_TRY(GetAvailableMapRegion(page_table, size, addr));
 
+            const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)};
             if (result == Kernel::ResultInvalidCurrentMemory) {
                 continue;
             }
 
-            CASCADE_CODE(result);
+            R_TRY(result);
 
             if (ValidateRegionForMap(page_table, addr, size)) {
                 return addr;
@@ -343,7 +381,7 @@ public:
     }
 
     ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size,
-                            VAddr bss_addr, std::size_t bss_size, std::size_t size) const {
+                            VAddr bss_addr, std::size_t bss_size, std::size_t size) {
         for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
             auto& page_table{process->PageTable()};
             VAddr addr{};
@@ -597,6 +635,7 @@ public:
         LOG_WARNING(Service_LDR, "(STUBBED) called");
 
         initialized = true;
+        current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart();
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ResultSuccess);
@@ -607,6 +646,7 @@ private:
 
     std::map<VAddr, NROInfo> nro;
     std::map<VAddr, std::vector<SHA256Hash>> nrr;
+    VAddr current_map_addr{};
 
     bool IsValidNROHash(const SHA256Hash& hash) const {
         return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index eaa172595..695a1faa6 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -81,6 +81,8 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name
     }
 
     auto* port = Kernel::KPort::Create(kernel);
+    SCOPE_EXIT({ port->Close(); });
+
     port->Initialize(ServerSessionCountMax, false, name);
     auto handler = it->second;
     port->GetServerPort().SetSessionHandler(std::move(handler));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index b412957c7..2b360e073 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
 struct RescalingLayout {
     alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
     alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
-    alignas(16) u32 down_factor;
+    u32 down_factor;
 };
 constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
 constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
index e0fe47912..f3c7ceb57 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -13,59 +13,535 @@ namespace {
 // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
 IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
                  u64 ttbl) {
-    IR::U32 r{ir.Imm32(0)};
-    const IR::U32 not_a{ir.BitwiseNot(a)};
-    const IR::U32 not_b{ir.BitwiseNot(b)};
-    const IR::U32 not_c{ir.BitwiseNot(c)};
-    if (ttbl & 0x01) {
-        // r |= ~a & ~b & ~c;
-        const auto lhs{ir.BitwiseAnd(not_a, not_b)};
-        const auto rhs{ir.BitwiseAnd(lhs, not_c)};
-        r = ir.BitwiseOr(r, rhs);
+    switch (ttbl) {
+        // generated code, do not edit manually
+    case 0:
+        return ir.Imm32(0);
+    case 1:
+        return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
+    case 2:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 3:
+        return ir.BitwiseNot(ir.BitwiseOr(a, b));
+    case 4:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 5:
+        return ir.BitwiseNot(ir.BitwiseOr(a, c));
+    case 6:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+    case 7:
+        return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
+    case 8:
+        return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+    case 9:
+        return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
+    case 10:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(a));
+    case 11:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 12:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(a));
+    case 13:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 14:
+        return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+    case 15:
+        return ir.BitwiseNot(a);
+    case 16:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+    case 17:
+        return ir.BitwiseNot(ir.BitwiseOr(b, c));
+    case 18:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+    case 19:
+        return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
+    case 20:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+    case 21:
+        return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+    case 22:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+    case 23:
+        return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
+                             ir.BitwiseNot(a));
+    case 24:
+        return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+    case 25:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
+    case 26:
+        return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+    case 27:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+    case 28:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+    case 29:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+    case 30:
+        return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
+    case 31:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
+    case 32:
+        return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+    case 33:
+        return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+    case 34:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(b));
+    case 35:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 36:
+        return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
+    case 37:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
+    case 38:
+        return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+    case 39:
+        return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 40:
+        return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
+    case 41:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b),
+                             ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
+    case 42:
+        return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+    case 43:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
+                             ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+    case 44:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
+    case 45:
+        return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 46:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
+    case 47:
+        return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
+    case 48:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(b));
+    case 49:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
+    case 50:
+        return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+    case 51:
+        return ir.BitwiseNot(b);
+    case 52:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+    case 53:
+        return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 54:
+        return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
+    case 55:
+        return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
+    case 56:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
+    case 57:
+        return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+    case 58:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
+    case 59:
+        return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
+    case 60:
+        return ir.BitwiseXor(a, b);
+    case 61:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
+    case 62:
+        return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
+    case 63:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, b));
+    case 64:
+        return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+    case 65:
+        return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+    case 66:
+        return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
+    case 67:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
+    case 68:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(c));
+    case 69:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 70:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+    case 71:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 72:
+        return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
+    case 73:
+        return ir.BitwiseXor(ir.BitwiseOr(a, c),
+                             ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
+    case 74:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
+    case 75:
+        return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 76:
+        return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+    case 77:
+        return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
+                             ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+    case 78:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
+    case 79:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
+    case 80:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(c));
+    case 81:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
+    case 82:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+    case 83:
+        return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 84:
+        return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+    case 85:
+        return ir.BitwiseNot(c);
+    case 86:
+        return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
+    case 87:
+        return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
+    case 88:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
+    case 89:
+        return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+    case 90:
+        return ir.BitwiseXor(a, c);
+    case 91:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
+    case 92:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
+    case 93:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
+    case 94:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
+    case 95:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, c));
+    case 96:
+        return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
+    case 97:
+        return ir.BitwiseXor(ir.BitwiseOr(b, c),
+                             ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
+    case 98:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
+    case 99:
+        return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 100:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
+    case 101:
+        return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 102:
+        return ir.BitwiseXor(b, c);
+    case 103:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
+    case 104:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
+    case 105:
+        return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+    case 106:
+        return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
+    case 107:
+        return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
+                             ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 108:
+        return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
+    case 109:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
+                             ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 110:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+    case 111:
+        return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+    case 112:
+        return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+    case 113:
+        return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
+                             ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+    case 114:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
+    case 115:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
+    case 116:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
+    case 117:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
+    case 118:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+    case 119:
+        return ir.BitwiseNot(ir.BitwiseAnd(b, c));
+    case 120:
+        return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
+    case 121:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
+                             ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 122:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+    case 123:
+        return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+    case 124:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+    case 125:
+        return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+    case 126:
+        return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+    case 127:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
+    case 128:
+        return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
+    case 129:
+        return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 130:
+        return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 131:
+        return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 132:
+        return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 133:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 134:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 135:
+        return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+    case 136:
+        return ir.BitwiseAnd(b, c);
+    case 137:
+        return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 138:
+        return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 139:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 140:
+        return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 141:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 142:
+        return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 143:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+    case 144:
+        return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 145:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 146:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 147:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+    case 148:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 149:
+        return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+    case 150:
+        return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
+    case 151:
+        return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
+                            ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+    case 152:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 153:
+        return ir.BitwiseXor(b, ir.BitwiseNot(c));
+    case 154:
+        return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+    case 155:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
+    case 156:
+        return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+    case 157:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
+    case 158:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
+    case 159:
+        return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
+    case 160:
+        return ir.BitwiseAnd(a, c);
+    case 161:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 162:
+        return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+    case 163:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 164:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 165:
+        return ir.BitwiseXor(a, ir.BitwiseNot(c));
+    case 166:
+        return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+    case 167:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
+    case 168:
+        return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
+    case 169:
+        return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+    case 170:
+        return c;
+    case 171:
+        return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+    case 172:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+    case 173:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 174:
+        return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+    case 175:
+        return ir.BitwiseOr(c, ir.BitwiseNot(a));
+    case 176:
+        return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 177:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+    case 178:
+        return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 179:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+    case 180:
+        return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+    case 181:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
+    case 182:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
+    case 183:
+        return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
+    case 184:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+    case 185:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 186:
+        return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+    case 187:
+        return ir.BitwiseOr(c, ir.BitwiseNot(b));
+    case 188:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
+    case 189:
+        return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 190:
+        return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
+    case 191:
+        return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+    case 192:
+        return ir.BitwiseAnd(a, b);
+    case 193:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 194:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 195:
+        return ir.BitwiseXor(a, ir.BitwiseNot(b));
+    case 196:
+        return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+    case 197:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 198:
+        return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+    case 199:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
+    case 200:
+        return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
+    case 201:
+        return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+    case 202:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+    case 203:
+        return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 204:
+        return b;
+    case 205:
+        return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+    case 206:
+        return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+    case 207:
+        return ir.BitwiseOr(b, ir.BitwiseNot(a));
+    case 208:
+        return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 209:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+    case 210:
+        return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
+    case 211:
+        return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
+    case 212:
+        return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+    case 213:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+    case 214:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
+    case 215:
+        return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
+    case 216:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+    case 217:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 218:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
+    case 219:
+        return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 220:
+        return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+    case 221:
+        return ir.BitwiseOr(b, ir.BitwiseNot(c));
+    case 222:
+        return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
+    case 223:
+        return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+    case 224:
+        return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
+    case 225:
+        return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+    case 226:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+    case 227:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 228:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+    case 229:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 230:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
+    case 231:
+        return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+    case 232:
+        return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+    case 233:
+        return ir.BitwiseOr(ir.BitwiseAnd(a, b),
+                            ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
+    case 234:
+        return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
+    case 235:
+        return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+    case 236:
+        return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
+    case 237:
+        return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+    case 238:
+        return ir.BitwiseOr(b, c);
+    case 239:
+        return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+    case 240:
+        return a;
+    case 241:
+        return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+    case 242:
+        return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
+    case 243:
+        return ir.BitwiseOr(a, ir.BitwiseNot(b));
+    case 244:
+        return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+    case 245:
+        return ir.BitwiseOr(a, ir.BitwiseNot(c));
+    case 246:
+        return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
+    case 247:
+        return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+    case 248:
+        return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
+    case 249:
+        return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+    case 250:
+        return ir.BitwiseOr(a, c);
+    case 251:
+        return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+    case 252:
+        return ir.BitwiseOr(a, b);
+    case 253:
+        return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+    case 254:
+        return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
+    case 255:
+        return ir.Imm32(0xFFFFFFFF);
+        // end of generated code
     }
-    if (ttbl & 0x02) {
-        // r |= ~a & ~b & c;
-        const auto lhs{ir.BitwiseAnd(not_a, not_b)};
-        const auto rhs{ir.BitwiseAnd(lhs, c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    if (ttbl & 0x04) {
-        // r |= ~a & b & ~c;
-        const auto lhs{ir.BitwiseAnd(not_a, b)};
-        const auto rhs{ir.BitwiseAnd(lhs, not_c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    if (ttbl & 0x08) {
-        // r |= ~a & b & c;
-        const auto lhs{ir.BitwiseAnd(not_a, b)};
-        const auto rhs{ir.BitwiseAnd(lhs, c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    if (ttbl & 0x10) {
-        // r |= a & ~b & ~c;
-        const auto lhs{ir.BitwiseAnd(a, not_b)};
-        const auto rhs{ir.BitwiseAnd(lhs, not_c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    if (ttbl & 0x20) {
-        // r |= a & ~b & c;
-        const auto lhs{ir.BitwiseAnd(a, not_b)};
-        const auto rhs{ir.BitwiseAnd(lhs, c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    if (ttbl & 0x40) {
-        // r |= a & b & ~c;
-        const auto lhs{ir.BitwiseAnd(a, b)};
-        const auto rhs{ir.BitwiseAnd(lhs, not_c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    if (ttbl & 0x80) {
-        // r |= a & b & c;
-        const auto lhs{ir.BitwiseAnd(a, b)};
-        const auto rhs{ir.BitwiseAnd(lhs, c)};
-        r = ir.BitwiseOr(r, rhs);
-    }
-    return r;
+    throw NotImplementedException("LOP3 with out of range ttbl");
 }
 
 IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
new file mode 100644
index 000000000..8f547c266
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
@@ -0,0 +1,92 @@
+# Copyright © 2022 degasus <markus@selfnet.de>
+# This work is free. You can redistribute it and/or modify it under the
+# terms of the Do What The Fuck You Want To Public License, Version 2,
+# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+
+from itertools import product
+
+# The primitive instructions
+OPS = {
+    'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b),
+    'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b),
+    'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b),
+    'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often inlined in other instructions
+}
+
+# Our database of instruction combinations: truth table -> (instruction, count, latency, costs)
+optimized_calls = {}
+def cmp(lhs, rhs):
+    if lhs is None: # new entry
+        return True
+    if lhs[3] > rhs[3]: # costs
+        return True
+    if lhs[3] < rhs[3]: # costs
+        return False
+    if len(lhs[0]) > len(rhs[0]): # string len
+        return True
+    if len(lhs[0]) < len(rhs[0]): # string len
+        return False
+    if lhs[0] > rhs[0]: # string sorting
+        return True
+    if lhs[0] < rhs[0]: # string sorting
+        return False
+    assert lhs == rhs, "redundant instruction, bug in brute force"
+    return False
+def register(imm, instruction, count, latency):
+    # Use the sum of instruction count and latency as costs to evaluate which combination is best
+    costs = count + latency
+
+    old = optimized_calls.get(imm, None)
+    new = (instruction, count, latency, costs)
+
+    # Update if new or better
+    if cmp(old, new):
+        optimized_calls[imm] = new
+        return True
+
+    return False
+
+# Constants: all-zeros (0) and all-ones (255 in the 8-bit truth table), for free
+register(0, 'ir.Imm32(0)', 0, 0)
+register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)
+
+# Inputs: a, b, c (for free)
+ta = 0xF0
+tb = 0xCC
+tc = 0xAA
+inputs = {
+    ta : 'a',
+    tb : 'b',
+    tc : 'c',
+}
+for imm, instruction in inputs.items():
+    register(imm, instruction, 0, 0)
+    register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs
+
+# Try to combine two values from the db with an instruction.
+# If it is better than the old method, update it.
+while True:
+    registered = 0
+    calls_copy = optimized_calls.copy()
+    for OP, (argc, cost, f) in OPS.items():
+        for args in product(calls_copy.items(), repeat=argc):
+            # unpack (transpose) the arrays
+            imm = [arg[0] for arg in args]
+            value = [arg[1][0] for arg in args]
+            count = [arg[1][1] for arg in args]
+            latency = [arg[1][2] for arg in args]
+
+            registered += register(
+                f(*imm),
+                OP.format(*value),
+                sum(count) + cost,
+                max(latency) + cost)
+    if registered == 0:
+        # No update at all? So terminate
+        break
+
+# Hacky output. Please improve me to output valid C++ instead.
+s = """    case {imm}:
+        return {op};"""
+for imm in range(256):
+    print(s.format(imm=imm, op=optimized_calls[imm][0]))
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 248ad3ced..b22725584 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     }
     Optimization::SsaRewritePass(program);
 
+    Optimization::ConstantPropagationPass(program);
+
     Optimization::GlobalMemoryToStorageBufferPass(program);
     Optimization::TexturePass(env, program);
 
-    Optimization::ConstantPropagationPass(program);
-
     if (Settings::values.resolution_info.active) {
         Optimization::RescalingPass(program);
     }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 38592afd0..ddf497e32 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
 /// Tries to track the storage buffer address used by a global memory instruction
 std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
     const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
-        if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
+        if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
+            inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
             return std::nullopt;
         }
         const IR::Value index{inst->Arg(0)};
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
index c28500dd1..496d4667e 100644
--- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
     }
 }
 
+void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
+                                 size_t index) {
+    const IR::Value composite{inst.Arg(index)};
+    if (composite.IsEmpty()) {
+        return;
+    }
+    const auto info{inst.Flags<IR::TextureInstInfo>()};
+    const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
+    const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
+    switch (info.type) {
+    case TextureType::ColorArray2D:
+    case TextureType::Color2D:
+        inst.SetArg(index, ir.CompositeConstruct(x, y));
+        break;
+    case TextureType::Color1D:
+    case TextureType::ColorArray1D:
+    case TextureType::Color3D:
+    case TextureType::ColorCube:
+    case TextureType::ColorArrayCube:
+    case TextureType::Buffer:
+        // Nothing to patch here
+        break;
+    }
+}
+
 void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
     const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
     SubScaleCoord(ir, inst, is_scaled);
     // Scale ImageFetch offset
-    ScaleIntegerComposite(ir, inst, is_scaled, 2);
+    ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
 }
 
 void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
     const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
     ScaleIntegerComposite(ir, inst, is_scaled, 1);
     // Scale ImageFetch offset
-    ScaleIntegerComposite(ir, inst, is_scaled, 2);
+    ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
 }
 
 void PatchImageRead(IR::Block& block, IR::Inst& inst) {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..54a902f56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,6 +7,7 @@
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
@@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
-        return StartCBData(method);
+        return ProcessCBData(argument);
     case MAXWELL3D_REG_INDEX(cb_bind[0]):
         return ProcessCBBind(0);
     case MAXWELL3D_REG_INDEX(cb_bind[1]):
@@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
         return ProcessCBBind(4);
     case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
         return DrawArrays();
+    case MAXWELL3D_REG_INDEX(small_index):
+        regs.index_array.count = regs.small_index.count;
+        regs.index_array.first = regs.small_index.first;
+        dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+        return DrawArrays();
+    case MAXWELL3D_REG_INDEX(topology_override):
+        use_topology_override = true;
+        return;
     case MAXWELL3D_REG_INDEX(clear_buffers):
         return ProcessClearBuffers();
     case MAXWELL3D_REG_INDEX(query.query_get):
@@ -248,14 +257,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
 }
 
 void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
-    if (method == cb_data_state.current) {
-        regs.reg_array[method] = method_argument;
-        ProcessCBData(method_argument);
-        return;
-    } else if (cb_data_state.current != null_cb_data) {
-        FinishCBData();
-    }
-
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
@@ -302,7 +303,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
-        ProcessCBMultiData(method, base_start, amount);
+        ProcessCBMultiData(base_start, amount);
         break;
     default:
         for (std::size_t i = 0; i < amount; i++) {
@@ -360,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
     }
 }
 
+void Maxwell3D::ProcessTopologyOverride() {
+    using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
+    using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
+
+    PrimitiveTopology topology{};
+
+    switch (regs.topology_override) {
+    case PrimitiveTopologyOverride::None:
+        topology = regs.draw.topology;
+        break;
+    case PrimitiveTopologyOverride::Points:
+        topology = PrimitiveTopology::Points;
+        break;
+    case PrimitiveTopologyOverride::Lines:
+        topology = PrimitiveTopology::Lines;
+        break;
+    case PrimitiveTopologyOverride::LineStrip:
+        topology = PrimitiveTopology::LineStrip;
+        break;
+    default:
+        topology = static_cast<PrimitiveTopology>(regs.topology_override);
+        break;
+    }
+
+    if (use_topology_override) {
+        regs.draw.topology.Assign(topology);
+    }
+}
+
 void Maxwell3D::FlushMMEInlineDraw() {
     LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
               regs.vertex_buffer.count);
@@ -370,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
     ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
                "Illegal combination of instancing parameters");
 
+    ProcessTopologyOverride();
+
     const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
     if (ShouldExecute()) {
         rasterizer->Draw(is_indexed, true);
@@ -529,6 +561,8 @@ void Maxwell3D::DrawArrays() {
     ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
                "Illegal combination of instancing parameters");
 
+    ProcessTopologyOverride();
+
     if (regs.draw.instance_next) {
         // Increment the current instance *before* drawing.
         state.current_instance += 1;
@@ -587,46 +621,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) {
     rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
 }
 
-void Maxwell3D::ProcessCBData(u32 value) {
-    const u32 id = cb_data_state.id;
-    cb_data_state.buffer[id][cb_data_state.counter] = value;
-    // Increment the current buffer position.
-    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
-    cb_data_state.counter++;
-}
-
-void Maxwell3D::StartCBData(u32 method) {
-    constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
-    cb_data_state.start_pos = regs.const_buffer.cb_pos;
-    cb_data_state.id = method - first_cb_data;
-    cb_data_state.current = method;
-    cb_data_state.counter = 0;
-    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
-}
-
-void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
-    if (cb_data_state.current != method) {
-        if (cb_data_state.current != null_cb_data) {
-            FinishCBData();
-        }
-        constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
-        cb_data_state.start_pos = regs.const_buffer.cb_pos;
-        cb_data_state.id = method - first_cb_data;
-        cb_data_state.current = method;
-        cb_data_state.counter = 0;
-    }
-    const std::size_t id = cb_data_state.id;
-    const std::size_t size = amount;
-    std::size_t i = 0;
-    for (; i < size; i++) {
-        cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
-        cb_data_state.counter++;
-    }
-    // Increment the current buffer position.
-    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
-}
-
-void Maxwell3D::FinishCBData() {
+void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
     // Write the input value to the current const buffer at the current position.
     const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
     ASSERT(buffer_address != 0);
@@ -634,14 +629,16 @@ void Maxwell3D::FinishCBData() {
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
 
-    const GPUVAddr address{buffer_address + cb_data_state.start_pos};
-    const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+    const size_t copy_size = amount * sizeof(u32);
+    memory_manager.WriteBlock(address, start_base, copy_size);
 
-    const u32 id = cb_data_state.id;
-    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+    // Increment the current buffer position.
+    regs.const_buffer.cb_pos += static_cast<u32>(copy_size);
+}
 
-    cb_data_state.id = null_cb_data;
-    cb_data_state.current = null_cb_data;
+void Maxwell3D::ProcessCBData(u32 value) {
+    ProcessCBMultiData(&value, 1);
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..357a74c70 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -367,6 +367,22 @@ public:
             Patches = 0xe,
         };
 
+        // Constants taken from NVC0_3D_UNK1970_D3D
+        // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
+        enum class PrimitiveTopologyOverride : u32 {
+            None = 0x0,
+            Points = 0x1,
+            Lines = 0x2,
+            LineStrip = 0x3,
+            Triangles = 0x4,
+            TriangleStrip = 0x5,
+            LinesAdjacency = 0xa,
+            LineStripAdjacency = 0xb,
+            TrianglesAdjacency = 0xc,
+            TriangleStripAdjacency = 0xd,
+            Patches = 0xe,
+        };
+
         enum class IndexFormat : u32 {
             UnsignedByte = 0x0,
             UnsignedShort = 0x1,
@@ -1200,7 +1216,12 @@ public:
                     }
                 } index_array;
 
-                INSERT_PADDING_WORDS_NOINIT(0x7);
+                union {
+                    BitField<0, 16, u32> first;
+                    BitField<16, 16, u32> count;
+                } small_index;
+
+                INSERT_PADDING_WORDS_NOINIT(0x6);
 
                 INSERT_PADDING_WORDS_NOINIT(0x1F);
 
@@ -1244,7 +1265,11 @@ public:
                     BitField<11, 1, u32> depth_clamp_disabled;
                 } view_volume_clip_control;
 
-                INSERT_PADDING_WORDS_NOINIT(0x1F);
+                INSERT_PADDING_WORDS_NOINIT(0xC);
+
+                PrimitiveTopologyOverride topology_override;
+
+                INSERT_PADDING_WORDS_NOINIT(0x12);
 
                 u32 depth_bounds_enable;
 
@@ -1520,10 +1545,8 @@ private:
     void ProcessSyncPoint();
 
     /// Handles a write to the CB_DATA[i] register.
-    void StartCBData(u32 method);
     void ProcessCBData(u32 value);
-    void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
-    void FinishCBData();
+    void ProcessCBMultiData(const u32* start_base, u32 amount);
 
     /// Handles a write to the CB_BIND register.
     void ProcessCBBind(size_t stage_index);
@@ -1531,6 +1554,9 @@ private:
     /// Handles a write to the VERTEX_END_GL register, triggering a draw.
     void DrawArrays();
 
+    /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
+    void ProcessTopologyOverride();
+
     // Handles a instance drawcall from MME
     void StepInstance(MMEDrawMode expected_mode, u32 count);
 
@@ -1555,20 +1581,10 @@ private:
     /// Interpreter for the macro codes uploaded to the GPU.
     std::unique_ptr<MacroEngine> macro_engine;
 
-    static constexpr u32 null_cb_data = 0xFFFFFFFF;
-    struct CBDataState {
-        static constexpr size_t inline_size = 0x4000;
-        std::array<std::array<u32, inline_size>, 16> buffer;
-        u32 current{null_cb_data};
-        u32 id{null_cb_data};
-        u32 start_pos{};
-        u32 counter{};
-    };
-    CBDataState cb_data_state;
-
     Upload::State upload_state;
 
     bool execute_on{true};
+    bool use_topology_override{false};
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
@@ -1685,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585);
 ASSERT_REG_POSITION(primitive_restart, 0x591);
 ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
 ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(small_index, 0x5F9);
 ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
 ASSERT_REG_POSITION(instanced_arrays, 0x620);
 ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
 ASSERT_REG_POSITION(pixel_center_integer, 0x649);
 ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
 ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(topology_override, 0x65C);
 ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
 ASSERT_REG_POSITION(logic_op, 0x671);
 ASSERT_REG_POSITION(clear_buffers, 0x674);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..1fc1358bc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -53,7 +53,6 @@ void MaxwellDMA::Launch() {
 
     // TODO(Subv): Perform more research and implement all features of this engine.
     const LaunchDMA& launch = regs.launch_dma;
-    ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
     ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
     ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
     ASSERT(regs.dst_params.origin.x == 0);
@@ -79,6 +78,7 @@ void MaxwellDMA::Launch() {
             CopyPitchToBlockLinear();
         }
     }
+    ReleaseSemaphore();
 }
 
 void MaxwellDMA::CopyPitchToPitch() {
@@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
 
+void MaxwellDMA::ReleaseSemaphore() {
+    const auto type = regs.launch_dma.semaphore_type;
+    const GPUVAddr address = regs.semaphore.address;
+    switch (type) {
+    case LaunchDMA::SemaphoreType::NONE:
+        break;
+    case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
+        memory_manager.Write<u32>(address, regs.semaphore.payload);
+        break;
+    case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
+        memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
+        memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
+        break;
+    default:
+        UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+    }
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..2692cac8a 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -224,6 +224,8 @@ private:
 
     void FastCopyBlockLinearToPitch();
 
+    void ReleaseSemaphore();
+
     Core::System& system;
 
     MemoryManager& memory_manager;
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 151290101..293ad7d59 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -31,9 +31,8 @@ bool GLInnerFence::IsSignaled() const {
         return true;
     }
     ASSERT(sync_object.handle != 0);
-    GLsizei length;
     GLint sync_status;
-    glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
+    glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
     return sync_status == GL_SIGNALED;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index f8495896c..9e6732abd 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline(
             case Settings::ShaderBackend::GLASM:
                 if (!sources[stage].empty()) {
                     assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
-                    if (in_parallel) {
-                        // Make sure program is built before continuing when building in parallel
-                        glGetString(GL_PROGRAM_ERROR_STRING_NV);
-                    }
                 }
                 break;
             case Settings::ShaderBackend::SPIRV:
@@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline(
                 break;
             }
         }
-        if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
-            // Make sure programs have built if we are building shaders in parallel
-            for (OGLProgram& program : source_programs) {
-                if (program.handle != 0) {
-                    GLint status{};
-                    glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
-                }
-            }
+        if (in_parallel) {
+            std::lock_guard lock{built_mutex};
+            built_fence.Create();
+            // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+            glFlush();
+            built_condvar.notify_one();
+        } else {
+            is_built = true;
         }
         if (shader_notify) {
             shader_notify->MarkShaderComplete();
         }
-        is_built = true;
-        built_condvar.notify_one();
     }};
     if (thread_worker) {
         thread_worker->QueueWork(std::move(func));
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
     buffer_cache.UpdateGraphicsBuffers(is_indexed);
     buffer_cache.BindHostGeometryBuffers(is_indexed);
 
-    if (!is_built.load(std::memory_order::relaxed)) {
+    if (!IsBuilt()) {
         WaitForBuild();
     }
     const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
 }
 
 void GraphicsPipeline::WaitForBuild() {
-    std::unique_lock lock{built_mutex};
-    built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+    if (built_fence.handle == 0) {
+        std::unique_lock lock{built_mutex};
+        built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+    }
+    ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+    is_built = true;
+}
+
+bool GraphicsPipeline::IsBuilt() noexcept {
+    if (is_built) {
+        return true;
+    }
+    if (built_fence.handle == 0) {
+        return false;
+    }
+    // Timeout of zero means this is non-blocking
+    const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
+    ASSERT(sync_status != GL_WAIT_FAILED);
+    is_built = sync_status != GL_TIMEOUT_EXPIRED;
+    return is_built;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 4e28d9a42..311d49f3f 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -100,9 +100,7 @@ public:
         return writes_global_memory;
     }
 
-    [[nodiscard]] bool IsBuilt() const noexcept {
-        return is_built.load(std::memory_order::relaxed);
-    }
+    [[nodiscard]] bool IsBuilt() noexcept;
 
     template <typename Spec>
     static auto MakeConfigureSpecFunc() {
@@ -154,7 +152,8 @@ private:
 
     std::mutex built_mutex;
     std::condition_variable built_condvar;
-    std::atomic_bool is_built{false};
+    OGLSync built_fence{};
+    bool is_built{false};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..4d73427b4 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
 #include <cstring>
 #include <memory>
 #include <optional>
@@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
         };
-        const std::array push_constants{base_vertex, index_shift};
+        const std::array<u32, 2> push_constants{base_vertex, index_shift};
         const VkDescriptorSet set = descriptor_allocator.Commit();
         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0f62779de..ca6019a3a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
         }
         break;
     case PixelFormat::A8B8G8R8_UNORM:
-        if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
+        if (src_view.format == PixelFormat::S8_UINT_D24_UNORM ||
+            src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
             return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
         }
         break;
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 329bf4def..2f2594585 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
         gpu->BindRenderer(std::move(renderer));
         return gpu;
     } catch (const std::runtime_error& exception) {
+        scope.Cancel();
         LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
         return nullptr;
     }
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index f915bd856..4b943c6ba 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -609,6 +609,7 @@ void Config::ReadCpuValues() {
     ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
     ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
     ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
+    ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
 
     if (global) {
         ReadBasicSetting(Settings::values.cpu_debug_mode);
@@ -621,6 +622,8 @@ void Config::ReadCpuValues() {
         ReadBasicSetting(Settings::values.cpuopt_misc_ir);
         ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
         ReadBasicSetting(Settings::values.cpuopt_fastmem);
+        ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
+        ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives);
     }
 
     qt_config->endGroup();
@@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() {
     WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
     WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
     WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
+    WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
 
     if (global) {
         WriteBasicSetting(Settings::values.cpu_debug_mode);
@@ -1151,6 +1155,8 @@ void Config::SaveCpuValues() {
         WriteBasicSetting(Settings::values.cpuopt_misc_ir);
         WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
         WriteBasicSetting(Settings::values.cpuopt_fastmem);
+        WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
+        WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
     }
 
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index f66cab5d4..bf74ccc7c 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() {
     ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
     ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
     ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
+    ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock);
 
     ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
     ui->cpuopt_unsafe_reduce_fp_error->setChecked(
@@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() {
         Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
     ui->cpuopt_unsafe_fastmem_check->setChecked(
         Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
+    ui->cpuopt_unsafe_ignore_global_monitor->setChecked(
+        Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue());
 
     if (Settings::IsConfiguringGlobal()) {
         ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() {
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
                                              ui->cpuopt_unsafe_fastmem_check,
                                              cpuopt_unsafe_fastmem_check);
+    ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor,
+                                             ui->cpuopt_unsafe_ignore_global_monitor,
+                                             cpuopt_unsafe_ignore_global_monitor);
 }
 
 void ConfigureCpu::changeEvent(QEvent* event) {
@@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() {
     ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
                                             Settings::values.cpuopt_unsafe_fastmem_check,
                                             cpuopt_unsafe_fastmem_check);
+    ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor,
+                                            Settings::values.cpuopt_unsafe_ignore_global_monitor,
+                                            cpuopt_unsafe_ignore_global_monitor);
 }
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index ed9af0e9f..733e38be4 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -45,6 +45,7 @@ private:
     ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
     ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
     ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
+    ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor;
 
     const Core::System& system;
 };
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index d8064db24..5d80a8c91 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -150,6 +150,18 @@
           </property>
          </widget>
         </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor">
+          <property name="toolTip">
+           <string>
+            &lt;div&gt;This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.&lt;/div&gt;
+           </string>
+          </property>
+          <property name="text">
+           <string>Ignore global monitor</string>
+          </property>
+         </widget>
+        </item>
        </layout>
       </widget>
      </item>
diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp
index 05a90963d..616a0be75 100644
--- a/src/yuzu/configuration/configure_cpu_debug.cpp
+++ b/src/yuzu/configuration/configure_cpu_debug.cpp
@@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() {
         Settings::values.cpuopt_reduce_misalign_checks.GetValue());
     ui->cpuopt_fastmem->setEnabled(runtime_lock);
     ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
+    ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock);
+    ui->cpuopt_fastmem_exclusives->setChecked(
+        Settings::values.cpuopt_fastmem_exclusives.GetValue());
+    ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock);
+    ui->cpuopt_recompile_exclusives->setChecked(
+        Settings::values.cpuopt_recompile_exclusives.GetValue());
 }
 
 void ConfigureCpuDebug::ApplyConfiguration() {
@@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() {
     Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
     Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
     Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
+    Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked();
+    Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked();
 }
 
 void ConfigureCpuDebug::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui
index 6e635bb2f..2bc268810 100644
--- a/src/yuzu/configuration/configure_cpu_debug.ui
+++ b/src/yuzu/configuration/configure_cpu_debug.ui
@@ -144,7 +144,34 @@
            </string>
           </property>
           <property name="text">
-           <string>Enable Host MMU Emulation</string>
+           <string>Enable Host MMU Emulation (general memory instructions)</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_fastmem_exclusives">
+          <property name="toolTip">
+           <string>
+            &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
+            &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
+            &lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all exclusive memory accesses to use Software MMU Emulation.&lt;/div&gt;
+           </string>
+          </property>
+          <property name="text">
+           <string>Enable Host MMU Emulation (exclusive memory instructions)</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_recompile_exclusives">
+          <property name="toolTip">
+           <string>
+            &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
+            &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.&lt;/div&gt;
+           </string>
+          </property>
+          <property name="text">
+           <string>Enable recompilation of exclusive memory instructions</string>
           </property>
          </widget>
         </item>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index d573829be..06774768d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -250,9 +250,9 @@ GMainWindow::GMainWindow()
 #ifdef ARCHITECTURE_x86_64
     const auto& caps = Common::GetCPUCaps();
     std::string cpu_string = caps.cpu_string;
-    if (caps.avx || caps.avx2 || caps.avx512) {
+    if (caps.avx || caps.avx2 || caps.avx512f) {
         cpu_string += " | AVX";
-        if (caps.avx512) {
+        if (caps.avx512f) {
             cpu_string += "512";
         } else if (caps.avx2) {
             cpu_string += '2';
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 30963a8bb..b74411c84 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -280,11 +280,14 @@ void Config::ReadValues() {
     ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
     ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
     ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
+    ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives);
+    ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives);
     ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
     ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
     ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
     ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
     ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
+    ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor);
 
     // Renderer
     ReadSetting("Renderer", Settings::values.renderer_backend);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6d613bf7a..34782c378 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -124,7 +124,11 @@ keyboard_enabled =
 [Core]
 # Whether to use multi-core for CPU emulation
 # 0: Disabled, 1 (default): Enabled
-use_multi_core=
+use_multi_core =
+
+# Enable extended guest system memory layout (6GB DRAM)
+# 0 (default): Disabled, 1: Enabled
+use_extended_memory_layout =
 
 [Cpu]
 # Adjusts various optimizations.
@@ -174,6 +178,14 @@ cpuopt_reduce_misalign_checks =
 # 0: Disabled, 1 (default): Enabled
 cpuopt_fastmem =
 
+# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access)
+# 0: Disabled, 1 (default): Enabled
+cpuopt_fastmem_exclusives =
+
+# Enable recompilation of exclusive memory instructions (reduces the overhead of fastmem failure of exclusive memory accesses)
+# 0: Disabled, 1 (default): Enabled
+cpuopt_recompile_exclusives =
+
 # Enable unfuse FMA (improve performance on CPUs without FMA)
 # Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
 # 0: Disabled, 1 (default): Enabled
@@ -199,6 +211,11 @@ cpuopt_unsafe_inaccurate_nan =
 # 0: Disabled, 1 (default): Enabled
 cpuopt_unsafe_fastmem_check =
 
+# Ignore the global monitor (faster exclusive instructions; may result in deadlocks and other race conditions)
+# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_ignore_global_monitor =
+
 [Renderer]
 # Which backend API to use.
 # 0 (default): OpenGL, 1: Vulkan