57 files changed, 1641 insertions, 892 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e9502a97..1bd9d2aa7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -161,7 +161,7 @@ macro(yuzu_find_packages)
     #    Cmake Pkg Prefix  Version     Conan Pkg
         "Boost             1.73        boost/1.73.0"
         "Catch2            2.13        catch2/2.13.0"
-        "fmt               7.0         fmt/7.0.3"
+        "fmt               7.1         fmt/7.1.0"
     # can't use until https://github.com/bincrafters/community/issues/1173
         #"libzip            1.5         libzip/1.5.2@bincrafters/stable"
         "lz4               1.8         lz4/1.9.2"
diff --git a/externals/httplib/README.md b/externals/httplib/README.md
index 73037d297..1940e446c 100644
--- a/externals/httplib/README.md
+++ b/externals/httplib/README.md
@@ -1,4 +1,4 @@
-From https://github.com/yhirose/cpp-httplib/tree/fce8e6fefdab4ad48bc5b25c98e5ebfda4f3cf53
+From https://github.com/yhirose/cpp-httplib/tree/ff5677ad197947177c158fe857caff4f0e242045 with https://github.com/yhirose/cpp-httplib/pull/701
 
 MIT License
 
diff --git a/externals/httplib/httplib.h b/externals/httplib/httplib.h
index 5139b7f05..8982054e2 100644
--- a/externals/httplib/httplib.h
+++ b/externals/httplib/httplib.h
@@ -173,6 +173,7 @@ using socket_t = int;
 #define INVALID_SOCKET (-1)
 #endif //_WIN32
 
+#include <algorithm>
 #include <array>
 #include <atomic>
 #include <cassert>
@@ -237,6 +238,27 @@ namespace httplib {
 
 namespace detail {
 
+/*
+ * Backport std::make_unique from C++14.
+ *
+ * NOTE: This code came up with the following stackoverflow post:
+ * https://stackoverflow.com/questions/10149840/c-arrays-and-make-unique
+ *
+ */
+
+template <class T, class... Args>
+typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type
+make_unique(Args &&... args) {
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+
+template <class T>
+typename std::enable_if<std::is_array<T>::value, std::unique_ptr<T>>::type
+make_unique(std::size_t n) {
+  typedef typename std::remove_extent<T>::type RT;
+  return std::unique_ptr<T>(new RT[n]);
+}
+
 struct ci {
   bool operator()(const std::string &s1, const std::string &s2) const {
     return std::lexicographical_compare(
@@ -304,6 +326,10 @@ using ContentProvider =
 using ContentProviderWithoutLength =
     std::function<bool(size_t offset, DataSink &sink)>;
 
+using ContentReceiverWithProgress =
+    std::function<bool(const char *data, size_t data_length, uint64_t offset,
+                       uint64_t total_length)>;
+
 using ContentReceiver =
     std::function<bool(const char *data, size_t data_length)>;
 
@@ -317,14 +343,17 @@ public:
                                              ContentReceiver receiver)>;
 
   ContentReader(Reader reader, MultipartReader multipart_reader)
-      : reader_(reader), multipart_reader_(multipart_reader) {}
+      : reader_(std::move(reader)),
+        multipart_reader_(std::move(multipart_reader)) {}
 
   bool operator()(MultipartContentHeader header,
                   ContentReceiver receiver) const {
-    return multipart_reader_(header, receiver);
+    return multipart_reader_(std::move(header), std::move(receiver));
   }
 
-  bool operator()(ContentReceiver receiver) const { return reader_(receiver); }
+  bool operator()(ContentReceiver receiver) const {
+    return reader_(std::move(receiver));
+  }
 
   Reader reader_;
   MultipartReader multipart_reader_;
@@ -353,7 +382,7 @@ struct Request {
   // for client
   size_t redirect_count = CPPHTTPLIB_REDIRECT_MAX_COUNT;
   ResponseHandler response_handler;
-  ContentReceiver content_receiver;
+  ContentReceiverWithProgress content_receiver;
   size_t content_length = 0;
   ContentProvider content_provider;
   Progress progress;
@@ -475,7 +504,7 @@ public:
 
   void enqueue(std::function<void()> fn) override {
     std::unique_lock<std::mutex> lock(mutex_);
-    jobs_.push_back(fn);
+    jobs_.push_back(std::move(fn));
     cond_.notify_one();
   }
 
@@ -664,8 +693,8 @@ private:
                          ContentReceiver multipart_receiver);
 
   virtual bool process_and_close_socket(socket_t sock);
-  
-  struct MountPointEntry { 
+
+  struct MountPointEntry {
     std::string mount_point;
     std::string base_dir;
     Headers headers;
@@ -704,23 +733,27 @@ enum Error {
   Canceled,
   SSLConnection,
   SSLLoadingCerts,
-  SSLServerVerification
+  SSLServerVerification,
+  UnsupportedMultipartBoundaryChars
 };
 
 class Result {
 public:
-  Result(const std::shared_ptr<Response> &res, Error err)
-      : res_(res), err_(err) {}
+  Result(std::unique_ptr<Response> res, Error err)
+      : res_(std::move(res)), err_(err) {}
   operator bool() const { return res_ != nullptr; }
   bool operator==(std::nullptr_t) const { return res_ == nullptr; }
   bool operator!=(std::nullptr_t) const { return res_ != nullptr; }
   const Response &value() const { return *res_; }
+  Response &value() { return *res_; }
   const Response &operator*() const { return *res_; }
+  Response &operator*() { return *res_; }
   const Response *operator->() const { return res_.get(); }
+  Response *operator->() { return res_.get(); }
   Error error() const { return err_; }
 
 private:
-  std::shared_ptr<Response> res_;
+  std::unique_ptr<Response> res_;
   Error err_;
 };
 
@@ -777,6 +810,8 @@ public:
   Result Post(const char *path, const MultipartFormDataItems &items);
   Result Post(const char *path, const Headers &headers,
               const MultipartFormDataItems &items);
+  Result Post(const char *path, const Headers &headers,
+              const MultipartFormDataItems &items, const std::string &boundary);
 
   Result Put(const char *path);
   Result Put(const char *path, const std::string &body,
@@ -863,7 +898,21 @@ protected:
   };
 
   virtual bool create_and_connect_socket(Socket &socket);
-  virtual void close_socket(Socket &socket, bool process_socket_ret);
+
+  // All of:
+  //   shutdown_ssl
+  //   shutdown_socket
+  //   close_socket
+  // should ONLY be called when socket_mutex_ is locked.
+  // Also, shutdown_ssl and close_socket should also NOT be called concurrently
+  // with a DIFFERENT thread sending requests using that socket.
+  virtual void shutdown_ssl(Socket &socket, bool shutdown_gracefully);
+  void shutdown_socket(Socket &socket);
+  void close_socket(Socket &socket);
+
+  // Similar to shutdown_ssl and close_socket, this should NOT be called
+  // concurrently with a DIFFERENT thread sending requests from the socket
+  void lock_socket_and_shutdown_and_close();
 
   bool process_request(Stream &strm, const Request &req, Response &res,
                        bool close_connection);
@@ -873,7 +922,7 @@ protected:
   void copy_settings(const ClientImpl &rhs);
 
   // Error state
-  mutable Error error_ = Error::Success;
+  mutable std::atomic<Error> error_;
 
   // Socket endoint information
   const std::string host_;
@@ -885,6 +934,11 @@ protected:
   mutable std::mutex socket_mutex_;
   std::recursive_mutex request_mutex_;
 
+  // These are all protected under socket_mutex
+  int socket_requests_in_flight_ = 0;
+  std::thread::id socket_requests_are_from_thread_ = std::thread::id();
+  bool socket_should_be_closed_when_request_is_done_ = false;
+
   // Default headers
   Headers default_headers_;
 
@@ -942,13 +996,13 @@ private:
   bool redirect(const Request &req, Response &res);
   bool handle_request(Stream &strm, const Request &req, Response &res,
                       bool close_connection);
-  void stop_core();
-  std::shared_ptr<Response> send_with_content_provider(
+  std::unique_ptr<Response> send_with_content_provider(
       const char *method, const char *path, const Headers &headers,
       const std::string &body, size_t content_length,
       ContentProvider content_provider, const char *content_type);
 
-  virtual bool process_socket(Socket &socket,
+  // socket is const because this function is called when socket_mutex_ is not locked
+  virtual bool process_socket(const Socket &socket,
                               std::function<bool(Stream &strm)> callback);
   virtual bool is_ssl() const;
 };
@@ -1012,6 +1066,8 @@ public:
   Result Post(const char *path, const MultipartFormDataItems &items);
   Result Post(const char *path, const Headers &headers,
               const MultipartFormDataItems &items);
+  Result Post(const char *path, const Headers &headers,
+              const MultipartFormDataItems &items, const std::string &boundary);
   Result Put(const char *path);
   Result Put(const char *path, const std::string &body,
              const char *content_type);
@@ -1098,7 +1154,7 @@ public:
 #endif
 
 private:
-  std::shared_ptr<ClientImpl> cli_;
+  std::unique_ptr<ClientImpl> cli_;
 
 #ifdef CPPHTTPLIB_OPENSSL_SUPPORT
   bool is_ssl_ = false;
@@ -1154,9 +1210,9 @@ public:
 
 private:
   bool create_and_connect_socket(Socket &socket) override;
-  void close_socket(Socket &socket, bool process_socket_ret) override;
+  void shutdown_ssl(Socket &socket, bool shutdown_gracefully) override;
 
-  bool process_socket(Socket &socket,
+  bool process_socket(const Socket &socket,
                       std::function<bool(Stream &strm)> callback) override;
   bool is_ssl() const override;
 
@@ -1942,9 +1998,9 @@ inline socket_t create_client_socket(const char *host, int port,
                                      bool tcp_nodelay,
                                      SocketOptions socket_options,
                                      time_t timeout_sec, time_t timeout_usec,
-                                     const std::string &intf, Error &error) {
+                                     const std::string &intf, std::atomic<Error> &error) {
   auto sock = create_socket(
-      host, port, 0, tcp_nodelay, socket_options,
+      host, port, 0, tcp_nodelay, std::move(socket_options),
       [&](socket_t sock, struct addrinfo &ai) -> bool {
         if (!intf.empty()) {
 #ifdef USE_IF2IP
@@ -2478,7 +2534,8 @@ inline bool read_headers(Stream &strm, Headers &headers) {
 }
 
 inline bool read_content_with_length(Stream &strm, uint64_t len,
-                                     Progress progress, ContentReceiver out) {
+                                     Progress progress,
+                                     ContentReceiverWithProgress out) {
   char buf[CPPHTTPLIB_RECV_BUFSIZ];
 
   uint64_t r = 0;
@@ -2487,8 +2544,7 @@ inline bool read_content_with_length(Stream &strm, uint64_t len,
     auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
     if (n <= 0) { return false; }
 
-    if (!out(buf, static_cast<size_t>(n))) { return false; }
-
+    if (!out(buf, static_cast<size_t>(n), r, len)) { return false; }
     r += static_cast<uint64_t>(n);
 
     if (progress) {
@@ -2510,8 +2566,10 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) {
   }
 }
 
-inline bool read_content_without_length(Stream &strm, ContentReceiver out) {
+inline bool read_content_without_length(Stream &strm,
+                                        ContentReceiverWithProgress out) {
   char buf[CPPHTTPLIB_RECV_BUFSIZ];
+  uint64_t r = 0;
   for (;;) {
     auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ);
     if (n < 0) {
@@ -2519,13 +2577,16 @@ inline bool read_content_without_length(Stream &strm, ContentReceiver out) {
     } else if (n == 0) {
       return true;
     }
-    if (!out(buf, static_cast<size_t>(n))) { return false; }
+
+    if (!out(buf, static_cast<size_t>(n), r, 0)) { return false; }
+    r += static_cast<uint64_t>(n);
   }
 
   return true;
 }
 
-inline bool read_content_chunked(Stream &strm, ContentReceiver out) {
+inline bool read_content_chunked(Stream &strm,
+                                 ContentReceiverWithProgress out) {
   const auto bufsiz = 16;
   char buf[bufsiz];
 
@@ -2570,23 +2631,24 @@ inline bool is_chunked_transfer_encoding(const Headers &headers) {
 }
 
 template <typename T, typename U>
-bool prepare_content_receiver(T &x, int &status, ContentReceiver receiver,
+bool prepare_content_receiver(T &x, int &status,
+                              ContentReceiverWithProgress receiver,
                               bool decompress, U callback) {
   if (decompress) {
     std::string encoding = x.get_header_value("Content-Encoding");
-    std::shared_ptr<decompressor> decompressor;
+    std::unique_ptr<decompressor> decompressor;
 
     if (encoding.find("gzip") != std::string::npos ||
         encoding.find("deflate") != std::string::npos) {
 #ifdef CPPHTTPLIB_ZLIB_SUPPORT
-      decompressor = std::make_shared<gzip_decompressor>();
+      decompressor = detail::make_unique<gzip_decompressor>();
 #else
       status = 415;
       return false;
 #endif
     } else if (encoding.find("br") != std::string::npos) {
 #ifdef CPPHTTPLIB_BROTLI_SUPPORT
-      decompressor = std::make_shared<brotli_decompressor>();
+      decompressor = detail::make_unique<brotli_decompressor>();
 #else
       status = 415;
       return false;
@@ -2595,12 +2657,14 @@ bool prepare_content_receiver(T &x, int &status, ContentReceiver receiver,
 
     if (decompressor) {
       if (decompressor->is_valid()) {
-        ContentReceiver out = [&](const char *buf, size_t n) {
-          return decompressor->decompress(
-              buf, n,
-              [&](const char *buf, size_t n) { return receiver(buf, n); });
+        ContentReceiverWithProgress out = [&](const char *buf, size_t n,
+                                              uint64_t off, uint64_t len) {
+          return decompressor->decompress(buf, n,
+                                          [&](const char *buf, size_t n) {
+                                            return receiver(buf, n, off, len);
+                                          });
         };
-        return callback(out);
+        return callback(std::move(out));
       } else {
         status = 500;
         return false;
@@ -2608,18 +2672,20 @@ bool prepare_content_receiver(T &x, int &status, ContentReceiver receiver,
     }
   }
 
-  ContentReceiver out = [&](const char *buf, size_t n) {
-    return receiver(buf, n);
+  ContentReceiverWithProgress out = [&](const char *buf, size_t n, uint64_t off,
+                                        uint64_t len) {
+    return receiver(buf, n, off, len);
   };
-  return callback(out);
+  return callback(std::move(out));
 }
 
 template <typename T>
 bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status,
-                  Progress progress, ContentReceiver receiver,
+                  Progress progress, ContentReceiverWithProgress receiver,
                   bool decompress) {
   return prepare_content_receiver(
-      x, status, receiver, decompress, [&](const ContentReceiver &out) {
+      x, status, std::move(receiver), decompress,
+      [&](const ContentReceiverWithProgress &out) {
         auto ret = true;
         auto exceed_payload_max_length = false;
 
@@ -2634,7 +2700,7 @@ bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status,
             skip_content_with_length(strm, len);
             ret = false;
           } else if (len > 0) {
-            ret = read_content_with_length(strm, len, progress, out);
+            ret = read_content_with_length(strm, len, std::move(progress), out);
           }
         }
 
@@ -2875,7 +2941,7 @@ inline bool parse_multipart_boundary(const std::string &content_type,
   return !boundary.empty();
 }
 
-inline bool parse_range_header(const std::string &s, Ranges &ranges) {
+inline bool parse_range_header(const std::string &s, Ranges &ranges) try {
   static auto re_first_range = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))");
   std::smatch m;
   if (std::regex_match(s, m, re_first_range)) {
@@ -2907,7 +2973,7 @@ inline bool parse_range_header(const std::string &s, Ranges &ranges) {
     return all_valid_ranges;
   }
   return false;
-}
+} catch (...) { return false; }
 
 class MultipartFormDataParser {
 public:
@@ -2918,7 +2984,8 @@ public:
   bool is_valid() const { return is_valid_; }
 
   template <typename T, typename U>
-  bool parse(const char *buf, size_t n, T content_callback, U header_callback) {
+  bool parse(const char *buf, size_t n, const T &content_callback,
+             const U &header_callback) {
 
     static const std::regex re_content_disposition(
         "^Content-Disposition:\\s*form-data;\\s*name=\"(.*?)\"(?:;\\s*filename="
@@ -3090,8 +3157,13 @@ inline std::string make_multipart_data_boundary() {
   static const char data[] =
       "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
 
+  // std::random_device might actually be deterministic on some
+  // platforms, but due to lack of support in the c++ standard library,
+  // doing better requires either some ugly hacks or breaking portability.
   std::random_device seed_gen;
-  std::mt19937 engine(seed_gen());
+  // Request 128 bits of entropy for initialization
+  std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), seed_gen()};
+  std::mt19937 engine(seed_sequence);
 
   std::string result = "--cpp-httplib-multipart-data-";
 
@@ -3114,7 +3186,7 @@ get_range_offset_and_length(const Request &req, size_t content_length,
   auto slen = static_cast<ssize_t>(content_length);
 
   if (r.first == -1) {
-    r.first = slen - r.second;
+    r.first = (std::max)(static_cast<ssize_t>(0), slen - r.second);
     r.second = slen - 1;
   }
 
@@ -3451,7 +3523,7 @@ inline std::pair<std::string, std::string> make_range_header(Ranges ranges) {
     if (r.second != -1) { field += std::to_string(r.second); }
     i++;
   }
-  return std::make_pair("Range", field);
+  return std::make_pair("Range", std::move(field));
 }
 
 inline std::pair<std::string, std::string>
@@ -3460,7 +3532,7 @@ make_basic_authentication_header(const std::string &username,
                                  bool is_proxy = false) {
   auto field = "Basic " + detail::base64_encode(username + ":" + password);
   auto key = is_proxy ? "Proxy-Authorization" : "Authorization";
-  return std::make_pair(key, field);
+  return std::make_pair(key, std::move(field));
 }
 
 inline std::pair<std::string, std::string>
@@ -3468,7 +3540,7 @@ make_bearer_token_authentication_header(const std::string &token,
                                         bool is_proxy = false) {
   auto field = "Bearer " + token;
   auto key = is_proxy ? "Proxy-Authorization" : "Authorization";
-  return std::make_pair(key, field);
+  return std::make_pair(key, std::move(field));
 }
 
 // Request implementation
@@ -3761,60 +3833,66 @@ inline Server::Server()
 inline Server::~Server() {}
 
 inline Server &Server::Get(const char *pattern, Handler handler) {
-  get_handlers_.push_back(std::make_pair(std::regex(pattern), handler));
+  get_handlers_.push_back(
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Post(const char *pattern, Handler handler) {
-  post_handlers_.push_back(std::make_pair(std::regex(pattern), handler));
+  post_handlers_.push_back(
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Post(const char *pattern,
                             HandlerWithContentReader handler) {
   post_handlers_for_content_reader_.push_back(
-      std::make_pair(std::regex(pattern), handler));
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Put(const char *pattern, Handler handler) {
-  put_handlers_.push_back(std::make_pair(std::regex(pattern), handler));
+  put_handlers_.push_back(
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Put(const char *pattern,
                            HandlerWithContentReader handler) {
   put_handlers_for_content_reader_.push_back(
-      std::make_pair(std::regex(pattern), handler));
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Patch(const char *pattern, Handler handler) {
-  patch_handlers_.push_back(std::make_pair(std::regex(pattern), handler));
+  patch_handlers_.push_back(
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Patch(const char *pattern,
                              HandlerWithContentReader handler) {
   patch_handlers_for_content_reader_.push_back(
-      std::make_pair(std::regex(pattern), handler));
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Delete(const char *pattern, Handler handler) {
-  delete_handlers_.push_back(std::make_pair(std::regex(pattern), handler));
+  delete_handlers_.push_back(
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Delete(const char *pattern,
                               HandlerWithContentReader handler) {
   delete_handlers_for_content_reader_.push_back(
-      std::make_pair(std::regex(pattern), handler));
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
 inline Server &Server::Options(const char *pattern, Handler handler) {
-  options_handlers_.push_back(std::make_pair(std::regex(pattern), handler));
+  options_handlers_.push_back(
+      std::make_pair(std::regex(pattern), std::move(handler)));
   return *this;
 }
 
@@ -3860,7 +3938,7 @@ inline void Server::set_error_handler(Handler handler) {
 inline void Server::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; }
 
 inline void Server::set_socket_options(SocketOptions socket_options) {
-  socket_options_ = socket_options;
+  socket_options_ = std::move(socket_options);
 }
 
 inline void Server::set_logger(Logger logger) { logger_ = std::move(logger); }
@@ -4045,16 +4123,16 @@ inline bool Server::write_response(Stream &strm, bool close_connection,
     }
 
     if (type != detail::EncodingType::None) {
-      std::shared_ptr<detail::compressor> compressor;
+      std::unique_ptr<detail::compressor> compressor;
 
       if (type == detail::EncodingType::Gzip) {
 #ifdef CPPHTTPLIB_ZLIB_SUPPORT
-        compressor = std::make_shared<detail::gzip_compressor>();
+        compressor = detail::make_unique<detail::gzip_compressor>();
         res.set_header("Content-Encoding", "gzip");
 #endif
       } else if (type == detail::EncodingType::Brotli) {
 #ifdef CPPHTTPLIB_BROTLI_SUPPORT
-        compressor = std::make_shared<detail::brotli_compressor>();
+        compressor = detail::make_unique<detail::brotli_compressor>();
         res.set_header("Content-Encoding", "brotli");
 #endif
       }
@@ -4136,17 +4214,17 @@ Server::write_content_with_provider(Stream &strm, const Request &req,
     if (res.is_chunked_content_provider) {
       auto type = detail::encoding_type(req, res);
 
-      std::shared_ptr<detail::compressor> compressor;
+      std::unique_ptr<detail::compressor> compressor;
       if (type == detail::EncodingType::Gzip) {
 #ifdef CPPHTTPLIB_ZLIB_SUPPORT
-        compressor = std::make_shared<detail::gzip_compressor>();
+        compressor = detail::make_unique<detail::gzip_compressor>();
 #endif
       } else if (type == detail::EncodingType::Brotli) {
 #ifdef CPPHTTPLIB_BROTLI_SUPPORT
-        compressor = std::make_shared<detail::brotli_compressor>();
+        compressor = detail::make_unique<detail::brotli_compressor>();
 #endif
       } else {
-        compressor = std::make_shared<detail::nocompressor>();
+        compressor = detail::make_unique<detail::nocompressor>();
       }
       assert(compressor != nullptr);
 
@@ -4198,8 +4276,9 @@ inline bool Server::read_content_with_content_receiver(
     Stream &strm, Request &req, Response &res, ContentReceiver receiver,
     MultipartContentHeader multipart_header,
     ContentReceiver multipart_receiver) {
-  return read_content_core(strm, req, res, receiver, multipart_header,
-                           multipart_receiver);
+  return read_content_core(strm, req, res, std::move(receiver),
+                           std::move(multipart_header),
+                           std::move(multipart_receiver));
 }
 
 inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
@@ -4207,7 +4286,7 @@ inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
                                       MultipartContentHeader mulitpart_header,
                                       ContentReceiver multipart_receiver) {
   detail::MultipartFormDataParser multipart_form_data_parser;
-  ContentReceiver out;
+  ContentReceiverWithProgress out;
 
   if (req.is_multipart_form_data()) {
     const auto &content_type = req.get_header_value("Content-Type");
@@ -4218,7 +4297,7 @@ inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
     }
 
     multipart_form_data_parser.set_boundary(std::move(boundary));
-    out = [&](const char *buf, size_t n) {
+    out = [&](const char *buf, size_t n, uint64_t /*off*/, uint64_t /*len*/) {
       /* For debug
       size_t pos = 0;
       while (pos < n) {
@@ -4234,7 +4313,8 @@ inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
                                               mulitpart_header);
     };
   } else {
-    out = receiver;
+    out = [receiver](const char *buf, size_t n, uint64_t /*off*/,
+                     uint64_t /*len*/) { return receiver(buf, n); };
   }
 
   if (req.method == "DELETE" && !req.has_header("Content-Length")) {
@@ -4271,7 +4351,7 @@ inline bool Server::handle_file_request(Request &req, Response &res,
           auto type =
               detail::find_content_type(path, file_extension_and_mimetype_map_);
           if (type) { res.set_header("Content-Type", type); }
-          for (const auto& kv : entry.headers) {
+          for (const auto &kv : entry.headers) {
             res.set_header(kv.first.c_str(), kv.second);
           }
           res.status = 200;
@@ -4290,7 +4370,7 @@ inline socket_t
 Server::create_server_socket(const char *host, int port, int socket_flags,
                              SocketOptions socket_options) const {
   return detail::create_socket(
-      host, port, socket_flags, tcp_nodelay_, socket_options,
+      host, port, socket_flags, tcp_nodelay_, std::move(socket_options),
       [](socket_t sock, struct addrinfo &ai) -> bool {
         if (::bind(sock, ai.ai_addr, static_cast<socklen_t>(ai.ai_addrlen))) {
           return false;
@@ -4392,32 +4472,37 @@ inline bool Server::routing(Request &req, Response &res, Stream &strm) {
     {
       ContentReader reader(
           [&](ContentReceiver receiver) {
-            return read_content_with_content_receiver(strm, req, res, receiver,
-                                                      nullptr, nullptr);
+            return read_content_with_content_receiver(
+                strm, req, res, std::move(receiver), nullptr, nullptr);
           },
           [&](MultipartContentHeader header, ContentReceiver receiver) {
             return read_content_with_content_receiver(strm, req, res, nullptr,
-                                                      header, receiver);
+                                                      std::move(header),
+                                                      std::move(receiver));
           });
 
       if (req.method == "POST") {
         if (dispatch_request_for_content_reader(
-                req, res, reader, post_handlers_for_content_reader_)) {
+                req, res, std::move(reader),
+                post_handlers_for_content_reader_)) {
           return true;
         }
       } else if (req.method == "PUT") {
         if (dispatch_request_for_content_reader(
-                req, res, reader, put_handlers_for_content_reader_)) {
+                req, res, std::move(reader),
+                put_handlers_for_content_reader_)) {
           return true;
         }
       } else if (req.method == "PATCH") {
         if (dispatch_request_for_content_reader(
-                req, res, reader, patch_handlers_for_content_reader_)) {
+                req, res, std::move(reader),
+                patch_handlers_for_content_reader_)) {
           return true;
         }
       } else if (req.method == "DELETE") {
         if (dispatch_request_for_content_reader(
-                req, res, reader, delete_handlers_for_content_reader_)) {
+                req, res, std::move(reader),
+                delete_handlers_for_content_reader_)) {
           return true;
         }
       }
@@ -4448,7 +4533,6 @@ inline bool Server::routing(Request &req, Response &res, Stream &strm) {
 
 inline bool Server::dispatch_request(Request &req, Response &res,
                                      const Handlers &handlers) {
-
   try {
     for (const auto &x : handlers) {
       const auto &pattern = x.first;
@@ -4531,7 +4615,8 @@ Server::process_request(Stream &strm, bool close_connection,
   if (req.has_header("Range")) {
     const auto &range_header_value = req.get_header_value("Range");
     if (!detail::parse_range_header(range_header_value, req.ranges)) {
-      // TODO: error
+      res.status = 416;
+      return write_response(strm, close_connection, req, res);
     }
   }
 
@@ -4588,11 +4673,11 @@ inline ClientImpl::ClientImpl(const std::string &host, int port)
 inline ClientImpl::ClientImpl(const std::string &host, int port,
                               const std::string &client_cert_path,
                               const std::string &client_key_path)
-    : host_(host), port_(port),
+    : error_(Error::Success), host_(host), port_(port),
       host_and_port_(host_ + ":" + std::to_string(port_)),
       client_cert_path_(client_cert_path), client_key_path_(client_key_path) {}
 
-inline ClientImpl::~ClientImpl() { stop_core(); }
+inline ClientImpl::~ClientImpl() { lock_socket_and_shutdown_and_close(); }
 
 inline bool ClientImpl::is_valid() const { return true; }
 
@@ -4653,15 +4738,47 @@ inline bool ClientImpl::create_and_connect_socket(Socket &socket) {
   return true;
 }
 
-inline void ClientImpl::close_socket(Socket &socket,
-                                     bool /*process_socket_ret*/) {
-  detail::close_socket(socket.sock);
-  socket_.sock = INVALID_SOCKET;
+inline void ClientImpl::shutdown_ssl(Socket &socket, bool shutdown_gracefully) {
+  (void)socket;
+  (void)shutdown_gracefully;
+  //If there are any requests in flight from threads other than us, then it's
+  //a thread-unsafe race because individual ssl* objects are not thread-safe. 
+  assert(socket_requests_in_flight_ == 0 ||
+         socket_requests_are_from_thread_ == std::this_thread::get_id());
+}
+
+inline void ClientImpl::shutdown_socket(Socket &socket) {
+  if (socket.sock == INVALID_SOCKET)
+    return;  
+  detail::shutdown_socket(socket.sock);
+}
+ 
+inline void ClientImpl::close_socket(Socket &socket) {
+  // If there are requests in flight in another thread, usually closing
+  // the socket will be fine and they will simply receive an error when
+  // using the closed socket, but it is still a bug since rarely the OS
+  // may reassign the socket id to be used for a new socket, and then
+  // suddenly they will be operating on a live socket that is different
+  // than the one they intended!
+  assert(socket_requests_in_flight_ == 0 ||
+         socket_requests_are_from_thread_ == std::this_thread::get_id());
+  // It is also a bug if this happens while SSL is still active
 #ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-  socket_.ssl = nullptr;
+  assert(socket.ssl == nullptr);
 #endif
+  if (socket.sock == INVALID_SOCKET)
+    return;
+  detail::close_socket(socket.sock);
+  socket.sock = INVALID_SOCKET;
 }
 
+inline void ClientImpl::lock_socket_and_shutdown_and_close() {
+  std::lock_guard<std::mutex> guard(socket_mutex_);
+  shutdown_ssl(socket_, true);
+  shutdown_socket(socket_);
+  close_socket(socket_);
+}
+ 
 inline bool ClientImpl::read_response_line(Stream &strm, Response &res) {
   std::array<char, 2048> buf;
 
@@ -4696,11 +4813,23 @@ inline bool ClientImpl::send(const Request &req, Response &res) {
 
   {
     std::lock_guard<std::mutex> guard(socket_mutex_);
+    // Set this to false immediately - if it ever gets set to true by the end of the
+    // request, we know another thread instructed us to close the socket.
+    socket_should_be_closed_when_request_is_done_ = false;
 
     auto is_alive = false;
     if (socket_.is_open()) {
       is_alive = detail::select_write(socket_.sock, 0, 0) > 0;
-      if (!is_alive) { close_socket(socket_, false); }
+      if (!is_alive) {
+        // Attempt to avoid sigpipe by shutting down nongracefully if it seems like
+        // the other side has already closed the connection
+        // Also, there cannot be any requests in flight from other threads since we locked
+        // request_mutex_, so safe to close everything immediately
+        const bool shutdown_gracefully = false;
+        shutdown_ssl(socket_, shutdown_gracefully);
+        shutdown_socket(socket_);
+        close_socket(socket_);
+      }
     }
 
     if (!is_alive) {
@@ -4721,15 +4850,38 @@ inline bool ClientImpl::send(const Request &req, Response &res) {
       }
 #endif
     }
+
+    // Mark the current socket as being in use so that it cannot be closed by anyone
+    // else while this request is ongoing, even though we will be releasing the mutex.
+    if (socket_requests_in_flight_ > 1) {
+      assert(socket_requests_are_from_thread_ == std::this_thread::get_id());
+    }
+    socket_requests_in_flight_ += 1;
+    socket_requests_are_from_thread_ = std::this_thread::get_id();
   }
 
   auto close_connection = !keep_alive_;
-
   auto ret = process_socket(socket_, [&](Stream &strm) {
     return handle_request(strm, req, res, close_connection);
   });
 
-  if (close_connection || !ret) { stop_core(); }
+  //Briefly lock mutex in order to mark that a request is no longer ongoing
+  {
+    std::lock_guard<std::mutex> guard(socket_mutex_);
+    socket_requests_in_flight_ -= 1;
+    if (socket_requests_in_flight_ <= 0) {
+      assert(socket_requests_in_flight_ == 0);
+      socket_requests_are_from_thread_ = std::thread::id();
+    }
+
+    if (socket_should_be_closed_when_request_is_done_ ||
+        close_connection ||
+        !ret ) {
+      shutdown_ssl(socket_, true);
+      shutdown_socket(socket_);
+      close_socket(socket_);
+    }
+  }
 
   if (!ret) {
     if (error_ == Error::Success) { error_ = Error::Unknown; }
@@ -4973,7 +5125,7 @@ inline bool ClientImpl::write_request(Stream &strm, const Request &req,
   return true;
 }
 
-inline std::shared_ptr<Response> ClientImpl::send_with_content_provider(
+inline std::unique_ptr<Response> ClientImpl::send_with_content_provider(
     const char *method, const char *path, const Headers &headers,
     const std::string &body, size_t content_length,
     ContentProvider content_provider, const char *content_type) {
@@ -5036,15 +5188,15 @@ inline std::shared_ptr<Response> ClientImpl::send_with_content_provider(
   {
     if (content_provider) {
       req.content_length = content_length;
-      req.content_provider = content_provider;
+      req.content_provider = std::move(content_provider);
     } else {
       req.body = body;
     }
   }
 
-  auto res = std::make_shared<Response>();
+  auto res = detail::make_unique<Response>();
 
-  return send(req, *res) ? res : nullptr;
+  return send(req, *res) ? std::move(res) : nullptr;
 }
 
 inline bool ClientImpl::process_request(Stream &strm, const Request &req,
@@ -5070,16 +5222,21 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req,
   if (req.method != "HEAD" && req.method != "CONNECT") {
     auto out =
         req.content_receiver
-            ? static_cast<ContentReceiver>([&](const char *buf, size_t n) {
-                auto ret = req.content_receiver(buf, n);
-                if (!ret) { error_ = Error::Canceled; }
-                return ret;
-              })
-            : static_cast<ContentReceiver>([&](const char *buf, size_t n) {
-                if (res.body.size() + n > res.body.max_size()) { return false; }
-                res.body.append(buf, n);
-                return true;
-              });
+            ? static_cast<ContentReceiverWithProgress>(
+                  [&](const char *buf, size_t n, uint64_t off, uint64_t len) {
+                    auto ret = req.content_receiver(buf, n, off, len);
+                    if (!ret) { error_ = Error::Canceled; }
+                    return ret;
+                  })
+            : static_cast<ContentReceiverWithProgress>(
+                  [&](const char *buf, size_t n, uint64_t /*off*/,
+                      uint64_t /*len*/) {
+                    if (res.body.size() + n > res.body.max_size()) {
+                      return false;
+                    }
+                    res.body.append(buf, n);
+                    return true;
+                  });
 
     auto progress = [&](uint64_t current, uint64_t total) {
       if (!req.progress) { return true; }
@@ -5090,7 +5247,8 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req,
 
     int dummy_status;
     if (!detail::read_content(strm, res, (std::numeric_limits<size_t>::max)(),
-                              dummy_status, progress, out, decompress_)) {
+                              dummy_status, std::move(progress), std::move(out),
+                              decompress_)) {
       if (error_ != Error::Canceled) { error_ = Error::Read; }
       return false;
     }
@@ -5098,7 +5256,16 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req,
 
   if (res.get_header_value("Connection") == "close" ||
       (res.version == "HTTP/1.0" && res.reason != "Connection established")) {
-    stop_core();
+    // TODO this requires a not-entirely-obvious chain of calls to be correct
+    // for this to be safe. Maybe a code refactor (such as moving this out to
+    // the send function and getting rid of the recursiveness of the mutex)
+    // could make this more obvious.
+    
+    // This is safe to call because process_request is only called by handle_request
+    // which is only called by send, which locks the request mutex during the process.
+    // It would be a bug to call it from a different thread since it's a thread-safety
+    // issue to do these things to the socket if another thread is using the socket.
+    lock_socket_and_shutdown_and_close();
   }
 
   // Log
@@ -5108,11 +5275,11 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req,
 }
 
 inline bool
-ClientImpl::process_socket(Socket &socket,
+ClientImpl::process_socket(const Socket &socket,
                            std::function<bool(Stream &strm)> callback) {
-  return detail::process_client_socket(socket.sock, read_timeout_sec_,
-                                       read_timeout_usec_, write_timeout_sec_,
-                                       write_timeout_usec_, callback);
+  return detail::process_client_socket(
+      socket.sock, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_,
+      write_timeout_usec_, std::move(callback));
 }
 
 inline bool ClientImpl::is_ssl() const { return false; }
@@ -5138,9 +5305,9 @@ inline Result ClientImpl::Get(const char *path, const Headers &headers,
   req.headers.insert(headers.begin(), headers.end());
   req.progress = std::move(progress);
 
-  auto res = std::make_shared<Response>();
+  auto res = detail::make_unique<Response>();
   auto ret = send(req, *res);
-  return Result{ret ? res : nullptr, get_last_error()};
+  return Result{ret ? std::move(res) : nullptr, get_last_error()};
 }
 
 inline Result ClientImpl::Get(const char *path,
@@ -5170,23 +5337,23 @@ inline Result ClientImpl::Get(const char *path, const Headers &headers,
 inline Result ClientImpl::Get(const char *path,
                               ResponseHandler response_handler,
                               ContentReceiver content_receiver) {
-  return Get(path, Headers(), std::move(response_handler), content_receiver,
-             nullptr);
+  return Get(path, Headers(), std::move(response_handler),
+             std::move(content_receiver), nullptr);
 }
 
 inline Result ClientImpl::Get(const char *path, const Headers &headers,
                               ResponseHandler response_handler,
                               ContentReceiver content_receiver) {
-  return Get(path, headers, std::move(response_handler), content_receiver,
-             nullptr);
+  return Get(path, headers, std::move(response_handler),
+             std::move(content_receiver), nullptr);
 }
 
 inline Result ClientImpl::Get(const char *path,
                               ResponseHandler response_handler,
                               ContentReceiver content_receiver,
                               Progress progress) {
-  return Get(path, Headers(), std::move(response_handler), content_receiver,
-             progress);
+  return Get(path, Headers(), std::move(response_handler),
+             std::move(content_receiver), std::move(progress));
 }
 
 inline Result ClientImpl::Get(const char *path, const Headers &headers,
@@ -5199,12 +5366,16 @@ inline Result ClientImpl::Get(const char *path, const Headers &headers,
   req.headers = default_headers_;
   req.headers.insert(headers.begin(), headers.end());
   req.response_handler = std::move(response_handler);
-  req.content_receiver = std::move(content_receiver);
+  req.content_receiver =
+      [content_receiver](const char *data, size_t data_length,
+                         uint64_t /*offset*/, uint64_t /*total_length*/) {
+        return content_receiver(data, data_length);
+      };
   req.progress = std::move(progress);
 
-  auto res = std::make_shared<Response>();
+  auto res = detail::make_unique<Response>();
   auto ret = send(req, *res);
-  return Result{ret ? res : nullptr, get_last_error()};
+  return Result{ret ? std::move(res) : nullptr, get_last_error()};
 }
 
 inline Result ClientImpl::Head(const char *path) {
@@ -5218,9 +5389,9 @@ inline Result ClientImpl::Head(const char *path, const Headers &headers) {
   req.headers.insert(headers.begin(), headers.end());
   req.path = path;
 
-  auto res = std::make_shared<Response>();
+  auto res = detail::make_unique<Response>();
   auto ret = send(req, *res);
-  return Result{ret ? res : nullptr, get_last_error()};
+  return Result{ret ? std::move(res) : nullptr, get_last_error()};
 }
 
 inline Result ClientImpl::Post(const char *path) {
@@ -5237,7 +5408,7 @@ inline Result ClientImpl::Post(const char *path, const Headers &headers,
                                const char *content_type) {
   auto ret = send_with_content_provider("POST", path, headers, body, 0, nullptr,
                                         content_type);
-  return Result{ret, get_last_error()};
+  return Result{std::move(ret), get_last_error()};
 }
 
 inline Result ClientImpl::Post(const char *path, const Params &params) {
@@ -5247,17 +5418,18 @@ inline Result ClientImpl::Post(const char *path, const Params &params) {
 inline Result ClientImpl::Post(const char *path, size_t content_length,
                                ContentProvider content_provider,
                                const char *content_type) {
-  return Post(path, Headers(), content_length, content_provider, content_type);
+  return Post(path, Headers(), content_length, std::move(content_provider),
+              content_type);
 }
 
 inline Result ClientImpl::Post(const char *path, const Headers &headers,
                                size_t content_length,
                                ContentProvider content_provider,
                                const char *content_type) {
-  auto ret = send_with_content_provider("POST", path, headers, std::string(),
-                                        content_length, content_provider,
-                                        content_type);
-  return Result{ret, get_last_error()};
+  auto ret = send_with_content_provider(
+      "POST", path, headers, std::string(), content_length,
+      std::move(content_provider), content_type);
+  return Result{std::move(ret), get_last_error()};
 }
 
 inline Result ClientImpl::Post(const char *path, const Headers &headers,
@@ -5273,7 +5445,18 @@ inline Result ClientImpl::Post(const char *path,
 
 inline Result ClientImpl::Post(const char *path, const Headers &headers,
                                const MultipartFormDataItems &items) {
-  auto boundary = detail::make_multipart_data_boundary();
+  return Post(path, headers, items, detail::make_multipart_data_boundary());
+}
+inline Result ClientImpl::Post(const char *path, const Headers &headers,
+                               const MultipartFormDataItems &items,
+                               const std::string &boundary) {
+  for (size_t i = 0; i < boundary.size(); i++) {
+    char c = boundary[i];
+    if (!std::isalnum(c) && c != '-' && c != '_') {
+      error_ = Error::UnsupportedMultipartBoundaryChars;
+      return Result{nullptr, error_};
+    }
+  }
 
   std::string body;
 
@@ -5311,23 +5494,24 @@ inline Result ClientImpl::Put(const char *path, const Headers &headers,
                               const char *content_type) {
   auto ret = send_with_content_provider("PUT", path, headers, body, 0, nullptr,
                                         content_type);
-  return Result{ret, get_last_error()};
+  return Result{std::move(ret), get_last_error()};
 }
 
 inline Result ClientImpl::Put(const char *path, size_t content_length,
                               ContentProvider content_provider,
                               const char *content_type) {
-  return Put(path, Headers(), content_length, content_provider, content_type);
+  return Put(path, Headers(), content_length, std::move(content_provider),
+             content_type);
 }
 
 inline Result ClientImpl::Put(const char *path, const Headers &headers,
                               size_t content_length,
                               ContentProvider content_provider,
                               const char *content_type) {
-  auto ret = send_with_content_provider("PUT", path, headers, std::string(),
-                                        content_length, content_provider,
-                                        content_type);
-  return Result{ret, get_last_error()};
+  auto ret = send_with_content_provider(
+      "PUT", path, headers, std::string(), content_length,
+      std::move(content_provider), content_type);
+  return Result{std::move(ret), get_last_error()};
 }
 
 inline Result ClientImpl::Put(const char *path, const Params &params) {
@@ -5350,23 +5534,24 @@ inline Result ClientImpl::Patch(const char *path, const Headers &headers,
                                 const char *content_type) {
   auto ret = send_with_content_provider("PATCH", path, headers, body, 0,
                                         nullptr, content_type);
-  return Result{ret, get_last_error()};
+  return Result{std::move(ret), get_last_error()};
 }
 
 inline Result ClientImpl::Patch(const char *path, size_t content_length,
                                 ContentProvider content_provider,
                                 const char *content_type) {
-  return Patch(path, Headers(), content_length, content_provider, content_type);
+  return Patch(path, Headers(), content_length, std::move(content_provider),
+               content_type);
 }
 
 inline Result ClientImpl::Patch(const char *path, const Headers &headers,
                                 size_t content_length,
                                 ContentProvider content_provider,
                                 const char *content_type) {
-  auto ret = send_with_content_provider("PATCH", path, headers, std::string(),
-                                        content_length, content_provider,
-                                        content_type);
-  return Result{ret, get_last_error()};
+  auto ret = send_with_content_provider(
+      "PATCH", path, headers, std::string(), content_length,
+      std::move(content_provider), content_type);
+  return Result{std::move(ret), get_last_error()};
 }
 
 inline Result ClientImpl::Delete(const char *path) {
@@ -5394,9 +5579,9 @@ inline Result ClientImpl::Delete(const char *path, const Headers &headers,
   if (content_type) { req.headers.emplace("Content-Type", content_type); }
   req.body = body;
 
-  auto res = std::make_shared<Response>();
+  auto res = detail::make_unique<Response>();
   auto ret = send(req, *res);
-  return Result{ret ? res : nullptr, get_last_error()};
+  return Result{ret ? std::move(res) : nullptr, get_last_error()};
 }
 
 inline Result ClientImpl::Options(const char *path) {
@@ -5410,9 +5595,9 @@ inline Result ClientImpl::Options(const char *path, const Headers &headers) {
   req.headers.insert(headers.begin(), headers.end());
   req.path = path;
 
-  auto res = std::make_shared<Response>();
+  auto res = detail::make_unique<Response>();
   auto ret = send(req, *res);
-  return Result{ret ? res : nullptr, get_last_error()};
+  return Result{ret ? std::move(res) : nullptr, get_last_error()};
 }
 
 inline size_t ClientImpl::is_socket_open() const {
@@ -5421,18 +5606,27 @@ inline size_t ClientImpl::is_socket_open() const {
 }
 
 inline void ClientImpl::stop() {
-  stop_core();
-  error_ = Error::Canceled;
-}
-
-inline void ClientImpl::stop_core() {
   std::lock_guard<std::mutex> guard(socket_mutex_);
-  if (socket_.is_open()) {
-    detail::shutdown_socket(socket_.sock);
-    std::this_thread::sleep_for(std::chrono::milliseconds(1));
-    close_socket(socket_, true);
-    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+  // There is no guarantee that this doesn't get overwritten later, but set it so that
+  // there is a good chance that any threads stopping as a result pick up this error.
+  error_ = Error::Canceled;
+  
+  // If there is anything ongoing right now, the ONLY thread-safe thing we can do
+  // is to shutdown_socket, so that threads using this socket suddenly discover
+  // they can't read/write any more and error out.
+  // Everything else (closing the socket, shutting ssl down) is unsafe because these
+  // actions are not thread-safe.
+  if (socket_requests_in_flight_ > 0) {
+    shutdown_socket(socket_);
+    // Aside from that, we set a flag for the socket to be closed when we're done.
+    socket_should_be_closed_when_request_is_done_ = true;
+    return;
   }
+
+  //Otherwise, sitll holding the mutex, we can shut everything down ourselves
+  shutdown_ssl(socket_, true);
+  shutdown_socket(socket_);
+  close_socket(socket_);
 }
 
 inline void ClientImpl::set_connection_timeout(time_t sec, time_t usec) {
@@ -5479,7 +5673,7 @@ inline void ClientImpl::set_default_headers(Headers headers) {
 inline void ClientImpl::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; }
 
 inline void ClientImpl::set_socket_options(SocketOptions socket_options) {
-  socket_options_ = socket_options;
+  socket_options_ = std::move(socket_options);
 }
 
 inline void ClientImpl::set_compress(bool on) { compress_ = on; }
@@ -5554,9 +5748,12 @@ inline SSL *ssl_new(socket_t sock, SSL_CTX *ctx, std::mutex &ctx_mutex,
 }
 
 inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl,
-                       bool process_socket_ret) {
-  if (process_socket_ret) {
-    SSL_shutdown(ssl); // shutdown only if not already closed by remote
+                       bool shutdown_gracefully) {
+  // sometimes we may want to skip this to try to avoid SIGPIPE if we know
+  // the remote has closed the network connection
+  // Note that it is not always possible to avoid SIGPIPE, this is merely a best-efforts.
+  if (shutdown_gracefully) {
+    SSL_shutdown(ssl); 
   }
 
   std::lock_guard<std::mutex> guard(ctx_mutex);
@@ -5650,22 +5847,7 @@ inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl,
       read_timeout_usec_(read_timeout_usec),
       write_timeout_sec_(write_timeout_sec),
       write_timeout_usec_(write_timeout_usec) {
-  {
-    timeval tv;
-    tv.tv_sec = static_cast<long>(read_timeout_sec);
-    tv.tv_usec = static_cast<decltype(tv.tv_usec)>(read_timeout_usec);
-
-    setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char *>(&tv),
-               sizeof(tv));
-  }
-  {
-    timeval tv;
-    tv.tv_sec = static_cast<long>(write_timeout_sec);
-    tv.tv_usec = static_cast<decltype(tv.tv_usec)>(write_timeout_usec);
-
-    setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, reinterpret_cast<char *>(&tv),
-               sizeof(tv));
-  }
+  SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY);
 }
 
 inline SSLSocketStream::~SSLSocketStream() {}
@@ -5680,8 +5862,27 @@ inline bool SSLSocketStream::is_writable() const {
 }
 
 inline ssize_t SSLSocketStream::read(char *ptr, size_t size) {
-  if (SSL_pending(ssl_) > 0 || is_readable()) {
+  if (SSL_pending(ssl_) > 0) {
     return SSL_read(ssl_, ptr, static_cast<int>(size));
+  } else if (is_readable()) {
+    auto ret = SSL_read(ssl_, ptr, static_cast<int>(size));
+    if (ret < 0) {
+      auto err = SSL_get_error(ssl_, ret);
+      while (err == SSL_ERROR_WANT_READ) {
+        if (SSL_pending(ssl_) > 0) {
+          return SSL_read(ssl_, ptr, static_cast<int>(size));
+        } else if (is_readable()) {
+          ret = SSL_read(ssl_, ptr, static_cast<int>(size));
+          if (ret >= 0) {
+            return ret;
+          }
+          err = SSL_get_error(ssl_, ret);
+        } else {
+          return -1;
+        }
+      }
+    }
+    return ret;
   }
   return -1;
 }
@@ -5788,9 +5989,12 @@ inline bool SSLServer::process_and_close_socket(socket_t sock) {
         });
 
     detail::ssl_delete(ctx_mutex_, ssl, ret);
+    detail::shutdown_socket(sock);
+    detail::close_socket(sock);
     return ret;
   }
 
+  detail::shutdown_socket(sock);
   detail::close_socket(sock);
   return false;
 }
@@ -5843,6 +6047,10 @@ inline SSLClient::SSLClient(const std::string &host, int port,
 
 inline SSLClient::~SSLClient() {
   if (ctx_) { SSL_CTX_free(ctx_); }
+  // Make sure to shut down SSL since shutdown_ssl will resolve to the
+  // base function rather than the derived function once we get to the
+  // base class destructor, and won't free the SSL (causing a leak).
+  SSLClient::shutdown_ssl(socket_, true);
 }
 
 inline bool SSLClient::is_valid() const { return ctx_; }
@@ -5876,11 +6084,11 @@ inline bool SSLClient::create_and_connect_socket(Socket &socket) {
   return is_valid() && ClientImpl::create_and_connect_socket(socket);
 }
 
+// Assumes that socket_mutex_ is locked and that there are no requests in flight
 inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res,
                                           bool &success) {
   success = true;
   Response res2;
-
   if (!detail::process_client_socket(
           socket.sock, read_timeout_sec_, read_timeout_usec_,
           write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) {
@@ -5889,7 +6097,10 @@ inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res,
             req2.path = host_and_port_;
             return process_request(strm, req2, res2, false);
           })) {
-    close_socket(socket, true);
+    // Thread-safe to close everything because we are assuming there are no requests in flight
+    shutdown_ssl(socket, true);
+    shutdown_socket(socket);
+    close_socket(socket);
     success = false;
     return false;
   }
@@ -5912,7 +6123,10 @@ inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res,
                       true));
                   return process_request(strm, req3, res3, false);
                 })) {
-          close_socket(socket, true);
+          // Thread-safe to close everything because we are assuming there are no requests in flight
+          shutdown_ssl(socket, true);
+          shutdown_socket(socket);
+          close_socket(socket);
           success = false;
           return false;
         }
@@ -6005,26 +6219,30 @@ inline bool SSLClient::initialize_ssl(Socket &socket) {
     return true;
   }
 
-  close_socket(socket, false);
+  shutdown_socket(socket);
+  close_socket(socket);
   return false;
 }
 
-inline void SSLClient::close_socket(Socket &socket, bool process_socket_ret) {
-  detail::close_socket(socket.sock);
-  socket_.sock = INVALID_SOCKET;
+inline void SSLClient::shutdown_ssl(Socket &socket, bool shutdown_gracefully) {
+  if (socket.sock == INVALID_SOCKET) {
+    assert(socket.ssl == nullptr);
+    return;
+  }
   if (socket.ssl) {
-    detail::ssl_delete(ctx_mutex_, socket.ssl, process_socket_ret);
-    socket_.ssl = nullptr;
+    detail::ssl_delete(ctx_mutex_, socket.ssl, shutdown_gracefully);
+    socket.ssl = nullptr;
   }
+  assert(socket.ssl == nullptr);
 }
 
 inline bool
-SSLClient::process_socket(Socket &socket,
+SSLClient::process_socket(const Socket &socket,
                           std::function<bool(Stream &strm)> callback) {
   assert(socket.ssl);
   return detail::process_client_socket_ssl(
       socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_,
-      write_timeout_sec_, write_timeout_usec_, callback);
+      write_timeout_sec_, write_timeout_usec_, std::move(callback));
 }
 
 inline bool SSLClient::is_ssl() const { return true; }
@@ -6175,6 +6393,8 @@ inline Client::Client(const char *scheme_host_port,
 #else
     if (!scheme.empty() && scheme != "http") {
 #endif
+      std::string msg = "'" + scheme + "' scheme is not supported.";
+      throw std::invalid_argument(msg);
       return;
     }
 
@@ -6187,28 +6407,28 @@ inline Client::Client(const char *scheme_host_port,
 
     if (is_ssl) {
 #ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-      cli_ = std::make_shared<SSLClient>(host.c_str(), port, client_cert_path,
-                                         client_key_path);
+      cli_ = detail::make_unique<SSLClient>(host.c_str(), port,
+                                            client_cert_path, client_key_path);
       is_ssl_ = is_ssl;
 #endif
     } else {
-      cli_ = std::make_shared<ClientImpl>(host.c_str(), port, client_cert_path,
-                                          client_key_path);
+      cli_ = detail::make_unique<ClientImpl>(host.c_str(), port,
+                                             client_cert_path, client_key_path);
     }
   } else {
-    cli_ = std::make_shared<ClientImpl>(scheme_host_port, 80, client_cert_path,
-                                        client_key_path);
+    cli_ = detail::make_unique<ClientImpl>(scheme_host_port, 80,
+                                           client_cert_path, client_key_path);
   }
 }
 
 inline Client::Client(const std::string &host, int port)
-    : cli_(std::make_shared<ClientImpl>(host, port)) {}
+    : cli_(detail::make_unique<ClientImpl>(host, port)) {}
 
 inline Client::Client(const std::string &host, int port,
                       const std::string &client_cert_path,
                       const std::string &client_key_path)
-    : cli_(std::make_shared<ClientImpl>(host, port, client_cert_path,
-                                        client_key_path)) {}
+    : cli_(detail::make_unique<ClientImpl>(host, port, client_cert_path,
+                                           client_key_path)) {}
 
 inline Client::~Client() {}
 
@@ -6221,11 +6441,11 @@ inline Result Client::Get(const char *path, const Headers &headers) {
   return cli_->Get(path, headers);
 }
 inline Result Client::Get(const char *path, Progress progress) {
-  return cli_->Get(path, progress);
+  return cli_->Get(path, std::move(progress));
 }
 inline Result Client::Get(const char *path, const Headers &headers,
                           Progress progress) {
-  return cli_->Get(path, headers, progress);
+  return cli_->Get(path, headers, std::move(progress));
 }
 inline Result Client::Get(const char *path, ContentReceiver content_receiver) {
   return cli_->Get(path, std::move(content_receiver));
@@ -6262,7 +6482,8 @@ inline Result Client::Get(const char *path, ResponseHandler response_handler,
 inline Result Client::Get(const char *path, const Headers &headers,
                           ResponseHandler response_handler,
                           ContentReceiver content_receiver, Progress progress) {
-  return cli_->Get(path, headers, response_handler, content_receiver, progress);
+  return cli_->Get(path, headers, std::move(response_handler),
+                   std::move(content_receiver), std::move(progress));
 }
 
 inline Result Client::Head(const char *path) { return cli_->Head(path); }
@@ -6282,13 +6503,14 @@ inline Result Client::Post(const char *path, const Headers &headers,
 inline Result Client::Post(const char *path, size_t content_length,
                            ContentProvider content_provider,
                            const char *content_type) {
-  return cli_->Post(path, content_length, content_provider, content_type);
+  return cli_->Post(path, content_length, std::move(content_provider),
+                    content_type);
 }
 inline Result Client::Post(const char *path, const Headers &headers,
                            size_t content_length,
                            ContentProvider content_provider,
                            const char *content_type) {
-  return cli_->Post(path, headers, content_length, content_provider,
+  return cli_->Post(path, headers, content_length, std::move(content_provider),
                     content_type);
 }
 inline Result Client::Post(const char *path, const Params &params) {
@@ -6306,6 +6528,11 @@ inline Result Client::Post(const char *path, const Headers &headers,
                            const MultipartFormDataItems &items) {
   return cli_->Post(path, headers, items);
 }
+inline Result Client::Post(const char *path, const Headers &headers,
+                           const MultipartFormDataItems &items,
+                           const std::string &boundary) {
+  return cli_->Post(path, headers, items, boundary);
+}
 inline Result Client::Put(const char *path) { return cli_->Put(path); }
 inline Result Client::Put(const char *path, const std::string &body,
                           const char *content_type) {
@@ -6318,13 +6545,14 @@ inline Result Client::Put(const char *path, const Headers &headers,
 inline Result Client::Put(const char *path, size_t content_length,
                           ContentProvider content_provider,
                           const char *content_type) {
-  return cli_->Put(path, content_length, content_provider, content_type);
+  return cli_->Put(path, content_length, std::move(content_provider),
+                   content_type);
 }
 inline Result Client::Put(const char *path, const Headers &headers,
                           size_t content_length,
                           ContentProvider content_provider,
                           const char *content_type) {
-  return cli_->Put(path, headers, content_length, content_provider,
+  return cli_->Put(path, headers, content_length, std::move(content_provider),
                    content_type);
 }
 inline Result Client::Put(const char *path, const Params &params) {
@@ -6345,13 +6573,14 @@ inline Result Client::Patch(const char *path, const Headers &headers,
 inline Result Client::Patch(const char *path, size_t content_length,
                             ContentProvider content_provider,
                             const char *content_type) {
-  return cli_->Patch(path, content_length, content_provider, content_type);
+  return cli_->Patch(path, content_length, std::move(content_provider),
+                     content_type);
 }
 inline Result Client::Patch(const char *path, const Headers &headers,
                             size_t content_length,
                             ContentProvider content_provider,
                             const char *content_type) {
-  return cli_->Patch(path, headers, content_length, content_provider,
+  return cli_->Patch(path, headers, content_length, std::move(content_provider),
                      content_type);
 }
 inline Result Client::Delete(const char *path) { return cli_->Delete(path); }
@@ -6386,7 +6615,7 @@ inline void Client::set_default_headers(Headers headers) {
 
 inline void Client::set_tcp_nodelay(bool on) { cli_->set_tcp_nodelay(on); }
 inline void Client::set_socket_options(SocketOptions socket_options) {
-  cli_->set_socket_options(socket_options);
+  cli_->set_socket_options(std::move(socket_options));
 }
 
 inline void Client::set_connection_timeout(time_t sec, time_t usec) {
diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h
index 85d5bd5de..a06f6457d 100644
--- a/externals/microprofile/microprofile.h
+++ b/externals/microprofile/microprofile.h
@@ -902,8 +902,10 @@ inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
 #include <windows.h>
 #define snprintf _snprintf
 
+#ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable: 4244)
+#endif
 int64_t MicroProfileTicksPerSecondCpu()
 {
     static int64_t nTicksPerSecond = 0;
@@ -946,7 +948,11 @@ typedef HANDLE MicroProfileThread;
 DWORD _stdcall ThreadTrampoline(void* pFunc)
 {
     MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
-    return (uint32_t)F(0);
+
+    // The return value of F will always return a void*, however, this is for
+    // compatibility with pthreads. The underlying "address" of the pointer
+    // is always a 32-bit value, so this cast is safe to perform.
+    return static_cast<DWORD>(reinterpret_cast<uint64_t>(F(0)));
 }
 
 inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
@@ -1742,10 +1748,10 @@ void MicroProfileFlip()
                             }
                         }
                     }
-                    for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+                    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
                     {
-                        pLog->nGroupTicks[i] += nGroupTicks[i];
-                        pFrameGroup[i] += nGroupTicks[i];
+                        pLog->nGroupTicks[j] += nGroupTicks[j];
+                        pFrameGroup[j] += nGroupTicks[j];
                     }
                     pLog->nStackPos = nStackPos;
                 }
@@ -3328,7 +3334,7 @@ bool MicroProfileIsLocalThread(uint32_t nThreadId)
 #endif
 #else
 
-bool MicroProfileIsLocalThread(uint32_t nThreadId){return false;}
+bool MicroProfileIsLocalThread([[maybe_unused]] uint32_t nThreadId) { return false; }
 void MicroProfileStopContextSwitchTrace(){}
 void MicroProfileStartContextSwitchTrace(){}
 
@@ -3576,7 +3582,7 @@ int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu)
 
 #undef S
 
-#ifdef _WIN32
+#ifdef _MSC_VER
 #pragma warning(pop)
 #endif
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 71efbb40d..dbda528ce 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -32,7 +32,6 @@ if (MSVC)
     # /Zc:inline          - Let codegen omit inline functions in object files
     # /Zc:throwingNew     - Let codegen assume `operator new` (without std::nothrow) will never return null
     add_compile_options(
-        /W3
         /MP
         /Zi
         /Zo
@@ -43,6 +42,13 @@ if (MSVC)
         /Zc:externConstexpr
         /Zc:inline
         /Zc:throwingNew
+
+        # Warnings
+        /W3
+        /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+        /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
+        /we4555 # Expression has no effect; expected expression with side-effect
+        /we4834 # Discarding return value of function with 'nodiscard' attribute
     )
 
     # /GS- - No stack buffer overflow checks
@@ -56,6 +62,7 @@ else()
         -Werror=implicit-fallthrough
         -Werror=missing-declarations
         -Werror=reorder
+        -Werror=unused-result
         -Wextra
         -Wmissing-declarations
         -Wno-attributes
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e50ab2922..207c7a0a6 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -190,6 +190,22 @@ if(ARCHITECTURE_x86_64)
     )
 endif()
 
+if (MSVC)
+  target_compile_definitions(common PRIVATE
+    # The standard library doesn't provide any replacement for codecvt yet
+    # so we can disable this deprecation warning for the time being.
+    _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+  )
+  target_compile_options(common PRIVATE
+    /W4
+    /WX
+  )
+else()
+  target_compile_options(common PRIVATE
+    -Werror
+  )
+endif()
+
 create_target_directory_groups(common)
 find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
 
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e186ed880..b209f52fc 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -79,9 +79,9 @@ void Fiber::Exit() {
     released = true;
 }
 
-void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
     rewind_point = std::move(rewind_func);
-    rewind_parameter = start_parameter;
+    rewind_parameter = rewind_param;
 }
 
 void Fiber::Rewind() {
@@ -161,9 +161,9 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
         boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
 }
 
-void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
     rewind_point = std::move(rewind_func);
-    rewind_parameter = start_parameter;
+    rewind_parameter = rewind_param;
 }
 
 Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
diff --git a/src/common/fiber.h b/src/common/fiber.h
index cefd61df9..699286ee2 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -49,7 +49,7 @@ public:
     static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
     [[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber();
 
-    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param);
 
     void Rewind();
 
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 16c3713e0..18fbfa25b 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -472,13 +472,14 @@ u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
 }
 
 bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) {
-    const auto callback = [recursion](u64* num_entries_out, const std::string& directory,
-                                      const std::string& virtual_name) -> bool {
-        std::string new_path = directory + DIR_SEP_CHR + virtual_name;
+    const auto callback = [recursion](u64*, const std::string& directory,
+                                      const std::string& virtual_name) {
+        const std::string new_path = directory + DIR_SEP_CHR + virtual_name;
 
         if (IsDirectory(new_path)) {
-            if (recursion == 0)
+            if (recursion == 0) {
                 return false;
+            }
             return DeleteDirRecursively(new_path, recursion - 1);
         }
         return Delete(new_path);
@@ -492,7 +493,8 @@ bool DeleteDirRecursively(const std::string& directory, unsigned int recursion)
     return true;
 }
 
-void CopyDir(const std::string& source_path, const std::string& dest_path) {
+void CopyDir([[maybe_unused]] const std::string& source_path,
+             [[maybe_unused]] const std::string& dest_path) {
 #ifndef _WIN32
     if (source_path == dest_path) {
         return;
@@ -553,7 +555,7 @@ std::optional<std::string> GetCurrentDir() {
     std::string strDir = dir;
 #endif
     free(dir);
-    return std::move(strDir);
+    return strDir;
 }
 
 bool SetCurrentDir(const std::string& directory) {
@@ -772,21 +774,23 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s
 
 void SplitFilename83(const std::string& filename, std::array<char, 9>& short_name,
                      std::array<char, 4>& extension) {
-    const std::string forbidden_characters = ".\"/\\[]:;=, ";
+    static constexpr std::string_view forbidden_characters = ".\"/\\[]:;=, ";
 
     // On a FAT32 partition, 8.3 names are stored as a 11 bytes array, filled with spaces.
     short_name = {{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}};
     extension = {{' ', ' ', ' ', '\0'}};
 
-    std::string::size_type point = filename.rfind('.');
-    if (point == filename.size() - 1)
+    auto point = filename.rfind('.');
+    if (point == filename.size() - 1) {
         point = filename.rfind('.', point);
+    }
 
     // Get short name.
     int j = 0;
     for (char letter : filename.substr(0, point)) {
-        if (forbidden_characters.find(letter, 0) != std::string::npos)
+        if (forbidden_characters.find(letter, 0) != std::string::npos) {
             continue;
+        }
         if (j == 8) {
             // TODO(Link Mauve): also do that for filenames containing a space.
             // TODO(Link Mauve): handle multiple files having the same short name.
@@ -794,14 +798,15 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam
             short_name[7] = '1';
             break;
         }
-        short_name[j++] = toupper(letter);
+        short_name[j++] = static_cast<char>(std::toupper(letter));
     }
 
     // Get extension.
     if (point != std::string::npos) {
         j = 0;
-        for (char letter : filename.substr(point + 1, 3))
-            extension[j++] = toupper(letter);
+        for (char letter : filename.substr(point + 1, 3)) {
+            extension[j++] = static_cast<char>(std::toupper(letter));
+        }
     }
 }
 
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 8b587320f..840cde2a6 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -232,7 +232,7 @@ public:
 
     void Swap(IOFile& other) noexcept;
 
-    [[nodiscard]] bool Open(const std::string& filename, const char openmode[], int flags = 0);
+    bool Open(const std::string& filename, const char openmode[], int flags = 0);
     bool Close();
 
     template <typename T>
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 62cfde397..90dfa22ca 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -274,7 +274,6 @@ const char* GetLogClassName(Class log_class) {
     case Class::Count:
         break;
     }
-    UNREACHABLE();
     return "Invalid";
 }
 
@@ -293,7 +292,6 @@ const char* GetLevelName(Level log_level) {
         break;
     }
 #undef LVL
-    UNREACHABLE();
     return "Invalid";
 }
 
diff --git a/src/common/misc.cpp b/src/common/misc.cpp
index 68cb86cd1..1d5393597 100644
--- a/src/common/misc.cpp
+++ b/src/common/misc.cpp
@@ -16,16 +16,23 @@
 // Call directly after the command or use the error num.
 // This function might change the error code.
 std::string GetLastErrorMsg() {
-    static const std::size_t buff_size = 255;
+    static constexpr std::size_t buff_size = 255;
     char err_str[buff_size];
 
 #ifdef _WIN32
     FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
                    MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), err_str, buff_size, nullptr);
+    return std::string(err_str, buff_size);
+#elif defined(__GLIBC__) && (_GNU_SOURCE || (_POSIX_C_SOURCE < 200112L && _XOPEN_SOURCE < 600))
+    // Thread safe (GNU-specific)
+    const char* str = strerror_r(errno, err_str, buff_size);
+    return std::string(str);
 #else
     // Thread safe (XSI-compliant)
-    strerror_r(errno, err_str, buff_size);
+    const int success = strerror_r(errno, err_str, buff_size);
+    if (success != 0) {
+        return {};
+    }
+    return std::string(err_str);
 #endif
-
-    return std::string(err_str, buff_size);
 }
diff --git a/src/common/stream.h b/src/common/stream.h
index 2585c16af..0e40692de 100644
--- a/src/common/stream.h
+++ b/src/common/stream.h
@@ -21,6 +21,12 @@ public:
     explicit Stream();
     ~Stream();
 
+    Stream(const Stream&) = delete;
+    Stream& operator=(const Stream&) = delete;
+
+    Stream(Stream&&) = default;
+    Stream& operator=(Stream&&) = default;
+
     /// Reposition bitstream "cursor" to the specified offset from origin
     void Seek(s32 offset, SeekOrigin origin);
 
@@ -30,15 +36,15 @@ public:
     /// Writes byte at current position
     void WriteByte(u8 byte);
 
-    std::size_t GetPosition() const {
+    [[nodiscard]] std::size_t GetPosition() const {
         return position;
     }
 
-    std::vector<u8>& GetBuffer() {
+    [[nodiscard]] std::vector<u8>& GetBuffer() {
         return buffer;
     }
 
-    const std::vector<u8>& GetBuffer() const {
+    [[nodiscard]] const std::vector<u8>& GetBuffer() const {
         return buffer;
     }
 
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 84883a1d3..4cba2aaa4 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -8,6 +8,7 @@
 #include <cstdlib>
 #include <locale>
 #include <sstream>
+
 #include "common/common_paths.h"
 #include "common/logging/log.h"
 #include "common/string_util.h"
@@ -21,14 +22,14 @@ namespace Common {
 /// Make a string lowercase
 std::string ToLower(std::string str) {
     std::transform(str.begin(), str.end(), str.begin(),
-                   [](unsigned char c) { return std::tolower(c); });
+                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
     return str;
 }
 
 /// Make a string uppercase
 std::string ToUpper(std::string str) {
     std::transform(str.begin(), str.end(), str.begin(),
-                   [](unsigned char c) { return std::toupper(c); });
+                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
     return str;
 }
 
diff --git a/src/common/timer.cpp b/src/common/timer.cpp
index 2dc15e434..d17dc2a50 100644
--- a/src/common/timer.cpp
+++ b/src/common/timer.cpp
@@ -142,20 +142,18 @@ std::string Timer::GetTimeFormatted() {
 // ----------------
 double Timer::GetDoubleTime() {
     // Get continuous timestamp
-    u64 TmpSeconds = static_cast<u64>(Common::Timer::GetTimeSinceJan1970().count());
-    double ms = static_cast<u64>(GetTimeMs().count()) % 1000;
+    auto tmp_seconds = static_cast<u64>(GetTimeSinceJan1970().count());
+    const auto ms = static_cast<double>(static_cast<u64>(GetTimeMs().count()) % 1000);
 
     // Remove a few years. We only really want enough seconds to make
     // sure that we are detecting actual actions, perhaps 60 seconds is
     // enough really, but I leave a year of seconds anyway, in case the
     // user's clock is incorrect or something like that.
-    TmpSeconds = TmpSeconds - (38 * 365 * 24 * 60 * 60);
+    tmp_seconds = tmp_seconds - (38 * 365 * 24 * 60 * 60);
 
     // Make a smaller integer that fits in the double
-    u32 Seconds = static_cast<u32>(TmpSeconds);
-    double TmpTime = Seconds + ms;
-
-    return TmpTime;
+    const auto seconds = static_cast<u32>(tmp_seconds);
+    return seconds + ms;
 }
 
 } // Namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 7a20e95b7..452a2837e 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -53,7 +53,7 @@ public:
         return Common::Divide128On32(temporary, 1000000000).first;
     }
 
-    void Pause(bool is_paused) override {
+    void Pause([[maybe_unused]] bool is_paused) override {
         // Do nothing in this clock type.
     }
 
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 7c503df26..97aab6ac9 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -34,7 +34,7 @@ private:
     /// value used to reduce the native clocks accuracy as some apss rely on
     /// undefined behavior where the level of accuracy in the clock shouldn't
     /// be higher.
-    static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
+    static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
 
     SpinLock rtsc_serialize{};
     u64 last_measure{};
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e0f207f3e..9a983e81d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -454,6 +454,8 @@ add_library(core STATIC
     hle/service/nvdrv/nvdrv.h
     hle/service/nvdrv/nvmemp.cpp
     hle/service/nvdrv/nvmemp.h
+    hle/service/nvdrv/syncpoint_manager.cpp
+    hle/service/nvdrv/syncpoint_manager.h
     hle/service/nvflinger/buffer_queue.cpp
     hle/service/nvflinger/buffer_queue.h
     hle/service/nvflinger/nvflinger.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fde2ccc09..242796008 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -179,16 +179,18 @@ struct System::Impl {
         arp_manager.ResetAll();
 
         telemetry_session = std::make_unique<Core::TelemetrySession>();
+
+        gpu_core = VideoCore::CreateGPU(emu_window, system);
+        if (!gpu_core) {
+            return ResultStatus::ErrorVideoCore;
+        }
+
         service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
 
         Service::Init(service_manager, system);
         GDBStub::DeferStart();
 
         interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
-        gpu_core = VideoCore::CreateGPU(emu_window, system);
-        if (!gpu_core) {
-            return ResultStatus::ErrorVideoCore;
-        }
 
         // Initialize time manager, which must happen after kernel is created
         time_manager.Initialize();
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index ff9d9248b..b17529dee 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 #include <bitset>
+#include <ctime>
 #include <memory>
 #include <random>
 #include "common/alignment.h"
@@ -123,7 +124,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
                                                               : kernel.CreateNewUserProcessID();
     process->capabilities.InitializeForMetadatalessProcess();
 
-    std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(0));
+    std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr)));
     std::uniform_int_distribution<u64> distribution;
     std::generate(process->random_entropy.begin(), process->random_entropy.end(),
                   [&] { return distribution(rng); });
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9ad5bbf0d..eeaca44b6 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -166,7 +166,7 @@ public:
             {0, &RelocatableObject::LoadNro, "LoadNro"},
             {1, &RelocatableObject::UnloadNro, "UnloadNro"},
             {2, &RelocatableObject::LoadNrr, "LoadNrr"},
-            {3, nullptr, "UnloadNrr"},
+            {3, &RelocatableObject::UnloadNrr, "UnloadNrr"},
             {4, &RelocatableObject::Initialize, "Initialize"},
             {10, nullptr, "LoadNrrEx"},
         };
@@ -272,6 +272,20 @@ public:
         rb.Push(RESULT_SUCCESS);
     }
 
+    void UnloadNrr(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto pid = rp.Pop<u64>();
+        const auto nrr_address = rp.Pop<VAddr>();
+
+        LOG_DEBUG(Service_LDR, "called with pid={}, nrr_address={:016X}", pid, nrr_address);
+
+        nrr.erase(nrr_address);
+
+        IPC::ResponseBuilder rb{ctx, 2};
+
+        rb.Push(RESULT_SUCCESS);
+    }
+
     bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start,
                               std::size_t size) const {
         constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b27ee0502..8356a8139 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -15,8 +15,9 @@
 
 namespace Service::Nvidia::Devices {
 
-nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
-    : nvdevice(system), events_interface{events_interface} {}
+nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+                         SyncpointManager& syncpoint_manager)
+    : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
 nvhost_ctrl::~nvhost_ctrl() = default;
 
 u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -36,8 +37,8 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::v
         return IocCtrlEventRegister(input, output);
     case IoctlCommand::IocCtrlEventUnregisterCommand:
         return IocCtrlEventUnregister(input, output);
-    case IoctlCommand::IocCtrlEventSignalCommand:
-        return IocCtrlEventSignal(input, output);
+    case IoctlCommand::IocCtrlClearEventWaitCommand:
+        return IocCtrlClearEventWait(input, output);
     default:
         UNIMPLEMENTED_MSG("Unimplemented ioctl");
         return 0;
@@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
         return NvResult::BadParameter;
     }
 
+    if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+        params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
+        std::memcpy(output.data(), &params, sizeof(params));
+        return NvResult::Success;
+    }
+
+    if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
+        syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+        params.value = new_value;
+        std::memcpy(output.data(), &params, sizeof(params));
+        return NvResult::Success;
+    }
+
     auto event = events_interface.events[event_id];
     auto& gpu = system.GPU();
+
     // This is mostly to take into account unimplemented features. As synced
     // gpu is always synced.
     if (!gpu.IsAsync()) {
-        event.writable->Signal();
+        event.event.writable->Signal();
         return NvResult::Success;
     }
     auto lock = gpu.LockSync();
-    const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
+    const u32 current_syncpoint_value = event.fence.value;
     const s32 diff = current_syncpoint_value - params.threshold;
     if (diff >= 0) {
-        event.writable->Signal();
+        event.event.writable->Signal();
         params.value = current_syncpoint_value;
         std::memcpy(output.data(), &params, sizeof(params));
         return NvResult::Success;
@@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
             params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
         }
         params.value |= event_id;
-        event.writable->Clear();
+        event.event.writable->Clear();
         gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
         if (!is_async && ctrl.fresh_call) {
             ctrl.must_delay = true;
@@ -154,24 +169,22 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
     return NvResult::Success;
 }
 
-u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
     IocCtrlEventSignalParams params{};
     std::memcpy(&params, input.data(), sizeof(params));
-    // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization
-    // It is believed from RE to cancel the GPU Event. However, better research is required
-    u32 event_id = params.user_event_id & 0x00FF;
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
+
+    u32 event_id = params.event_id & 0x00FF;
+    LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
+
     if (event_id >= MaxNvEvents) {
         return NvResult::BadParameter;
     }
     if (events_interface.status[event_id] == EventState::Waiting) {
-        auto& gpu = system.GPU();
-        if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
-                                      events_interface.assigned_value[event_id])) {
-            events_interface.LiberateEvent(event_id);
-            events_interface.events[event_id].writable->Signal();
-        }
+        events_interface.LiberateEvent(event_id);
     }
+
+    syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
+
     return NvResult::Success;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 9898623de..24ad96cb9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {
 
 class nvhost_ctrl final : public nvdevice {
 public:
-    explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
+    explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+                         SyncpointManager& syncpoint_manager);
     ~nvhost_ctrl() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -31,7 +32,7 @@ private:
         IocSyncptWaitexCommand = 0xC0100019,
         IocSyncptReadMaxCommand = 0xC008001A,
         IocGetConfigCommand = 0xC183001B,
-        IocCtrlEventSignalCommand = 0xC004001C,
+        IocCtrlClearEventWaitCommand = 0xC004001C,
         IocCtrlEventWaitCommand = 0xC010001D,
         IocCtrlEventWaitAsyncCommand = 0xC010001E,
         IocCtrlEventRegisterCommand = 0xC004001F,
@@ -94,7 +95,7 @@ private:
     static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
 
     struct IocCtrlEventSignalParams {
-        u32_le user_event_id;
+        u32_le event_id;
     };
     static_assert(sizeof(IocCtrlEventSignalParams) == 4,
                   "IocCtrlEventSignalParams is incorrect size");
@@ -142,9 +143,10 @@ private:
 
     u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
 
-    u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
 
     EventInterface& events_interface;
+    SyncpointManager& syncpoint_manager;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index f1966ac0e..152019548 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -7,14 +7,20 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
-    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                       SyncpointManager& syncpoint_manager)
+    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
+    channel_fence.id = syncpoint_manager.AllocateSyncpoint();
+    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+}
+
 nvhost_gpu::~nvhost_gpu() = default;
 
 u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                 params.unk3);
 
-    auto& gpu = system.GPU();
-    params.fence_out.id = assigned_syncpoints;
-    params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
-    assigned_syncpoints++;
+    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+
+    params.fence_out = channel_fence;
+
     std::memcpy(output.data(), &params, output.size());
     return 0;
 }
@@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
     return 0;
 }
 
-u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
-    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
-        UNIMPLEMENTED();
+static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
+    return {
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {fence.value},
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
+    };
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
+    std::vector<Tegra::CommandHeader> result{
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {}};
+
+    for (u32 count = 0; count < add_increment; ++count) {
+        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+                                                      Tegra::SubmissionMode::Increasing));
+        result.emplace_back(
+            Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
     }
-    IoctlSubmitGpfifo params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+
+    return result;
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
+                                                                          u32 add_increment) {
+    std::vector<Tegra::CommandHeader> result{
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {}};
+    const std::vector<Tegra::CommandHeader> increment{
+        BuildIncrementCommandList(fence, add_increment)};
+
+    result.insert(result.end(), increment.begin(), increment.end());
+
+    return result;
+}
+
+u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+                                 Tegra::CommandList&& entries) {
     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
               params.num_entries, params.flags.raw);
 
-    ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
-                                   params.num_entries * sizeof(Tegra::CommandListHeader),
-               "Incorrect input size");
+    auto& gpu = system.GPU();
 
-    Tegra::CommandList entries(params.num_entries);
-    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
-                params.num_entries * sizeof(Tegra::CommandListHeader));
+    params.fence_out.id = channel_fence.id;
 
-    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
-    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
+    if (params.flags.add_wait.Value() &&
+        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
+        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+    }
 
-    auto& gpu = system.GPU();
-    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
-    if (params.flags.increment.Value()) {
-        params.fence_out.value += current_syncpoint_value;
+    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
+        const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
+        params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
+            params.fence_out.id, params.AddIncrementValue() + increment_value);
     } else {
-        params.fence_out.value = current_syncpoint_value;
+        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
     }
+
+    entries.RefreshIntegrityChecks(gpu);
     gpu.PushGPUEntries(std::move(entries));
 
+    if (params.flags.add_increment.Value()) {
+        if (params.flags.suppress_wfi) {
+            gpu.PushGPUEntries(Tegra::CommandList{
+                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+        } else {
+            gpu.PushGPUEntries(Tegra::CommandList{
+                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+        }
+    }
+
     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
     return 0;
 }
 
+u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
+    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
+        UNIMPLEMENTED();
+    }
+    IoctlSubmitGpfifo params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+
+    Tegra::CommandList entries(params.num_entries);
+    std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
+                params.num_entries * sizeof(Tegra::CommandListHeader));
+
+    return SubmitGPFIFOImpl(params, output, std::move(entries));
+}
+
 u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
                           const std::vector<u8>& input2, IoctlVersion version) {
     if (input.size() < sizeof(IoctlSubmitGpfifo)) {
@@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
     }
     IoctlSubmitGpfifo params{};
     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
-    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
-              params.num_entries, params.flags.raw);
 
     Tegra::CommandList entries(params.num_entries);
     if (version == IoctlVersion::Version2) {
-        std::memcpy(entries.data(), input2.data(),
+        std::memcpy(entries.command_lists.data(), input2.data(),
                     params.num_entries * sizeof(Tegra::CommandListHeader));
     } else {
-        system.Memory().ReadBlock(params.address, entries.data(),
+        system.Memory().ReadBlock(params.address, entries.command_lists.data(),
                                   params.num_entries * sizeof(Tegra::CommandListHeader));
     }
-    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
-    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
-
-    auto& gpu = system.GPU();
-    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
-    if (params.flags.increment.Value()) {
-        params.fence_out.value += current_syncpoint_value;
-    } else {
-        params.fence_out.value = current_syncpoint_value;
-    }
-    gpu.PushGPUEntries(std::move(entries));
 
-    std::memcpy(output.data(), &params, output.size());
-    return 0;
+    return SubmitGPFIFOImpl(params, output, std::move(entries));
 }
 
 u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 2ac74743f..a252fc06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -11,6 +11,11 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 #include "core/hle/service/nvdrv/nvdata.h"
+#include "video_core/dma_pusher.h"
+
+namespace Service::Nvidia {
+class SyncpointManager;
+}
 
 namespace Service::Nvidia::Devices {
 
@@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
 
 class nvhost_gpu final : public nvdevice {
 public:
-    explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                        SyncpointManager& syncpoint_manager);
     ~nvhost_gpu() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -162,10 +168,15 @@ private:
             u32_le raw;
             BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
             BitField<1, 1, u32_le> add_increment; // append an increment to the list
-            BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
+            BitField<2, 1, u32_le> new_hw_format; // mostly ignored
+            BitField<4, 1, u32_le> suppress_wfi;  // suppress wait for interrupt
             BitField<8, 1, u32_le> increment;     // increment the returned fence
         } flags;
         Fence fence_out; // returned new fence object for others to wait on
+
+        u32 AddIncrementValue() const {
+            return flags.add_increment.Value() << 1;
+        }
     };
     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
                   "IoctlSubmitGpfifo is incorrect size");
@@ -190,6 +201,8 @@ private:
     u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
     u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
     u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+                         Tegra::CommandList&& entries);
     u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
     u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
                   const std::vector<u8>& input2, IoctlVersion version);
@@ -198,7 +211,8 @@ private:
     u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
 
     std::shared_ptr<nvmap> nvmap_dev;
-    u32 assigned_syncpoints{};
+    SyncpointManager& syncpoint_manager;
+    Fence channel_fence;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 85792495f..30f03f845 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -38,7 +38,7 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
 
 namespace NvErrCodes {
 constexpr u32 Success{};
-constexpr u32 OutOfMemory{static_cast<u32>(-12)};
+[[maybe_unused]] constexpr u32 OutOfMemory{static_cast<u32>(-12)};
 constexpr u32 InvalidInput{static_cast<u32>(-22)};
 } // namespace NvErrCodes
 
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 803c1a984..a46755cdc 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -21,6 +21,7 @@
 #include "core/hle/service/nvdrv/interface.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvdrv/nvmemp.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
 
 namespace Service::Nvidia {
@@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
     nvflinger.SetNVDrvInstance(module_);
 }
 
-Module::Module(Core::System& system) {
+Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
     auto& kernel = system.Kernel();
     for (u32 i = 0; i < MaxNvEvents; i++) {
         std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
-        events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
+        events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
         events_interface.status[i] = EventState::Free;
         events_interface.registered[i] = false;
     }
     auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
     devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
-    devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
+    devices["/dev/nvhost-gpu"] =
+        std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
     devices["/dev/nvmap"] = nvmap_dev;
     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
-    devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
+    devices["/dev/nvhost-ctrl"] =
+        std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
     devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
     devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
     devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
@@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
         if (events_interface.assigned_syncpt[i] == syncpoint_id &&
             events_interface.assigned_value[i] == value) {
             events_interface.LiberateEvent(i);
-            events_interface.events[i].writable->Signal();
+            events_interface.events[i].event.writable->Signal();
         }
     }
 }
 
 std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
-    return events_interface.events[event_id].readable;
+    return events_interface.events[event_id].event.readable;
 }
 
 std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
-    return events_interface.events[event_id].writable;
+    return events_interface.events[event_id].event.writable;
 }
 
 } // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 7706a5590..f3d863dac 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -10,6 +10,7 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/service.h"
 
 namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;
 
 namespace Service::Nvidia {
 
+class SyncpointManager;
+
 namespace Devices {
 class nvdevice;
 }
 
+/// Represents an Nvidia event
+struct NvEvent {
+    Kernel::EventPair event;
+    Fence fence{};
+};
+
 struct EventInterface {
     // Mask representing currently busy events
     u64 events_mask{};
     // Each kernel event associated to an NV event
-    std::array<Kernel::EventPair, MaxNvEvents> events;
+    std::array<NvEvent, MaxNvEvents> events;
     // The status of the current NVEvent
     std::array<EventState, MaxNvEvents> status{};
     // Tells if an NVEvent is registered or not
@@ -119,6 +128,9 @@ public:
     std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
 
 private:
+    /// Manages syncpoints on the host
+    SyncpointManager syncpoint_manager;
+
     /// Id to use for the next open file descriptor.
     u32 next_fd = 1;
 
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
new file mode 100644
index 000000000..0151a03b7
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
+#include "video_core/gpu.h"
+
+namespace Service::Nvidia {
+
+SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
+
+SyncpointManager::~SyncpointManager() = default;
+
+u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
+    syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
+    return GetSyncpointMin(syncpoint_id);
+}
+
+u32 SyncpointManager::AllocateSyncpoint() {
+    for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
+        if (!syncpoints[syncpoint_id].is_allocated) {
+            syncpoints[syncpoint_id].is_allocated = true;
+            return syncpoint_id;
+        }
+    }
+    UNREACHABLE_MSG("No more available syncpoints!");
+    return {};
+}
+
+u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
+    for (u32 index = 0; index < value; ++index) {
+        syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
+    }
+
+    return GetSyncpointMax(syncpoint_id);
+}
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h
new file mode 100644
index 000000000..4168b6c7e
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.h
@@ -0,0 +1,85 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Service::Nvidia {
+
+class SyncpointManager final {
+public:
+    explicit SyncpointManager(Tegra::GPU& gpu);
+    ~SyncpointManager();
+
+    /**
+     * Returns true if the specified syncpoint is expired for the given value.
+     * @param syncpoint_id Syncpoint ID to check.
+     * @param value Value to check against the specified syncpoint.
+     * @returns True if the specified syncpoint is expired for the given value, otherwise False.
+     */
+    bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
+        return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
+    }
+
+    /**
+     * Gets the lower bound for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to get the lower bound for.
+     * @returns The lower bound for the specified syncpoint.
+     */
+    u32 GetSyncpointMin(u32 syncpoint_id) const {
+        return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
+    }
+
+    /**
+     * Gets the uper bound for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to get the upper bound for.
+     * @returns The upper bound for the specified syncpoint.
+     */
+    u32 GetSyncpointMax(u32 syncpoint_id) const {
+        return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
+    }
+
+    /**
+     * Refreshes the minimum value for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to be refreshed.
+     * @returns The new syncpoint minimum value.
+     */
+    u32 RefreshSyncpoint(u32 syncpoint_id);
+
+    /**
+     * Allocates a new syncoint.
+     * @returns The syncpoint ID for the newly allocated syncpoint.
+     */
+    u32 AllocateSyncpoint();
+
+    /**
+     * Increases the maximum value for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to be increased.
+     * @param value Value to increase the specified syncpoint by.
+     * @returns The new syncpoint maximum value.
+     */
+    u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
+
+private:
+    struct Syncpoint {
+        std::atomic<u32> min;
+        std::atomic<u32> max;
+        std::atomic<bool> is_allocated;
+    };
+
+    std::array<Syncpoint, MaxSyncPoints> syncpoints{};
+
+    Tegra::GPU& gpu;
+};
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index c64673dba..44aa2bdae 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -242,6 +242,10 @@ void NVFlinger::Compose() {
 
         const auto& igbp_buffer = buffer->get().igbp_buffer;
 
+        if (!system.IsPoweredOn()) {
+            return; // We are likely shutting down
+        }
+
         auto& gpu = system.GPU();
         const auto& multi_fence = buffer->get().multi_fence;
         guard->unlock();
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index c95feb0d7..b912188b6 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -21,26 +21,6 @@
 
 namespace GCAdapter {
 
-// Used to loop through and assign button in poller
-constexpr std::array<PadButton, 12> PadButtonArray{
-    PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, PadButton::PAD_BUTTON_DOWN,
-    PadButton::PAD_BUTTON_UP,   PadButton::PAD_TRIGGER_Z,    PadButton::PAD_TRIGGER_R,
-    PadButton::PAD_TRIGGER_L,   PadButton::PAD_BUTTON_A,     PadButton::PAD_BUTTON_B,
-    PadButton::PAD_BUTTON_X,    PadButton::PAD_BUTTON_Y,     PadButton::PAD_BUTTON_START,
-};
-
-static void PadToState(const GCPadStatus& pad, GCState& out_state) {
-    for (const auto& button : PadButtonArray) {
-        const auto button_key = static_cast<u16>(button);
-        const auto button_value = (pad.button & button_key) != 0;
-        out_state.buttons.insert_or_assign(static_cast<s32>(button_key), button_value);
-    }
-
-    for (std::size_t i = 0; i < pad.axis_values.size(); ++i) {
-        out_state.axes.insert_or_assign(static_cast<u32>(i), pad.axis_values[i]);
-    }
-}
-
 Adapter::Adapter() {
     if (usb_adapter_handle != nullptr) {
         return;
@@ -49,168 +29,263 @@ Adapter::Adapter() {
 
     const int init_res = libusb_init(&libusb_ctx);
     if (init_res == LIBUSB_SUCCESS) {
-        Setup();
+        adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this);
     } else {
         LOG_ERROR(Input, "libusb could not be initialized. failed with error = {}", init_res);
     }
 }
 
-GCPadStatus Adapter::GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload) {
-    GCPadStatus pad = {};
-    const std::size_t offset = 1 + (9 * port);
+Adapter::~Adapter() {
+    Reset();
+}
+
+void Adapter::AdapterInputThread() {
+    LOG_DEBUG(Input, "GC Adapter input thread started");
+    s32 payload_size{};
+    AdapterPayload adapter_payload{};
+
+    if (adapter_scan_thread.joinable()) {
+        adapter_scan_thread.join();
+    }
+
+    while (adapter_input_thread_running) {
+        libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(),
+                                  static_cast<s32>(adapter_payload.size()), &payload_size, 16);
+        if (IsPayloadCorrect(adapter_payload, payload_size)) {
+            UpdateControllers(adapter_payload);
+            UpdateVibrations();
+        }
+        std::this_thread::yield();
+    }
 
-    adapter_controllers_status[port] = static_cast<ControllerTypes>(adapter_payload[offset] >> 4);
+    if (restart_scan_thread) {
+        adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this);
+        restart_scan_thread = false;
+    }
+}
+
+bool Adapter::IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size) {
+    if (payload_size != static_cast<s32>(adapter_payload.size()) ||
+        adapter_payload[0] != LIBUSB_DT_HID) {
+        LOG_DEBUG(Input, "Error reading payload (size: {}, type: {:02x})", payload_size,
+                  adapter_payload[0]);
+        if (input_error_counter++ > 20) {
+            LOG_ERROR(Input, "GC adapter timeout, Is the adapter connected?");
+            adapter_input_thread_running = false;
+            restart_scan_thread = true;
+        }
+        return false;
+    }
+
+    input_error_counter = 0;
+    return true;
+}
+
+void Adapter::UpdateControllers(const AdapterPayload& adapter_payload) {
+    for (std::size_t port = 0; port < pads.size(); ++port) {
+        const std::size_t offset = 1 + (9 * port);
+        const auto type = static_cast<ControllerTypes>(adapter_payload[offset] >> 4);
+        UpdatePadType(port, type);
+        if (DeviceConnected(port)) {
+            const u8 b1 = adapter_payload[offset + 1];
+            const u8 b2 = adapter_payload[offset + 2];
+            UpdateStateButtons(port, b1, b2);
+            UpdateStateAxes(port, adapter_payload);
+            if (configuring) {
+                UpdateYuzuSettings(port);
+            }
+        }
+    }
+}
+
+void Adapter::UpdatePadType(std::size_t port, ControllerTypes pad_type) {
+    if (pads[port].type == pad_type) {
+        return;
+    }
+    // Device changed reset device and set new type
+    ResetDevice(port);
+    pads[port].type = pad_type;
+}
+
+void Adapter::UpdateStateButtons(std::size_t port, u8 b1, u8 b2) {
+    if (port >= pads.size()) {
+        return;
+    }
 
     static constexpr std::array<PadButton, 8> b1_buttons{
-        PadButton::PAD_BUTTON_A,    PadButton::PAD_BUTTON_B,    PadButton::PAD_BUTTON_X,
-        PadButton::PAD_BUTTON_Y,    PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT,
-        PadButton::PAD_BUTTON_DOWN, PadButton::PAD_BUTTON_UP,
+        PadButton::ButtonA,    PadButton::ButtonB,     PadButton::ButtonX,    PadButton::ButtonY,
+        PadButton::ButtonLeft, PadButton::ButtonRight, PadButton::ButtonDown, PadButton::ButtonUp,
     };
 
     static constexpr std::array<PadButton, 4> b2_buttons{
-        PadButton::PAD_BUTTON_START,
-        PadButton::PAD_TRIGGER_Z,
-        PadButton::PAD_TRIGGER_R,
-        PadButton::PAD_TRIGGER_L,
+        PadButton::ButtonStart,
+        PadButton::TriggerZ,
+        PadButton::TriggerR,
+        PadButton::TriggerL,
     };
+    pads[port].buttons = 0;
+    for (std::size_t i = 0; i < b1_buttons.size(); ++i) {
+        if ((b1 & (1U << i)) != 0) {
+            pads[port].buttons =
+                static_cast<u16>(pads[port].buttons | static_cast<u16>(b1_buttons[i]));
+            pads[port].last_button = b1_buttons[i];
+        }
+    }
 
+    for (std::size_t j = 0; j < b2_buttons.size(); ++j) {
+        if ((b2 & (1U << j)) != 0) {
+            pads[port].buttons =
+                static_cast<u16>(pads[port].buttons | static_cast<u16>(b2_buttons[j]));
+            pads[port].last_button = b2_buttons[j];
+        }
+    }
+}
+
+void Adapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload) {
+    if (port >= pads.size()) {
+        return;
+    }
+
+    const std::size_t offset = 1 + (9 * port);
     static constexpr std::array<PadAxes, 6> axes{
         PadAxes::StickX,    PadAxes::StickY,      PadAxes::SubstickX,
         PadAxes::SubstickY, PadAxes::TriggerLeft, PadAxes::TriggerRight,
     };
 
-    if (adapter_controllers_status[port] == ControllerTypes::None && !get_origin[port]) {
-        // Controller may have been disconnected, recalibrate if reconnected.
-        get_origin[port] = true;
+    for (const PadAxes axis : axes) {
+        const auto index = static_cast<std::size_t>(axis);
+        const u8 axis_value = adapter_payload[offset + 3 + index];
+        if (pads[port].axis_origin[index] == 255) {
+            pads[port].axis_origin[index] = axis_value;
+        }
+        pads[port].axis_values[index] =
+            static_cast<s16>(axis_value - pads[port].axis_origin[index]);
     }
+}
 
-    if (adapter_controllers_status[port] != ControllerTypes::None) {
-        const u8 b1 = adapter_payload[offset + 1];
-        const u8 b2 = adapter_payload[offset + 2];
+void Adapter::UpdateYuzuSettings(std::size_t port) {
+    if (port >= pads.size()) {
+        return;
+    }
 
-        for (std::size_t i = 0; i < b1_buttons.size(); ++i) {
-            if ((b1 & (1U << i)) != 0) {
-                pad.button = static_cast<u16>(pad.button | static_cast<u16>(b1_buttons[i]));
-            }
-        }
+    constexpr u8 axis_threshold = 50;
+    GCPadStatus pad_status = {.port = port};
 
-        for (std::size_t j = 0; j < b2_buttons.size(); ++j) {
-            if ((b2 & (1U << j)) != 0) {
-                pad.button = static_cast<u16>(pad.button | static_cast<u16>(b2_buttons[j]));
-            }
-        }
-        for (PadAxes axis : axes) {
-            const auto index = static_cast<std::size_t>(axis);
-            pad.axis_values[index] = adapter_payload[offset + 3 + index];
-        }
+    if (pads[port].buttons != 0) {
+        pad_status.button = pads[port].last_button;
+        pad_queue.Push(pad_status);
+    }
+
+    // Accounting for a threshold here to ensure an intentional press
+    for (std::size_t i = 0; i < pads[port].axis_values.size(); ++i) {
+        const s16 value = pads[port].axis_values[i];
 
-        if (get_origin[port]) {
-            origin_status[port].axis_values = pad.axis_values;
-            get_origin[port] = false;
+        if (value > axis_threshold || value < -axis_threshold) {
+            pad_status.axis = static_cast<PadAxes>(i);
+            pad_status.axis_value = value;
+            pad_status.axis_threshold = axis_threshold;
+            pad_queue.Push(pad_status);
         }
     }
-    return pad;
 }
 
-void Adapter::Read() {
-    LOG_DEBUG(Input, "GC Adapter Read() thread started");
+void Adapter::UpdateVibrations() {
+    // Use 8 states to keep the switching between on/off fast enough for
+    // a human to not notice the difference between switching from on/off
+    // More states = more rumble strengths = slower update time
+    constexpr u8 vibration_states = 8;
 
-    int payload_size;
-    std::array<u8, 37> adapter_payload;
-    std::array<GCPadStatus, 4> pads;
-
-    while (adapter_thread_running) {
-        libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(),
-                                  sizeof(adapter_payload), &payload_size, 16);
-
-        if (payload_size != sizeof(adapter_payload) || adapter_payload[0] != LIBUSB_DT_HID) {
-            LOG_ERROR(Input,
-                      "Error reading payload (size: {}, type: {:02x}) Is the adapter connected?",
-                      payload_size, adapter_payload[0]);
-            adapter_thread_running = false; // error reading from adapter, stop reading.
-            break;
-        }
-        for (std::size_t port = 0; port < pads.size(); ++port) {
-            pads[port] = GetPadStatus(port, adapter_payload);
-            if (DeviceConnected(port) && configuring) {
-                if (pads[port].button != 0) {
-                    pad_queue[port].Push(pads[port]);
-                }
+    vibration_counter = (vibration_counter + 1) % vibration_states;
 
-                // Accounting for a threshold here to ensure an intentional press
-                for (size_t i = 0; i < pads[port].axis_values.size(); ++i) {
-                    const u8 value = pads[port].axis_values[i];
-                    const u8 origin = origin_status[port].axis_values[i];
-
-                    if (value > origin + pads[port].THRESHOLD ||
-                        value < origin - pads[port].THRESHOLD) {
-                        pads[port].axis = static_cast<PadAxes>(i);
-                        pads[port].axis_value = pads[port].axis_values[i];
-                        pad_queue[port].Push(pads[port]);
-                    }
-                }
-            }
-            PadToState(pads[port], state[port]);
-        }
-        std::this_thread::yield();
+    for (GCController& pad : pads) {
+        const bool vibrate = pad.rumble_amplitude > vibration_counter;
+        vibration_changed |= vibrate != pad.enable_vibration;
+        pad.enable_vibration = vibrate;
     }
+    SendVibrations();
 }
 
-void Adapter::Setup() {
-    // Initialize all controllers as unplugged
-    adapter_controllers_status.fill(ControllerTypes::None);
-    // Initialize all ports to store axis origin values
-    get_origin.fill(true);
-
-    // pointer to list of connected usb devices
-    libusb_device** devices{};
-
-    // populate the list of devices, get the count
-    const ssize_t device_count = libusb_get_device_list(libusb_ctx, &devices);
-    if (device_count < 0) {
-        LOG_ERROR(Input, "libusb_get_device_list failed with error: {}", device_count);
+void Adapter::SendVibrations() {
+    if (!rumble_enabled || !vibration_changed) {
         return;
     }
-
-    if (devices != nullptr) {
-        for (std::size_t index = 0; index < static_cast<std::size_t>(device_count); ++index) {
-            if (CheckDeviceAccess(devices[index])) {
-                // GC Adapter found and accessible, registering it
-                GetGCEndpoint(devices[index]);
-                break;
-            }
+    s32 size{};
+    constexpr u8 rumble_command = 0x11;
+    const u8 p1 = pads[0].enable_vibration;
+    const u8 p2 = pads[1].enable_vibration;
+    const u8 p3 = pads[2].enable_vibration;
+    const u8 p4 = pads[3].enable_vibration;
+    std::array<u8, 5> payload = {rumble_command, p1, p2, p3, p4};
+    const int err = libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, payload.data(),
+                                              static_cast<s32>(payload.size()), &size, 16);
+    if (err) {
+        LOG_DEBUG(Input, "Adapter libusb write failed: {}", libusb_error_name(err));
+        if (output_error_counter++ > 5) {
+            LOG_ERROR(Input, "GC adapter output timeout, Rumble disabled");
+            rumble_enabled = false;
         }
-        libusb_free_device_list(devices, 1);
+        return;
     }
+    output_error_counter = 0;
+    vibration_changed = false;
 }
 
-bool Adapter::CheckDeviceAccess(libusb_device* device) {
-    libusb_device_descriptor desc;
-    const int get_descriptor_error = libusb_get_device_descriptor(device, &desc);
-    if (get_descriptor_error) {
-        // could not acquire the descriptor, no point in trying to use it.
-        LOG_ERROR(Input, "libusb_get_device_descriptor failed with error: {}",
-                  get_descriptor_error);
-        return false;
+bool Adapter::RumblePlay(std::size_t port, f32 amplitude) {
+    amplitude = std::clamp(amplitude, 0.0f, 1.0f);
+    const auto raw_amp = static_cast<u8>(amplitude * 0x8);
+    pads[port].rumble_amplitude = raw_amp;
+
+    return rumble_enabled;
+}
+
+void Adapter::AdapterScanThread() {
+    adapter_scan_thread_running = true;
+    adapter_input_thread_running = false;
+    if (adapter_input_thread.joinable()) {
+        adapter_input_thread.join();
+    }
+    ClearLibusbHandle();
+    ResetDevices();
+    while (adapter_scan_thread_running && !adapter_input_thread_running) {
+        Setup();
+        std::this_thread::sleep_for(std::chrono::seconds(1));
     }
+}
 
-    if (desc.idVendor != 0x057e || desc.idProduct != 0x0337) {
-        // This isn't the device we are looking for.
-        return false;
+void Adapter::Setup() {
+    usb_adapter_handle = libusb_open_device_with_vid_pid(libusb_ctx, 0x057e, 0x0337);
+
+    if (usb_adapter_handle == NULL) {
+        return;
+    }
+    if (!CheckDeviceAccess()) {
+        ClearLibusbHandle();
+        return;
     }
-    const int open_error = libusb_open(device, &usb_adapter_handle);
 
-    if (open_error == LIBUSB_ERROR_ACCESS) {
-        LOG_ERROR(Input, "Yuzu can not gain access to this device: ID {:04X}:{:04X}.",
-                  desc.idVendor, desc.idProduct);
-        return false;
+    libusb_device* device = libusb_get_device(usb_adapter_handle);
+
+    LOG_INFO(Input, "GC adapter is now connected");
+    // GC Adapter found and accessible, registering it
+    if (GetGCEndpoint(device)) {
+        adapter_scan_thread_running = false;
+        adapter_input_thread_running = true;
+        rumble_enabled = true;
+        input_error_counter = 0;
+        output_error_counter = 0;
+        adapter_input_thread = std::thread(&Adapter::AdapterInputThread, this);
     }
-    if (open_error) {
-        LOG_ERROR(Input, "libusb_open failed to open device with error = {}", open_error);
-        return false;
+}
+
+bool Adapter::CheckDeviceAccess() {
+    // This fixes payload problems from offbrand GCAdapters
+    const s32 control_transfer_error =
+        libusb_control_transfer(usb_adapter_handle, 0x21, 11, 0x0001, 0, nullptr, 0, 1000);
+    if (control_transfer_error < 0) {
+        LOG_ERROR(Input, "libusb_control_transfer failed with error= {}", control_transfer_error);
     }
 
-    int kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0);
+    s32 kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0);
     if (kernel_driver_error == 1) {
         kernel_driver_error = libusb_detach_kernel_driver(usb_adapter_handle, 0);
         if (kernel_driver_error != 0 && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) {
@@ -236,13 +311,13 @@ bool Adapter::CheckDeviceAccess(libusb_device* device) {
     return true;
 }
 
-void Adapter::GetGCEndpoint(libusb_device* device) {
+bool Adapter::GetGCEndpoint(libusb_device* device) {
     libusb_config_descriptor* config = nullptr;
     const int config_descriptor_return = libusb_get_config_descriptor(device, 0, &config);
     if (config_descriptor_return != LIBUSB_SUCCESS) {
         LOG_ERROR(Input, "libusb_get_config_descriptor failed with error = {}",
                   config_descriptor_return);
-        return;
+        return false;
     }
 
     for (u8 ic = 0; ic < config->bNumInterfaces; ic++) {
@@ -264,31 +339,51 @@ void Adapter::GetGCEndpoint(libusb_device* device) {
     unsigned char clear_payload = 0x13;
     libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, &clear_payload,
                               sizeof(clear_payload), nullptr, 16);
-
-    adapter_thread_running = true;
-    adapter_input_thread = std::thread(&Adapter::Read, this);
+    return true;
 }
 
-Adapter::~Adapter() {
-    Reset();
-}
+void Adapter::JoinThreads() {
+    restart_scan_thread = false;
+    adapter_input_thread_running = false;
+    adapter_scan_thread_running = false;
 
-void Adapter::Reset() {
-    if (adapter_thread_running) {
-        adapter_thread_running = false;
+    if (adapter_scan_thread.joinable()) {
+        adapter_scan_thread.join();
     }
+
     if (adapter_input_thread.joinable()) {
         adapter_input_thread.join();
     }
+}
 
-    adapter_controllers_status.fill(ControllerTypes::None);
-    get_origin.fill(true);
-
+void Adapter::ClearLibusbHandle() {
     if (usb_adapter_handle) {
         libusb_release_interface(usb_adapter_handle, 1);
         libusb_close(usb_adapter_handle);
         usb_adapter_handle = nullptr;
     }
+}
+
+void Adapter::ResetDevices() {
+    for (std::size_t i = 0; i < pads.size(); ++i) {
+        ResetDevice(i);
+    }
+}
+
+void Adapter::ResetDevice(std::size_t port) {
+    pads[port].type = ControllerTypes::None;
+    pads[port].enable_vibration = false;
+    pads[port].rumble_amplitude = 0;
+    pads[port].buttons = 0;
+    pads[port].last_button = PadButton::Undefined;
+    pads[port].axis_values.fill(0);
+    pads[port].axis_origin.fill(255);
+}
+
+void Adapter::Reset() {
+    JoinThreads();
+    ClearLibusbHandle();
+    ResetDevices();
 
     if (libusb_ctx) {
         libusb_exit(libusb_ctx);
@@ -297,11 +392,11 @@ void Adapter::Reset() {
 
 std::vector<Common::ParamPackage> Adapter::GetInputDevices() const {
     std::vector<Common::ParamPackage> devices;
-    for (std::size_t port = 0; port < state.size(); ++port) {
+    for (std::size_t port = 0; port < pads.size(); ++port) {
         if (!DeviceConnected(port)) {
             continue;
         }
-        std::string name = fmt::format("Gamecube Controller {}", port);
+        std::string name = fmt::format("Gamecube Controller {}", port + 1);
         devices.emplace_back(Common::ParamPackage{
             {"class", "gcpad"},
             {"display", std::move(name)},
@@ -318,18 +413,18 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice(
     // This list also excludes any button that can't be really mapped
     static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 12>
         switch_to_gcadapter_button = {
-            std::pair{Settings::NativeButton::A, PadButton::PAD_BUTTON_A},
-            {Settings::NativeButton::B, PadButton::PAD_BUTTON_B},
-            {Settings::NativeButton::X, PadButton::PAD_BUTTON_X},
-            {Settings::NativeButton::Y, PadButton::PAD_BUTTON_Y},
-            {Settings::NativeButton::Plus, PadButton::PAD_BUTTON_START},
-            {Settings::NativeButton::DLeft, PadButton::PAD_BUTTON_LEFT},
-            {Settings::NativeButton::DUp, PadButton::PAD_BUTTON_UP},
-            {Settings::NativeButton::DRight, PadButton::PAD_BUTTON_RIGHT},
-            {Settings::NativeButton::DDown, PadButton::PAD_BUTTON_DOWN},
-            {Settings::NativeButton::SL, PadButton::PAD_TRIGGER_L},
-            {Settings::NativeButton::SR, PadButton::PAD_TRIGGER_R},
-            {Settings::NativeButton::R, PadButton::PAD_TRIGGER_Z},
+            std::pair{Settings::NativeButton::A, PadButton::ButtonA},
+            {Settings::NativeButton::B, PadButton::ButtonB},
+            {Settings::NativeButton::X, PadButton::ButtonX},
+            {Settings::NativeButton::Y, PadButton::ButtonY},
+            {Settings::NativeButton::Plus, PadButton::ButtonStart},
+            {Settings::NativeButton::DLeft, PadButton::ButtonLeft},
+            {Settings::NativeButton::DUp, PadButton::ButtonUp},
+            {Settings::NativeButton::DRight, PadButton::ButtonRight},
+            {Settings::NativeButton::DDown, PadButton::ButtonDown},
+            {Settings::NativeButton::SL, PadButton::TriggerL},
+            {Settings::NativeButton::SR, PadButton::TriggerR},
+            {Settings::NativeButton::R, PadButton::TriggerZ},
         };
     if (!params.Has("port")) {
         return {};
@@ -352,8 +447,10 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice(
     for (const auto& [switch_button, gcadapter_axis] : switch_to_gcadapter_axis) {
         Common::ParamPackage button_params({{"engine", "gcpad"}});
         button_params.Set("port", params.Get("port", 0));
-        button_params.Set("button", static_cast<int>(PadButton::PAD_STICK));
-        button_params.Set("axis", static_cast<int>(gcadapter_axis));
+        button_params.Set("button", static_cast<s32>(PadButton::Stick));
+        button_params.Set("axis", static_cast<s32>(gcadapter_axis));
+        button_params.Set("threshold", 0.5f);
+        button_params.Set("direction", "+");
         mapping.insert_or_assign(switch_button, std::move(button_params));
     }
     return mapping;
@@ -382,46 +479,33 @@ InputCommon::AnalogMapping Adapter::GetAnalogMappingForDevice(
 }
 
 bool Adapter::DeviceConnected(std::size_t port) const {
-    return adapter_controllers_status[port] != ControllerTypes::None;
-}
-
-void Adapter::ResetDeviceType(std::size_t port) {
-    adapter_controllers_status[port] = ControllerTypes::None;
+    return pads[port].type != ControllerTypes::None;
 }
 
 void Adapter::BeginConfiguration() {
-    get_origin.fill(true);
-    for (auto& pq : pad_queue) {
-        pq.Clear();
-    }
+    pad_queue.Clear();
     configuring = true;
 }
 
 void Adapter::EndConfiguration() {
-    for (auto& pq : pad_queue) {
-        pq.Clear();
-    }
+    pad_queue.Clear();
     configuring = false;
 }
 
-std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() {
+Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() {
     return pad_queue;
 }
 
-const std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() const {
+const Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() const {
     return pad_queue;
 }
 
-std::array<GCState, 4>& Adapter::GetPadState() {
-    return state;
-}
-
-const std::array<GCState, 4>& Adapter::GetPadState() const {
-    return state;
+GCController& Adapter::GetPadState(std::size_t port) {
+    return pads.at(port);
 }
 
-int Adapter::GetOriginValue(u32 port, u32 axis) const {
-    return origin_status[port].axis_values[axis];
+const GCController& Adapter::GetPadState(std::size_t port) const {
+    return pads.at(port);
 }
 
 } // namespace GCAdapter
diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h
index 4f5f3de8e..d28dcfad3 100644
--- a/src/input_common/gcadapter/gc_adapter.h
+++ b/src/input_common/gcadapter/gc_adapter.h
@@ -19,24 +19,23 @@ struct libusb_device_handle;
 namespace GCAdapter {
 
 enum class PadButton {
-    PAD_BUTTON_LEFT = 0x0001,
-    PAD_BUTTON_RIGHT = 0x0002,
-    PAD_BUTTON_DOWN = 0x0004,
-    PAD_BUTTON_UP = 0x0008,
-    PAD_TRIGGER_Z = 0x0010,
-    PAD_TRIGGER_R = 0x0020,
-    PAD_TRIGGER_L = 0x0040,
-    PAD_BUTTON_A = 0x0100,
-    PAD_BUTTON_B = 0x0200,
-    PAD_BUTTON_X = 0x0400,
-    PAD_BUTTON_Y = 0x0800,
-    PAD_BUTTON_START = 0x1000,
+    Undefined = 0x0000,
+    ButtonLeft = 0x0001,
+    ButtonRight = 0x0002,
+    ButtonDown = 0x0004,
+    ButtonUp = 0x0008,
+    TriggerZ = 0x0010,
+    TriggerR = 0x0020,
+    TriggerL = 0x0040,
+    ButtonA = 0x0100,
+    ButtonB = 0x0200,
+    ButtonX = 0x0400,
+    ButtonY = 0x0800,
+    ButtonStart = 0x1000,
     // Below is for compatibility with "AxisButton" type
-    PAD_STICK = 0x2000,
+    Stick = 0x2000,
 };
 
-extern const std::array<PadButton, 12> PadButtonArray;
-
 enum class PadAxes : u8 {
     StickX,
     StickY,
@@ -47,87 +46,122 @@ enum class PadAxes : u8 {
     Undefined,
 };
 
+enum class ControllerTypes {
+    None,
+    Wired,
+    Wireless,
+};
+
 struct GCPadStatus {
-    u16 button{}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits
+    std::size_t port{};
 
-    std::array<u8, 6> axis_values{};    // Triggers and sticks, following indices defined in PadAxes
-    static constexpr u8 THRESHOLD = 50; // Threshold for axis press for polling
+    PadButton button{PadButton::Undefined}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits
 
-    u8 port{};
     PadAxes axis{PadAxes::Undefined};
-    u8 axis_value{255};
+    s16 axis_value{};
+    u8 axis_threshold{50};
 };
 
-struct GCState {
-    std::unordered_map<int, bool> buttons;
-    std::unordered_map<u32, u16> axes;
+struct GCController {
+    ControllerTypes type{};
+    bool enable_vibration{};
+    u8 rumble_amplitude{};
+    u16 buttons{};
+    PadButton last_button{};
+    std::array<s16, 6> axis_values{};
+    std::array<u8, 6> axis_origin{};
 };
 
-enum class ControllerTypes { None, Wired, Wireless };
-
 class Adapter {
 public:
-    /// Initialize the GC Adapter capture and read sequence
     Adapter();
-
-    /// Close the adapter read thread and release the adapter
     ~Adapter();
+
+    /// Request a vibration for a controlelr
+    bool RumblePlay(std::size_t port, f32 amplitude);
+
     /// Used for polling
     void BeginConfiguration();
     void EndConfiguration();
 
+    Common::SPSCQueue<GCPadStatus>& GetPadQueue();
+    const Common::SPSCQueue<GCPadStatus>& GetPadQueue() const;
+
+    GCController& GetPadState(std::size_t port);
+    const GCController& GetPadState(std::size_t port) const;
+
+    /// Returns true if there is a device connected to port
+    bool DeviceConnected(std::size_t port) const;
+
+    /// Used for automapping features
     std::vector<Common::ParamPackage> GetInputDevices() const;
     InputCommon::ButtonMapping GetButtonMappingForDevice(const Common::ParamPackage& params) const;
     InputCommon::AnalogMapping GetAnalogMappingForDevice(const Common::ParamPackage& params) const;
 
-    /// Returns true if there is a device connected to port
-    bool DeviceConnected(std::size_t port) const;
+private:
+    using AdapterPayload = std::array<u8, 37>;
 
-    std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue();
-    const std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue() const;
+    void UpdatePadType(std::size_t port, ControllerTypes pad_type);
+    void UpdateControllers(const AdapterPayload& adapter_payload);
+    void UpdateYuzuSettings(std::size_t port);
+    void UpdateStateButtons(std::size_t port, u8 b1, u8 b2);
+    void UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload);
+    void UpdateVibrations();
 
-    std::array<GCState, 4>& GetPadState();
-    const std::array<GCState, 4>& GetPadState() const;
+    void AdapterInputThread();
 
-    int GetOriginValue(u32 port, u32 axis) const;
+    void AdapterScanThread();
 
-private:
-    GCPadStatus GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload);
+    bool IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size);
+
+    // Updates vibration state of all controllers
+    void SendVibrations();
+
+    /// For use in initialization, querying devices to find the adapter
+    void Setup();
 
-    void Read();
+    /// Resets status of all GC controller devices to a disconected state
+    void ResetDevices();
 
-    /// Resets status of device connected to port
-    void ResetDeviceType(std::size_t port);
+    /// Resets status of device connected to a disconected state
+    void ResetDevice(std::size_t port);
 
     /// Returns true if we successfully gain access to GC Adapter
-    bool CheckDeviceAccess(libusb_device* device);
+    bool CheckDeviceAccess();
 
-    /// Captures GC Adapter endpoint address,
-    void GetGCEndpoint(libusb_device* device);
+    /// Captures GC Adapter endpoint address
+    /// Returns true if the endpoind was set correctly
+    bool GetGCEndpoint(libusb_device* device);
 
     /// For shutting down, clear all data, join all threads, release usb
     void Reset();
 
-    /// For use in initialization, querying devices to find the adapter
-    void Setup();
+    // Join all threads
+    void JoinThreads();
+
+    // Release usb handles
+    void ClearLibusbHandle();
 
     libusb_device_handle* usb_adapter_handle = nullptr;
+    std::array<GCController, 4> pads;
+    Common::SPSCQueue<GCPadStatus> pad_queue;
 
     std::thread adapter_input_thread;
-    bool adapter_thread_running;
+    std::thread adapter_scan_thread;
+    bool adapter_input_thread_running;
+    bool adapter_scan_thread_running;
+    bool restart_scan_thread;
 
     libusb_context* libusb_ctx;
 
-    u8 input_endpoint = 0;
-    u8 output_endpoint = 0;
-
-    bool configuring = false;
+    u8 input_endpoint{0};
+    u8 output_endpoint{0};
+    u8 input_error_counter{0};
+    u8 output_error_counter{0};
+    int vibration_counter{0};
 
-    std::array<GCState, 4> state;
-    std::array<bool, 4> get_origin;
-    std::array<GCPadStatus, 4> origin_status;
-    std::array<Common::SPSCQueue<GCPadStatus>, 4> pad_queue;
-    std::array<ControllerTypes, 4> adapter_controllers_status{};
+    bool configuring{false};
+    bool rumble_enabled{true};
+    bool vibration_changed{true};
 };
-
 } // namespace GCAdapter
diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp
index 893556916..6bd6f57fc 100644
--- a/src/input_common/gcadapter/gc_poller.cpp
+++ b/src/input_common/gcadapter/gc_poller.cpp
@@ -15,22 +15,30 @@ namespace InputCommon {
 
 class GCButton final : public Input::ButtonDevice {
 public:
-    explicit GCButton(u32 port_, int button_, const GCAdapter::Adapter* adapter)
+    explicit GCButton(u32 port_, s32 button_, GCAdapter::Adapter* adapter)
         : port(port_), button(button_), gcadapter(adapter) {}
 
     ~GCButton() override;
 
     bool GetStatus() const override {
         if (gcadapter->DeviceConnected(port)) {
-            return gcadapter->GetPadState()[port].buttons.at(button);
+            return (gcadapter->GetPadState(port).buttons & button) != 0;
         }
         return false;
     }
 
+    bool SetRumblePlay(f32 amp_high, f32 amp_low, f32 freq_high, f32 freq_low) const override {
+        const float amplitude = amp_high + amp_low > 2.0f ? 1.0f : (amp_high + amp_low) * 0.5f;
+        const auto new_amp =
+            static_cast<f32>(pow(amplitude, 0.5f) * (3.0f - 2.0f * pow(amplitude, 0.15f)));
+
+        return gcadapter->RumblePlay(port, new_amp);
+    }
+
 private:
     const u32 port;
-    const int button;
-    const GCAdapter::Adapter* gcadapter;
+    const s32 button;
+    GCAdapter::Adapter* gcadapter;
 };
 
 class GCAxisButton final : public Input::ButtonDevice {
@@ -38,13 +46,12 @@ public:
     explicit GCAxisButton(u32 port_, u32 axis_, float threshold_, bool trigger_if_greater_,
                           const GCAdapter::Adapter* adapter)
         : port(port_), axis(axis_), threshold(threshold_), trigger_if_greater(trigger_if_greater_),
-          gcadapter(adapter),
-          origin_value(static_cast<float>(adapter->GetOriginValue(port_, axis_))) {}
+          gcadapter(adapter) {}
 
     bool GetStatus() const override {
         if (gcadapter->DeviceConnected(port)) {
-            const float current_axis_value = gcadapter->GetPadState()[port].axes.at(axis);
-            const float axis_value = (current_axis_value - origin_value) / 128.0f;
+            const float current_axis_value = gcadapter->GetPadState(port).axis_values.at(axis);
+            const float axis_value = current_axis_value / 128.0f;
             if (trigger_if_greater) {
                 // TODO: Might be worthwile to set a slider for the trigger threshold. It is
                 // currently always set to 0.5 in configure_input_player.cpp ZL/ZR HandleClick
@@ -61,7 +68,6 @@ private:
     float threshold;
     bool trigger_if_greater;
     const GCAdapter::Adapter* gcadapter;
-    const float origin_value;
 };
 
 GCButtonFactory::GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_)
@@ -73,7 +79,7 @@ std::unique_ptr<Input::ButtonDevice> GCButtonFactory::Create(const Common::Param
     const auto button_id = params.Get("button", 0);
     const auto port = static_cast<u32>(params.Get("port", 0));
 
-    constexpr int PAD_STICK_ID = static_cast<u16>(GCAdapter::PadButton::PAD_STICK);
+    constexpr s32 PAD_STICK_ID = static_cast<s32>(GCAdapter::PadButton::Stick);
 
     // button is not an axis/stick button
     if (button_id != PAD_STICK_ID) {
@@ -106,32 +112,25 @@ Common::ParamPackage GCButtonFactory::GetNextInput() const {
     Common::ParamPackage params;
     GCAdapter::GCPadStatus pad;
     auto& queue = adapter->GetPadQueue();
-    for (std::size_t port = 0; port < queue.size(); ++port) {
-        while (queue[port].Pop(pad)) {
-            // This while loop will break on the earliest detected button
-            params.Set("engine", "gcpad");
-            params.Set("port", static_cast<int>(port));
-            for (const auto& button : GCAdapter::PadButtonArray) {
-                const u16 button_value = static_cast<u16>(button);
-                if (pad.button & button_value) {
-                    params.Set("button", button_value);
-                    break;
-                }
-            }
+    while (queue.Pop(pad)) {
+        // This while loop will break on the earliest detected button
+        params.Set("engine", "gcpad");
+        params.Set("port", static_cast<s32>(pad.port));
+        if (pad.button != GCAdapter::PadButton::Undefined) {
+            params.Set("button", static_cast<u16>(pad.button));
+        }
 
-            // For Axis button implementation
-            if (pad.axis != GCAdapter::PadAxes::Undefined) {
-                params.Set("axis", static_cast<u8>(pad.axis));
-                params.Set("button", static_cast<u16>(GCAdapter::PadButton::PAD_STICK));
-                if (pad.axis_value > 128) {
-                    params.Set("direction", "+");
-                    params.Set("threshold", "0.25");
-                } else {
-                    params.Set("direction", "-");
-                    params.Set("threshold", "-0.25");
-                }
-                break;
+        // For Axis button implementation
+        if (pad.axis != GCAdapter::PadAxes::Undefined) {
+            params.Set("axis", static_cast<u8>(pad.axis));
+            params.Set("button", static_cast<u16>(GCAdapter::PadButton::Stick));
+            params.Set("threshold", "0.25");
+            if (pad.axis_value > 0) {
+                params.Set("direction", "+");
+            } else {
+                params.Set("direction", "-");
             }
+            break;
         }
     }
     return params;
@@ -152,17 +151,14 @@ public:
     explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_,
                       const GCAdapter::Adapter* adapter, float range_)
         : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter),
-          origin_value_x(static_cast<float>(adapter->GetOriginValue(port_, axis_x_))),
-          origin_value_y(static_cast<float>(adapter->GetOriginValue(port_, axis_y_))),
           range(range_) {}
 
     float GetAxis(u32 axis) const {
         if (gcadapter->DeviceConnected(port)) {
             std::lock_guard lock{mutex};
-            const auto origin_value = axis % 2 == 0 ? origin_value_x : origin_value_y;
             const auto axis_value =
-                static_cast<float>(gcadapter->GetPadState()[port].axes.at(axis));
-            return (axis_value - origin_value) / (100.0f * range);
+                static_cast<float>(gcadapter->GetPadState(port).axis_values.at(axis));
+            return (axis_value) / (100.0f * range);
         }
         return 0.0f;
     }
@@ -215,8 +211,6 @@ private:
     const u32 axis_y;
     const float deadzone;
     const GCAdapter::Adapter* gcadapter;
-    const float origin_value_x;
-    const float origin_value_y;
     const float range;
     mutable std::mutex mutex;
 };
@@ -254,26 +248,44 @@ void GCAnalogFactory::EndConfiguration() {
 
 Common::ParamPackage GCAnalogFactory::GetNextInput() {
     GCAdapter::GCPadStatus pad;
+    Common::ParamPackage params;
     auto& queue = adapter->GetPadQueue();
-    for (std::size_t port = 0; port < queue.size(); ++port) {
-        while (queue[port].Pop(pad)) {
-            if (pad.axis == GCAdapter::PadAxes::Undefined ||
-                std::abs((static_cast<float>(pad.axis_value) - 128.0f) / 128.0f) < 0.1f) {
-                continue;
-            }
-            // An analog device needs two axes, so we need to store the axis for later and wait for
-            // a second input event. The axes also must be from the same joystick.
-            const u8 axis = static_cast<u8>(pad.axis);
-            if (analog_x_axis == -1) {
-                analog_x_axis = axis;
-                controller_number = static_cast<int>(port);
-            } else if (analog_y_axis == -1 && analog_x_axis != axis &&
-                       controller_number == static_cast<int>(port)) {
-                analog_y_axis = axis;
-            }
+    while (queue.Pop(pad)) {
+        if (pad.button != GCAdapter::PadButton::Undefined) {
+            params.Set("engine", "gcpad");
+            params.Set("port", static_cast<s32>(pad.port));
+            params.Set("button", static_cast<u16>(pad.button));
+            return params;
+        }
+        if (pad.axis == GCAdapter::PadAxes::Undefined ||
+            std::abs(static_cast<float>(pad.axis_value) / 128.0f) < 0.1f) {
+            continue;
+        }
+        // An analog device needs two axes, so we need to store the axis for later and wait for
+        // a second input event. The axes also must be from the same joystick.
+        const u8 axis = static_cast<u8>(pad.axis);
+        if (axis == 0 || axis == 1) {
+            analog_x_axis = 0;
+            analog_y_axis = 1;
+            controller_number = static_cast<s32>(pad.port);
+            break;
+        }
+        if (axis == 2 || axis == 3) {
+            analog_x_axis = 2;
+            analog_y_axis = 3;
+            controller_number = static_cast<s32>(pad.port);
+            break;
+        }
+
+        if (analog_x_axis == -1) {
+            analog_x_axis = axis;
+            controller_number = static_cast<s32>(pad.port);
+        } else if (analog_y_axis == -1 && analog_x_axis != axis &&
+                   controller_number == static_cast<s32>(pad.port)) {
+            analog_y_axis = axis;
+            break;
         }
     }
-    Common::ParamPackage params;
     if (analog_x_axis != -1 && analog_y_axis != -1) {
         params.Set("engine", "gcpad");
         params.Set("port", controller_number);
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 2df410be8..1adf3cd13 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -4,6 +4,7 @@
 
 #include <cstring>
 #include <fstream>
+#include <vector>
 #include "common/assert.h"
 #include "video_core/command_classes/codecs/codec.h"
 #include "video_core/command_classes/codecs/h264.h"
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 2e56daf29..5bbe6a332 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -5,8 +5,6 @@
 #pragma once
 
 #include <memory>
-#include <vector>
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/command_classes/nvdec_common.h"
 
@@ -44,11 +42,11 @@ public:
     void Decode();
 
     /// Returns most recently decoded frame
-    AVFrame* GetCurrentFrame();
-    const AVFrame* GetCurrentFrame() const;
+    [[nodiscard]] AVFrame* GetCurrentFrame();
+    [[nodiscard]] const AVFrame* GetCurrentFrame() const;
 
     /// Returns the value of current_codec
-    NvdecCommon::VideoCodec GetCurrentCodec() const;
+    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
 
 private:
     bool initialized{};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 1a39f7b23..33e063e20 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -18,17 +18,33 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //
 
+#include <array>
 #include "common/bit_util.h"
 #include "video_core/command_classes/codecs/h264.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
+namespace {
+// ZigZag LUTs from libavcodec.
+constexpr std::array<u8, 64> zig_zag_direct{
+    0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
+    41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
+    30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+constexpr std::array<u8, 16> zig_zag_scan{
+    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+} // Anonymous namespace
+
 H264::H264(GPU& gpu_) : gpu(gpu_) {}
 
 H264::~H264() = default;
 
-std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
+const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                bool is_first_frame) {
     H264DecoderContext context{};
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
 
@@ -48,7 +64,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
         writer.WriteU(0, 8);
         writer.WriteU(31, 8);
         writer.WriteUe(0);
-        const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3;
+        const auto chroma_format_idc =
+            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
         writer.WriteUe(chroma_format_idc);
         if (chroma_format_idc == 3) {
             writer.WriteBit(false);
@@ -59,8 +76,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
         writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
         writer.WriteBit(false); // Scaling matrix present flag
 
-        const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3);
-        writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf));
+        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
+        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
         writer.WriteUe(order_cnt_type);
         if (order_cnt_type == 0) {
             writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
@@ -100,7 +117,7 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
         writer.WriteUe(0);
         writer.WriteUe(0);
 
-        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag);
+        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
         writer.WriteBit(false);
         writer.WriteUe(0);
         writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
@@ -172,8 +189,8 @@ void H264BitWriter::WriteSe(s32 value) {
     WriteExpGolombCodedInt(value);
 }
 
-void H264BitWriter::WriteUe(s32 value) {
-    WriteExpGolombCodedUInt((u32)value);
+void H264BitWriter::WriteUe(u32 value) {
+    WriteExpGolombCodedUInt(value);
 }
 
 void H264BitWriter::End() {
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 21752dd90..273449495 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -38,7 +38,7 @@ public:
     /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
     void WriteU(s32 value, s32 value_sz);
     void WriteSe(s32 value);
-    void WriteUe(s32 value);
+    void WriteUe(u32 value);
 
     /// Finalize the bitstream
     void End();
@@ -51,26 +51,14 @@ public:
     void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
 
     /// Return the bitstream as a vector.
-    std::vector<u8>& GetByteArray();
-    const std::vector<u8>& GetByteArray() const;
+    [[nodiscard]] std::vector<u8>& GetByteArray();
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
 
 private:
-    // ZigZag LUTs from libavcodec.
-    static constexpr std::array<u8, 64> zig_zag_direct{
-        0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
-        41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
-        30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
-    };
-
-    static constexpr std::array<u8, 16> zig_zag_scan{
-        0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
-        1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
-    };
-
     void WriteBits(s32 value, s32 bit_count);
     void WriteExpGolombCodedInt(s32 value);
     void WriteExpGolombCodedUInt(u32 value);
-    s32 GetFreeBufferBits();
+    [[nodiscard]] s32 GetFreeBufferBits();
     void Flush();
 
     s32 buffer_size{8};
@@ -86,8 +74,8 @@ public:
     ~H264();
 
     /// Compose the H264 header of the frame for FFmpeg decoding
-    std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
-                                        bool is_first_frame = false);
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                            bool is_first_frame = false);
 
 private:
     struct H264ParameterSet {
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index d205a8f5d..ab44fdc9e 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -9,7 +9,7 @@
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
-
+namespace {
 // Default compressed header probabilities once frame context resets
 constexpr Vp9EntropyProbs default_probs{
     .y_mode_prob{
@@ -170,6 +170,89 @@ constexpr Vp9EntropyProbs default_probs{
     .high_precision{128, 128},
 };
 
+constexpr std::array<s32, 256> norm_lut{
+    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+constexpr std::array<s32, 254> map_lut{
+    20,  21,  22,  23,  24,  25,  0,   26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,
+    1,   38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  2,   50,  51,  52,  53,  54,
+    55,  56,  57,  58,  59,  60,  61,  3,   62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,
+    73,  4,   74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  5,   86,  87,  88,  89,
+    90,  91,  92,  93,  94,  95,  96,  97,  6,   98,  99,  100, 101, 102, 103, 104, 105, 106, 107,
+    108, 109, 7,   110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8,   122, 123, 124,
+    125, 126, 127, 128, 129, 130, 131, 132, 133, 9,   134, 135, 136, 137, 138, 139, 140, 141, 142,
+    143, 144, 145, 10,  146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,  158, 159,
+    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12,  170, 171, 172, 173, 174, 175, 176, 177,
+    178, 179, 180, 181, 13,  182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14,  194,
+    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15,  206, 207, 208, 209, 210, 211, 212,
+    213, 214, 215, 216, 217, 16,  218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
+    230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18,  242, 243, 244, 245, 246, 247,
+    248, 249, 250, 251, 252, 253, 19,
+};
+
+// 6.2.14 Tile size calculation
+
+[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
+    const s32 sb64_cols = (frame_width + 63) / 64;
+    s32 min_log2 = 0;
+
+    while ((64 << min_log2) < sb64_cols) {
+        min_log2++;
+    }
+
+    return min_log2;
+}
+
+[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
+    const s32 sb64_cols = (frame_width + 63) / 64;
+    s32 max_log2 = 1;
+
+    while ((sb64_cols >> max_log2) >= 4) {
+        max_log2++;
+    }
+
+    return max_log2 - 1;
+}
+
+// Recenters probability. Based on section 6.3.6 of VP9 Specification
+[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
+    if (new_prob > old_prob * 2) {
+        return new_prob;
+    }
+
+    if (new_prob >= old_prob) {
+        return (new_prob - old_prob) * 2;
+    }
+
+    return (old_prob - new_prob) * 2 - 1;
+}
+
+// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
+[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
+    new_prob--;
+    old_prob--;
+
+    std::size_t index{};
+
+    if (old_prob * 2 <= 0xff) {
+        index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
+    } else {
+        index = static_cast<std::size_t>(
+            std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
+    }
+
+    return map_lut[index];
+}
+} // Anonymous namespace
+
 VP9::VP9(GPU& gpu) : gpu(gpu) {}
 
 VP9::~VP9() = default;
@@ -207,32 +290,6 @@ void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_pro
     EncodeTermSubExp(writer, delta);
 }
 
-s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) {
-    new_prob--;
-    old_prob--;
-
-    std::size_t index{};
-
-    if (old_prob * 2 <= 0xff) {
-        index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
-    } else {
-        index = static_cast<std::size_t>(
-            std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
-    }
-
-    return map_lut[index];
-}
-
-s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) {
-    if (new_prob > old_prob * 2) {
-        return new_prob;
-    } else if (new_prob >= old_prob) {
-        return (new_prob - old_prob) * 2;
-    } else {
-        return (old_prob - new_prob) * 2 - 1;
-    }
-}
-
 void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
     if (WriteLessThan(writer, value, 16)) {
         writer.Write(value, 4);
@@ -332,28 +389,6 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
     }
 }
 
-s32 VP9::CalcMinLog2TileCols(s32 frame_width) {
-    const s32 sb64_cols = (frame_width + 63) / 64;
-    s32 min_log2 = 0;
-
-    while ((64 << min_log2) < sb64_cols) {
-        min_log2++;
-    }
-
-    return min_log2;
-}
-
-s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) {
-    const s32 sb64_cols = (frameWidth + 63) / 64;
-    s32 max_log2 = 1;
-
-    while ((sb64_cols >> max_log2) >= 4) {
-        max_log2++;
-    }
-
-    return max_log2 - 1;
-}
-
 Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
     PictureInfo picture_info{};
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
@@ -379,14 +414,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
     Vp9FrameContainer frame{};
     {
         gpu.SyncGuestHost();
-        frame.info = std::move(GetVp9PictureInfo(state));
+        frame.info = GetVp9PictureInfo(state);
 
         frame.bit_stream.resize(frame.info.bitstream_size);
         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(),
                                       frame.info.bitstream_size);
     }
     // Buffer two frames, saving the last show frame info
-    if (next_next_frame.bit_stream.size() != 0) {
+    if (!next_next_frame.bit_stream.empty()) {
         Vp9FrameContainer temp{
             .info = frame.info,
             .bit_stream = frame.bit_stream,
@@ -396,15 +431,15 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
         frame.bit_stream = next_next_frame.bit_stream;
         next_next_frame = std::move(temp);
 
-        if (next_frame.bit_stream.size() != 0) {
-            Vp9FrameContainer temp{
+        if (!next_frame.bit_stream.empty()) {
+            Vp9FrameContainer temp2{
                 .info = frame.info,
                 .bit_stream = frame.bit_stream,
             };
             next_frame.info.show_frame = frame.info.last_frame_shown;
             frame.info = next_frame.info;
             frame.bit_stream = next_frame.bit_stream;
-            next_frame = std::move(temp);
+            next_frame = std::move(temp2);
         } else {
             next_frame.info = frame.info;
             next_frame.bit_stream = frame.bit_stream;
@@ -605,12 +640,6 @@ std::vector<u8> VP9::ComposeCompressedHeader() {
 
     writer.End();
     return writer.GetBuffer();
-
-    const auto writer_bytearray = writer.GetBuffer();
-
-    std::vector<u8> compressed_header(writer_bytearray.size());
-    std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size());
-    return compressed_header;
 }
 
 VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
@@ -648,7 +677,6 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
         current_frame_info.intra_only = true;
 
     } else {
-        std::array<s32, 3> ref_frame_index;
 
         if (!current_frame_info.show_frame) {
             uncomp_writer.WriteBit(current_frame_info.intra_only);
@@ -663,9 +691,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
         }
 
         // Last, Golden, Altref frames
-        ref_frame_index = std::array<s32, 3>{0, 1, 2};
+        std::array<s32, 3> ref_frame_index{0, 1, 2};
 
-        // set when next frame is hidden
+        // Set when next frame is hidden
         // altref and golden references are swapped
         if (swap_next_golden) {
             ref_frame_index = std::array<s32, 3>{0, 2, 1};
@@ -754,17 +782,19 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
         for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
             const s8 old_deltas = loop_filter_ref_deltas[index];
             const s8 new_deltas = current_frame_info.ref_deltas[index];
+            const bool differing_delta = old_deltas != new_deltas;
 
-            loop_filter_delta_update |=
-                (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas);
+            update_loop_filter_ref_deltas[index] = differing_delta;
+            loop_filter_delta_update |= differing_delta;
         }
 
         for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
             const s8 old_deltas = loop_filter_mode_deltas[index];
             const s8 new_deltas = current_frame_info.mode_deltas[index];
+            const bool differing_delta = old_deltas != new_deltas;
 
-            loop_filter_delta_update |=
-                (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas);
+            update_loop_filter_mode_deltas[index] = differing_delta;
+            loop_filter_delta_update |= differing_delta;
         }
 
         uncomp_writer.WriteBit(loop_filter_delta_update);
@@ -824,12 +854,12 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
     return uncomp_writer;
 }
 
-std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
+const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
     std::vector<u8> bitstream;
     {
         Vp9FrameContainer curr_frame = GetCurrentFrame(state);
         current_frame_info = curr_frame.info;
-        bitstream = curr_frame.bit_stream;
+        bitstream = std::move(curr_frame.bit_stream);
     }
 
     // The uncompressed header routine sets PrevProb parameters needed for the compressed header
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 748e11bae..e2504512c 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -4,9 +4,9 @@
 
 #pragma once
 
-#include <unordered_map>
+#include <array>
 #include <vector>
-#include "common/common_funcs.h"
+
 #include "common/common_types.h"
 #include "common/stream.h"
 #include "video_core/command_classes/codecs/vp9_types.h"
@@ -25,6 +25,12 @@ public:
     VpxRangeEncoder();
     ~VpxRangeEncoder();
 
+    VpxRangeEncoder(const VpxRangeEncoder&) = delete;
+    VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
+
+    VpxRangeEncoder(VpxRangeEncoder&&) = default;
+    VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
+
     /// Writes the rightmost value_size bits from value into the stream
     void Write(s32 value, s32 value_size);
 
@@ -37,11 +43,11 @@ public:
     /// Signal the end of the bitstream
     void End();
 
-    std::vector<u8>& GetBuffer() {
+    [[nodiscard]] std::vector<u8>& GetBuffer() {
         return base_stream.GetBuffer();
     }
 
-    const std::vector<u8>& GetBuffer() const {
+    [[nodiscard]] const std::vector<u8>& GetBuffer() const {
         return base_stream.GetBuffer();
     }
 
@@ -52,17 +58,6 @@ private:
     u32 range{0xff};
     s32 count{-24};
     s32 half_probability{128};
-    static constexpr std::array<s32, 256> norm_lut{
-        0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-        3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
 };
 
 class VpxBitStreamWriter {
@@ -70,6 +65,12 @@ public:
     VpxBitStreamWriter();
     ~VpxBitStreamWriter();
 
+    VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
+    VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
+
+    VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
+    VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
+
     /// Write an unsigned integer value
     void WriteU(u32 value, u32 value_size);
 
@@ -86,10 +87,10 @@ public:
     void Flush();
 
     /// Returns byte_array
-    std::vector<u8>& GetByteArray();
+    [[nodiscard]] std::vector<u8>& GetByteArray();
 
     /// Returns const byte_array
-    const std::vector<u8>& GetByteArray() const;
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
 
 private:
     /// Write bit_count bits from value into buffer
@@ -110,12 +111,18 @@ public:
     explicit VP9(GPU& gpu);
     ~VP9();
 
+    VP9(const VP9&) = delete;
+    VP9& operator=(const VP9&) = delete;
+
+    VP9(VP9&&) = default;
+    VP9& operator=(VP9&&) = delete;
+
     /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
     /// documentation
-    std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
 
     /// Returns true if the most recent frame was a hidden frame.
-    bool WasFrameHidden() const {
+    [[nodiscard]] bool WasFrameHidden() const {
         return hidden;
     }
 
@@ -132,12 +139,6 @@ private:
     /// Generates compressed header probability deltas in the bitstream writer
     void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
 
-    /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
-    s32 RemapProbability(s32 new_prob, s32 old_prob);
-
-    /// Recenters probability. Based on section 6.3.6 of VP9 Specification
-    s32 RecenterNonNeg(s32 new_prob, s32 old_prob);
-
     /// Inverse of 6.3.4 Decode term subexp
     void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
 
@@ -157,22 +158,18 @@ private:
     /// Write motion vector probability updates. 6.3.17 in the spec
     void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
 
-    /// 6.2.14 Tile size calculation
-    s32 CalcMinLog2TileCols(s32 frame_width);
-    s32 CalcMaxLog2TileCols(s32 frame_width);
-
     /// Returns VP9 information from NVDEC provided offset and size
-    Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
 
     /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
     void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
 
     /// Returns frame to be decoded after buffering
-    Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
 
     /// Use NVDEC providied information to compose the headers for the current frame
-    std::vector<u8> ComposeCompressedHeader();
-    VpxBitStreamWriter ComposeUncompressedHeader();
+    [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
+    [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
 
     GPU& gpu;
     std::vector<u8> frame;
@@ -180,7 +177,7 @@ private:
     std::array<s8, 4> loop_filter_ref_deltas{};
     std::array<s8, 2> loop_filter_mode_deltas{};
 
-    bool hidden;
+    bool hidden = false;
     s64 current_frame_number = -2; // since we buffer 2 frames
     s32 grace_period = 6;          // frame offsets need to stabilize
     std::array<FrameContexts, 4> frame_ctxs{};
@@ -193,23 +190,6 @@ private:
 
     s32 diff_update_probability = 252;
     s32 frame_sync_code = 0x498342;
-    static constexpr std::array<s32, 254> map_lut = {
-        20,  21,  22,  23,  24,  25,  0,   26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
-        36,  37,  1,   38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  2,   50,
-        51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  3,   62,  63,  64,  65,  66,
-        67,  68,  69,  70,  71,  72,  73,  4,   74,  75,  76,  77,  78,  79,  80,  81,  82,
-        83,  84,  85,  5,   86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  6,
-        98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7,   110, 111, 112, 113,
-        114, 115, 116, 117, 118, 119, 120, 121, 8,   122, 123, 124, 125, 126, 127, 128, 129,
-        130, 131, 132, 133, 9,   134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
-        10,  146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,  158, 159, 160,
-        161, 162, 163, 164, 165, 166, 167, 168, 169, 12,  170, 171, 172, 173, 174, 175, 176,
-        177, 178, 179, 180, 181, 13,  182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
-        193, 14,  194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15,  206, 207,
-        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16,  218, 219, 220, 221, 222, 223,
-        224, 225, 226, 227, 228, 229, 17,  230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
-        240, 241, 18,  242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
-    };
 };
 
 } // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 8688fdac0..4f0b05d22 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -4,13 +4,11 @@
 
 #pragma once
 
-#include <algorithm>
-#include <list>
+#include <array>
+#include <cstring>
 #include <vector>
-#include "common/cityhash.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
 
 namespace Tegra {
 class GPU;
@@ -233,9 +231,8 @@ struct PictureInfo {
     u32 surface_params{};
     INSERT_PADDING_WORDS(3);
 
-    Vp9PictureInfo Convert() const {
-
-        return Vp9PictureInfo{
+    [[nodiscard]] Vp9PictureInfo Convert() const {
+        return {
             .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
             .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
             .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
index a5234ee47..c4dd4881a 100644
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -15,7 +15,7 @@ void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
     std::memcpy(state_offset, &arguments, sizeof(u32));
 }
 
-void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) {
+void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
     StateWrite(static_cast<u32>(method), arguments[0]);
     switch (method) {
     case Method::WaitSyncpt:
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
index 501a5ed2e..013eaa0c1 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -61,7 +61,7 @@ public:
     ~Host1x();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
 private:
     /// For Host1x, execute is waiting on a syncpoint previously written into the state
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index ede9466eb..8ca7a7b06 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -2,13 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <bitset>
 #include "common/assert.h"
-#include "common/bit_util.h"
-#include "core/memory.h"
 #include "video_core/command_classes/nvdec.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -16,7 +12,7 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
 
 Nvdec::~Nvdec() = default;
 
-void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) {
+void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
     if (method == Method::SetVideoCodec) {
         codec->StateWrite(static_cast<u32>(method), arguments[0]);
     } else {
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index c1a9d843e..eec4443f9 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
+#include <memory>
 #include <vector>
-#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/command_classes/codecs/codec.h"
 
@@ -23,17 +23,17 @@ public:
     ~Nvdec();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
     /// Return most recently decoded frame
-    AVFrame* GetFrame();
-    const AVFrame* GetFrame() const;
+    [[nodiscard]] AVFrame* GetFrame();
+    [[nodiscard]] const AVFrame* GetFrame() const;
 
 private:
     /// Invoke codec to decode a frame
     void Execute();
 
     GPU& gpu;
-    std::unique_ptr<Tegra::Codec> codec;
+    std::unique_ptr<Codec> codec;
 };
 } // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
index a0ab44855..19dc9e0ab 100644
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -27,22 +27,22 @@ SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
 SyncptIncrManager::~SyncptIncrManager() = default;
 
 void SyncptIncrManager::Increment(u32 id) {
-    increments.push_back(SyncptIncr{0, id, true});
+    increments.emplace_back(0, 0, id, true);
     IncrementAllDone();
 }
 
 u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
     const u32 handle = current_id++;
-    increments.push_back(SyncptIncr{handle, class_id, id});
+    increments.emplace_back(handle, class_id, id);
     return handle;
 }
 
 void SyncptIncrManager::SignalDone(u32 handle) {
-    auto done_incr = std::find_if(increments.begin(), increments.end(),
-                                  [handle](SyncptIncr incr) { return incr.id == handle; });
-    if (done_incr != increments.end()) {
-        const SyncptIncr incr = *done_incr;
-        *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true};
+    const auto done_incr =
+        std::find_if(increments.begin(), increments.end(),
+                     [handle](const SyncptIncr& incr) { return incr.id == handle; });
+    if (done_incr != increments.cend()) {
+        done_incr->complete = true;
     }
     IncrementAllDone();
 }
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
index 353b67573..2c321ec58 100644
--- a/src/video_core/command_classes/sync_manager.h
+++ b/src/video_core/command_classes/sync_manager.h
@@ -32,8 +32,8 @@ struct SyncptIncr {
     u32 syncpt_id;
     bool complete;
 
-    SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false)
-        : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
+    SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
+        : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
 };
 
 class SyncptIncrManager {
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 66e15a1a8..5b52da277 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -26,7 +26,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) {
     std::memcpy(state_offset, &arguments, sizeof(u32));
 }
 
-void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) {
+void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
     LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
     VicStateWrite(static_cast<u32>(method), arguments[0]);
     const u64 arg = static_cast<u64>(arguments[0]) << 8;
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index dd0a2aed8..8c4e284a1 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -63,11 +63,11 @@ public:
         SetOutputSurfaceChromaVOffset = 0x1ca
     };
 
-    explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor);
+    explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
     ~Vic();
 
     /// Write to the device state.
-    void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
 
 private:
     void Execute();
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,6 +13,20 @@
 
 namespace Tegra {
 
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+    command_list_hashes.resize(command_lists.size());
+
+    for (std::size_t index = 0; index < command_lists.size(); ++index) {
+        const CommandListHeader command_list_header = command_lists[index];
+        std::vector<CommandHeader> command_headers(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+        command_list_hashes[index] =
+            Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                               command_list_header.size * sizeof(u32));
+    }
+}
+
 DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
 
 DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,51 @@ bool DmaPusher::Step() {
         return false;
     }
 
-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    const GPUVAddr dma_get = command_list_header.addr;
-
-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
+    CommandList& command_list{dma_pushbuffer.front()};
 
-    if (command_list_header.size == 0) {
-        return true;
-    }
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // Somehow the command_list is empty, in order to avoid a crash
+            // We ignore it and assume its size is 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
 
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
+        dma_pushbuffer.pop();
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex]};
+        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+        const GPUVAddr dma_get = command_list_header.addr;
+
+        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
 
+        if (command_list_header.size == 0) {
+            return true;
+        }
+
+        // Push buffer non-empty, read a word
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+
+        // Integrity check
+        const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                                                command_list_header.size * sizeof(u32));
+        if (new_hash != next_hash) {
+            LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+            dma_pushbuffer.pop();
+            return true;
+        }
+    }
     for (std::size_t index = 0; index < command_headers.size();) {
         const CommandHeader& command_header = command_headers[index];
 
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
+// So the values you see in docs might be multiplied by 4.
+enum class BufferMethods : u32 {
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    UnkCacheFlush = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
+    WaitForInterrupt = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
 struct CommandListHeader {
     union {
         u64 raw;
@@ -49,9 +74,29 @@ union CommandHeader {
 static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
 static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
 
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+                                                  SubmissionMode mode) {
+    CommandHeader result{};
+    result.method.Assign(static_cast<u32>(method));
+    result.arg_count.Assign(arg_count);
+    result.mode.Assign(mode);
+    return result;
+}
+
 class GPU;
 
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+    CommandList() = default;
+    explicit CommandList(std::size_t size) : command_lists(size) {}
+    explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
+        : prefetch_command_list{std::move(prefetch_command_list)} {}
+
+    void RefreshIntegrityChecks(GPU& gpu);
+
+    std::vector<Tegra::CommandListHeader> command_lists;
+    std::vector<u64> command_list_hashes;
+    std::vector<Tegra::CommandHeader> prefetch_command_list;
+};
 
 /**
  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
  * details on this implementation.
  */
-class DmaPusher {
+class DmaPusher final {
 public:
     explicit DmaPusher(Core::System& system, GPU& gpu);
     ~DmaPusher();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d374b73cf..a3c05d1b0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1893,6 +1893,7 @@ public:
         ICMP_IMM,
         FCMP_RR,
         FCMP_RC,
+        FCMP_IMMR,
         MUFU,  // Multi-Function Operator
         RRO_C, // Range Reduction Operator
         RRO_R,
@@ -2205,6 +2206,7 @@ private:
             INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
             INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
             INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+            INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
 void GPU::OnCommandListEnd() {
     renderer->Rasterizer().ReleaseFences();
 }
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};
 
 enum class GpuSemaphoreOperation {
     AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::UnkCacheFlush:
     case BufferMethods::WrcacheFlush:
     case BufferMethods::FenceValue:
+        break;
     case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
         break;
     case BufferMethods::SemaphoreTrigger: {
         ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
     }
 }
 
+void GPU::ProcessFenceActionMethod() {
+    switch (regs.fence_action.op) {
+    case FenceOperation::Acquire:
+        WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        break;
+    case FenceOperation::Increment:
+        IncrementSyncPoint(regs.fence_action.syncpoint_id);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}",
+                          static_cast<u32>(regs.fence_action.op.Value()));
+    }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei) ImplementMe
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
 void GPU::ProcessSemaphoreTriggerMethod() {
     const auto semaphoreOperationMask = 0xF;
     const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
         return use_nvdec;
     }
 
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+
+        static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw};
+        }
+    };
+
     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
 
@@ -291,10 +309,7 @@ public:
                 u32 semaphore_acquire;
                 u32 semaphore_release;
                 u32 fence_value;
-                union {
-                    BitField<4, 4, u32> operation;
-                    BitField<8, 8, u32> id;
-                } fence_action;
+                FenceAction fence_action;
                 INSERT_UNION_PADDING_WORDS(0xE2);
 
                 // Puller state
@@ -342,6 +357,8 @@ protected:
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index aabd62c5c..39cc3b869 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() {
 }
 
 void AsyncShaders::AllocateWorkers() {
-    // Max worker threads we should allow
-    constexpr u32 MAX_THREADS = 4;
-    // Deduce how many threads we can use
-    const u32 threads_used = std::thread::hardware_concurrency() / 4;
-    // Always allow at least 1 thread regardless of our settings
-    const auto max_worker_count = std::max(1U, threads_used);
-    // Don't use more than MAX_THREADS
-    const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+    // Use at least one thread
+    u32 num_workers = 1;
+
+    // Deduce how many more threads we can use
+    const u32 thread_count = std::thread::hardware_concurrency();
+    if (thread_count >= 8) {
+        // Increase async workers by 1 for every 2 threads >= 8
+        num_workers += 1 + (thread_count - 8) / 2;
+    }
 
     // If we already have workers queued, ignore
     if (num_workers == worker_threads.size()) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 4db329fa5..afef5948d 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::FCMP_RR:
-    case OpCode::Id::FCMP_RC: {
+    case OpCode::Id::FCMP_RC:
+    case OpCode::Id::FCMP_IMMR: {
         UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
         Node op_c = GetRegister(instr.gpr39);
         Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index f264b98a0..67183e64c 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -71,11 +71,6 @@ struct Client::Impl {
             return {};
         }
 
-        if (!cli->is_socket_open()) {
-            LOG_ERROR(WebService, "Failed to open socket, skipping request!");
-            return {};
-        }
-
         cli->set_connection_timeout(TIMEOUT_SECONDS);
         cli->set_read_timeout(TIMEOUT_SECONDS);
         cli->set_write_timeout(TIMEOUT_SECONDS);