diff options
57 files changed, 1641 insertions, 892 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e9502a97..1bd9d2aa7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -161,7 +161,7 @@ macro(yuzu_find_packages) # Cmake Pkg Prefix Version Conan Pkg "Boost 1.73 boost/1.73.0" "Catch2 2.13 catch2/2.13.0" - "fmt 7.0 fmt/7.0.3" + "fmt 7.1 fmt/7.1.0" # can't use until https://github.com/bincrafters/community/issues/1173 #"libzip 1.5 libzip/1.5.2@bincrafters/stable" "lz4 1.8 lz4/1.9.2" diff --git a/externals/httplib/README.md b/externals/httplib/README.md index 73037d297..1940e446c 100644 --- a/externals/httplib/README.md +++ b/externals/httplib/README.md @@ -1,4 +1,4 @@ -From https://github.com/yhirose/cpp-httplib/tree/fce8e6fefdab4ad48bc5b25c98e5ebfda4f3cf53 +From https://github.com/yhirose/cpp-httplib/tree/ff5677ad197947177c158fe857caff4f0e242045 with https://github.com/yhirose/cpp-httplib/pull/701 MIT License diff --git a/externals/httplib/httplib.h b/externals/httplib/httplib.h index 5139b7f05..8982054e2 100644 --- a/externals/httplib/httplib.h +++ b/externals/httplib/httplib.h @@ -173,6 +173,7 @@ using socket_t = int; #define INVALID_SOCKET (-1) #endif //_WIN32 +#include <algorithm> #include <array> #include <atomic> #include <cassert> @@ -237,6 +238,27 @@ namespace httplib { namespace detail { +/* + * Backport std::make_unique from C++14. + * + * NOTE: This code came up with the following stackoverflow post: + * https://stackoverflow.com/questions/10149840/c-arrays-and-make-unique + * + */ + +template <class T, class... Args> +typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type +make_unique(Args &&... args) { + return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); +} + +template <class T> +typename std::enable_if<std::is_array<T>::value, std::unique_ptr<T>>::type +make_unique(std::size_t n) { + typedef typename std::remove_extent<T>::type RT; + return std::unique_ptr<T>(new RT[n]); +} + struct ci { bool operator()(const std::string &s1, const std::string &s2) const { return std::lexicographical_compare( @@ -304,6 +326,10 @@ using ContentProvider = using ContentProviderWithoutLength = std::function<bool(size_t offset, DataSink &sink)>; +using ContentReceiverWithProgress = + std::function<bool(const char *data, size_t data_length, uint64_t offset, + uint64_t total_length)>; + using ContentReceiver = std::function<bool(const char *data, size_t data_length)>; @@ -317,14 +343,17 @@ public: ContentReceiver receiver)>; ContentReader(Reader reader, MultipartReader multipart_reader) - : reader_(reader), multipart_reader_(multipart_reader) {} + : reader_(std::move(reader)), + multipart_reader_(std::move(multipart_reader)) {} bool operator()(MultipartContentHeader header, ContentReceiver receiver) const { - return multipart_reader_(header, receiver); + return multipart_reader_(std::move(header), std::move(receiver)); } - bool operator()(ContentReceiver receiver) const { return reader_(receiver); } + bool operator()(ContentReceiver receiver) const { + return reader_(std::move(receiver)); + } Reader reader_; MultipartReader multipart_reader_; @@ -353,7 +382,7 @@ struct Request { // for client size_t redirect_count = CPPHTTPLIB_REDIRECT_MAX_COUNT; ResponseHandler response_handler; - ContentReceiver content_receiver; + ContentReceiverWithProgress content_receiver; size_t content_length = 0; ContentProvider content_provider; Progress progress; @@ -475,7 +504,7 @@ public: void enqueue(std::function<void()> fn) override { std::unique_lock<std::mutex> lock(mutex_); - jobs_.push_back(fn); + jobs_.push_back(std::move(fn)); cond_.notify_one(); } @@ -664,8 +693,8 @@ private: ContentReceiver multipart_receiver); virtual bool process_and_close_socket(socket_t sock); - - struct MountPointEntry { + + struct MountPointEntry { std::string mount_point; std::string base_dir; Headers headers; @@ -704,23 +733,27 @@ enum Error { Canceled, SSLConnection, SSLLoadingCerts, - SSLServerVerification + SSLServerVerification, + UnsupportedMultipartBoundaryChars }; class Result { public: - Result(const std::shared_ptr<Response> &res, Error err) - : res_(res), err_(err) {} + Result(std::unique_ptr<Response> res, Error err) + : res_(std::move(res)), err_(err) {} operator bool() const { return res_ != nullptr; } bool operator==(std::nullptr_t) const { return res_ == nullptr; } bool operator!=(std::nullptr_t) const { return res_ != nullptr; } const Response &value() const { return *res_; } + Response &value() { return *res_; } const Response &operator*() const { return *res_; } + Response &operator*() { return *res_; } const Response *operator->() const { return res_.get(); } + Response *operator->() { return res_.get(); } Error error() const { return err_; } private: - std::shared_ptr<Response> res_; + std::unique_ptr<Response> res_; Error err_; }; @@ -777,6 +810,8 @@ public: Result Post(const char *path, const MultipartFormDataItems &items); Result Post(const char *path, const Headers &headers, const MultipartFormDataItems &items); + Result Post(const char *path, const Headers &headers, + const MultipartFormDataItems &items, const std::string &boundary); Result Put(const char *path); Result Put(const char *path, const std::string &body, @@ -863,7 +898,21 @@ protected: }; virtual bool create_and_connect_socket(Socket &socket); - virtual void close_socket(Socket &socket, bool process_socket_ret); + + // All of: + // shutdown_ssl + // shutdown_socket + // close_socket + // should ONLY be called when socket_mutex_ is locked. + // Also, shutdown_ssl and close_socket should also NOT be called concurrently + // with a DIFFERENT thread sending requests using that socket. + virtual void shutdown_ssl(Socket &socket, bool shutdown_gracefully); + void shutdown_socket(Socket &socket); + void close_socket(Socket &socket); + + // Similar to shutdown_ssl and close_socket, this should NOT be called + // concurrently with a DIFFERENT thread sending requests from the socket + void lock_socket_and_shutdown_and_close(); bool process_request(Stream &strm, const Request &req, Response &res, bool close_connection); @@ -873,7 +922,7 @@ protected: void copy_settings(const ClientImpl &rhs); // Error state - mutable Error error_ = Error::Success; + mutable std::atomic<Error> error_; // Socket endoint information const std::string host_; @@ -885,6 +934,11 @@ protected: mutable std::mutex socket_mutex_; std::recursive_mutex request_mutex_; + // These are all protected under socket_mutex + int socket_requests_in_flight_ = 0; + std::thread::id socket_requests_are_from_thread_ = std::thread::id(); + bool socket_should_be_closed_when_request_is_done_ = false; + // Default headers Headers default_headers_; @@ -942,13 +996,13 @@ private: bool redirect(const Request &req, Response &res); bool handle_request(Stream &strm, const Request &req, Response &res, bool close_connection); - void stop_core(); - std::shared_ptr<Response> send_with_content_provider( + std::unique_ptr<Response> send_with_content_provider( const char *method, const char *path, const Headers &headers, const std::string &body, size_t content_length, ContentProvider content_provider, const char *content_type); - virtual bool process_socket(Socket &socket, + // socket is const because this function is called when socket_mutex_ is not locked + virtual bool process_socket(const Socket &socket, std::function<bool(Stream &strm)> callback); virtual bool is_ssl() const; }; @@ -1012,6 +1066,8 @@ public: Result Post(const char *path, const MultipartFormDataItems &items); Result Post(const char *path, const Headers &headers, const MultipartFormDataItems &items); + Result Post(const char *path, const Headers &headers, + const MultipartFormDataItems &items, const std::string &boundary); Result Put(const char *path); Result Put(const char *path, const std::string &body, const char *content_type); @@ -1098,7 +1154,7 @@ public: #endif private: - std::shared_ptr<ClientImpl> cli_; + std::unique_ptr<ClientImpl> cli_; #ifdef CPPHTTPLIB_OPENSSL_SUPPORT bool is_ssl_ = false; @@ -1154,9 +1210,9 @@ public: private: bool create_and_connect_socket(Socket &socket) override; - void close_socket(Socket &socket, bool process_socket_ret) override; + void shutdown_ssl(Socket &socket, bool shutdown_gracefully) override; - bool process_socket(Socket &socket, + bool process_socket(const Socket &socket, std::function<bool(Stream &strm)> callback) override; bool is_ssl() const override; @@ -1942,9 +1998,9 @@ inline socket_t create_client_socket(const char *host, int port, bool tcp_nodelay, SocketOptions socket_options, time_t timeout_sec, time_t timeout_usec, - const std::string &intf, Error &error) { + const std::string &intf, std::atomic<Error> &error) { auto sock = create_socket( - host, port, 0, tcp_nodelay, socket_options, + host, port, 0, tcp_nodelay, std::move(socket_options), [&](socket_t sock, struct addrinfo &ai) -> bool { if (!intf.empty()) { #ifdef USE_IF2IP @@ -2478,7 +2534,8 @@ inline bool read_headers(Stream &strm, Headers &headers) { } inline bool read_content_with_length(Stream &strm, uint64_t len, - Progress progress, ContentReceiver out) { + Progress progress, + ContentReceiverWithProgress out) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; uint64_t r = 0; @@ -2487,8 +2544,7 @@ inline bool read_content_with_length(Stream &strm, uint64_t len, auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ)); if (n <= 0) { return false; } - if (!out(buf, static_cast<size_t>(n))) { return false; } - + if (!out(buf, static_cast<size_t>(n), r, len)) { return false; } r += static_cast<uint64_t>(n); if (progress) { @@ -2510,8 +2566,10 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) { } } -inline bool read_content_without_length(Stream &strm, ContentReceiver out) { +inline bool read_content_without_length(Stream &strm, + ContentReceiverWithProgress out) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; + uint64_t r = 0; for (;;) { auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ); if (n < 0) { @@ -2519,13 +2577,16 @@ inline bool read_content_without_length(Stream &strm, ContentReceiver out) { } else if (n == 0) { return true; } - if (!out(buf, static_cast<size_t>(n))) { return false; } + + if (!out(buf, static_cast<size_t>(n), r, 0)) { return false; } + r += static_cast<uint64_t>(n); } return true; } -inline bool read_content_chunked(Stream &strm, ContentReceiver out) { +inline bool read_content_chunked(Stream &strm, + ContentReceiverWithProgress out) { const auto bufsiz = 16; char buf[bufsiz]; @@ -2570,23 +2631,24 @@ inline bool is_chunked_transfer_encoding(const Headers &headers) { } template <typename T, typename U> -bool prepare_content_receiver(T &x, int &status, ContentReceiver receiver, +bool prepare_content_receiver(T &x, int &status, + ContentReceiverWithProgress receiver, bool decompress, U callback) { if (decompress) { std::string encoding = x.get_header_value("Content-Encoding"); - std::shared_ptr<decompressor> decompressor; + std::unique_ptr<decompressor> decompressor; if (encoding.find("gzip") != std::string::npos || encoding.find("deflate") != std::string::npos) { #ifdef CPPHTTPLIB_ZLIB_SUPPORT - decompressor = std::make_shared<gzip_decompressor>(); + decompressor = detail::make_unique<gzip_decompressor>(); #else status = 415; return false; #endif } else if (encoding.find("br") != std::string::npos) { #ifdef CPPHTTPLIB_BROTLI_SUPPORT - decompressor = std::make_shared<brotli_decompressor>(); + decompressor = detail::make_unique<brotli_decompressor>(); #else status = 415; return false; @@ -2595,12 +2657,14 @@ bool prepare_content_receiver(T &x, int &status, ContentReceiver receiver, if (decompressor) { if (decompressor->is_valid()) { - ContentReceiver out = [&](const char *buf, size_t n) { - return decompressor->decompress( - buf, n, - [&](const char *buf, size_t n) { return receiver(buf, n); }); + ContentReceiverWithProgress out = [&](const char *buf, size_t n, + uint64_t off, uint64_t len) { + return decompressor->decompress(buf, n, + [&](const char *buf, size_t n) { + return receiver(buf, n, off, len); + }); }; - return callback(out); + return callback(std::move(out)); } else { status = 500; return false; @@ -2608,18 +2672,20 @@ bool prepare_content_receiver(T &x, int &status, ContentReceiver receiver, } } - ContentReceiver out = [&](const char *buf, size_t n) { - return receiver(buf, n); + ContentReceiverWithProgress out = [&](const char *buf, size_t n, uint64_t off, + uint64_t len) { + return receiver(buf, n, off, len); }; - return callback(out); + return callback(std::move(out)); } template <typename T> bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, - Progress progress, ContentReceiver receiver, + Progress progress, ContentReceiverWithProgress receiver, bool decompress) { return prepare_content_receiver( - x, status, receiver, decompress, [&](const ContentReceiver &out) { + x, status, std::move(receiver), decompress, + [&](const ContentReceiverWithProgress &out) { auto ret = true; auto exceed_payload_max_length = false; @@ -2634,7 +2700,7 @@ bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, skip_content_with_length(strm, len); ret = false; } else if (len > 0) { - ret = read_content_with_length(strm, len, progress, out); + ret = read_content_with_length(strm, len, std::move(progress), out); } } @@ -2875,7 +2941,7 @@ inline bool parse_multipart_boundary(const std::string &content_type, return !boundary.empty(); } -inline bool parse_range_header(const std::string &s, Ranges &ranges) { +inline bool parse_range_header(const std::string &s, Ranges &ranges) try { static auto re_first_range = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))"); std::smatch m; if (std::regex_match(s, m, re_first_range)) { @@ -2907,7 +2973,7 @@ inline bool parse_range_header(const std::string &s, Ranges &ranges) { return all_valid_ranges; } return false; -} +} catch (...) { return false; } class MultipartFormDataParser { public: @@ -2918,7 +2984,8 @@ public: bool is_valid() const { return is_valid_; } template <typename T, typename U> - bool parse(const char *buf, size_t n, T content_callback, U header_callback) { + bool parse(const char *buf, size_t n, const T &content_callback, + const U &header_callback) { static const std::regex re_content_disposition( "^Content-Disposition:\\s*form-data;\\s*name=\"(.*?)\"(?:;\\s*filename=" @@ -3090,8 +3157,13 @@ inline std::string make_multipart_data_boundary() { static const char data[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + // std::random_device might actually be deterministic on some + // platforms, but due to lack of support in the c++ standard library, + // doing better requires either some ugly hacks or breaking portability. std::random_device seed_gen; - std::mt19937 engine(seed_gen()); + // Request 128 bits of entropy for initialization + std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), seed_gen()}; + std::mt19937 engine(seed_sequence); std::string result = "--cpp-httplib-multipart-data-"; @@ -3114,7 +3186,7 @@ get_range_offset_and_length(const Request &req, size_t content_length, auto slen = static_cast<ssize_t>(content_length); if (r.first == -1) { - r.first = slen - r.second; + r.first = (std::max)(static_cast<ssize_t>(0), slen - r.second); r.second = slen - 1; } @@ -3451,7 +3523,7 @@ inline std::pair<std::string, std::string> make_range_header(Ranges ranges) { if (r.second != -1) { field += std::to_string(r.second); } i++; } - return std::make_pair("Range", field); + return std::make_pair("Range", std::move(field)); } inline std::pair<std::string, std::string> @@ -3460,7 +3532,7 @@ make_basic_authentication_header(const std::string &username, bool is_proxy = false) { auto field = "Basic " + detail::base64_encode(username + ":" + password); auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; - return std::make_pair(key, field); + return std::make_pair(key, std::move(field)); } inline std::pair<std::string, std::string> @@ -3468,7 +3540,7 @@ make_bearer_token_authentication_header(const std::string &token, bool is_proxy = false) { auto field = "Bearer " + token; auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; - return std::make_pair(key, field); + return std::make_pair(key, std::move(field)); } // Request implementation @@ -3761,60 +3833,66 @@ inline Server::Server() inline Server::~Server() {} inline Server &Server::Get(const char *pattern, Handler handler) { - get_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); + get_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Post(const char *pattern, Handler handler) { - post_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); + post_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Post(const char *pattern, HandlerWithContentReader handler) { post_handlers_for_content_reader_.push_back( - std::make_pair(std::regex(pattern), handler)); + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Put(const char *pattern, Handler handler) { - put_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); + put_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Put(const char *pattern, HandlerWithContentReader handler) { put_handlers_for_content_reader_.push_back( - std::make_pair(std::regex(pattern), handler)); + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Patch(const char *pattern, Handler handler) { - patch_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); + patch_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Patch(const char *pattern, HandlerWithContentReader handler) { patch_handlers_for_content_reader_.push_back( - std::make_pair(std::regex(pattern), handler)); + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Delete(const char *pattern, Handler handler) { - delete_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); + delete_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Delete(const char *pattern, HandlerWithContentReader handler) { delete_handlers_for_content_reader_.push_back( - std::make_pair(std::regex(pattern), handler)); + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } inline Server &Server::Options(const char *pattern, Handler handler) { - options_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); + options_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); return *this; } @@ -3860,7 +3938,7 @@ inline void Server::set_error_handler(Handler handler) { inline void Server::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; } inline void Server::set_socket_options(SocketOptions socket_options) { - socket_options_ = socket_options; + socket_options_ = std::move(socket_options); } inline void Server::set_logger(Logger logger) { logger_ = std::move(logger); } @@ -4045,16 +4123,16 @@ inline bool Server::write_response(Stream &strm, bool close_connection, } if (type != detail::EncodingType::None) { - std::shared_ptr<detail::compressor> compressor; + std::unique_ptr<detail::compressor> compressor; if (type == detail::EncodingType::Gzip) { #ifdef CPPHTTPLIB_ZLIB_SUPPORT - compressor = std::make_shared<detail::gzip_compressor>(); + compressor = detail::make_unique<detail::gzip_compressor>(); res.set_header("Content-Encoding", "gzip"); #endif } else if (type == detail::EncodingType::Brotli) { #ifdef CPPHTTPLIB_BROTLI_SUPPORT - compressor = std::make_shared<detail::brotli_compressor>(); + compressor = detail::make_unique<detail::brotli_compressor>(); res.set_header("Content-Encoding", "brotli"); #endif } @@ -4136,17 +4214,17 @@ Server::write_content_with_provider(Stream &strm, const Request &req, if (res.is_chunked_content_provider) { auto type = detail::encoding_type(req, res); - std::shared_ptr<detail::compressor> compressor; + std::unique_ptr<detail::compressor> compressor; if (type == detail::EncodingType::Gzip) { #ifdef CPPHTTPLIB_ZLIB_SUPPORT - compressor = std::make_shared<detail::gzip_compressor>(); + compressor = detail::make_unique<detail::gzip_compressor>(); #endif } else if (type == detail::EncodingType::Brotli) { #ifdef CPPHTTPLIB_BROTLI_SUPPORT - compressor = std::make_shared<detail::brotli_compressor>(); + compressor = detail::make_unique<detail::brotli_compressor>(); #endif } else { - compressor = std::make_shared<detail::nocompressor>(); + compressor = detail::make_unique<detail::nocompressor>(); } assert(compressor != nullptr); @@ -4198,8 +4276,9 @@ inline bool Server::read_content_with_content_receiver( Stream &strm, Request &req, Response &res, ContentReceiver receiver, MultipartContentHeader multipart_header, ContentReceiver multipart_receiver) { - return read_content_core(strm, req, res, receiver, multipart_header, - multipart_receiver); + return read_content_core(strm, req, res, std::move(receiver), + std::move(multipart_header), + std::move(multipart_receiver)); } inline bool Server::read_content_core(Stream &strm, Request &req, Response &res, @@ -4207,7 +4286,7 @@ inline bool Server::read_content_core(Stream &strm, Request &req, Response &res, MultipartContentHeader mulitpart_header, ContentReceiver multipart_receiver) { detail::MultipartFormDataParser multipart_form_data_parser; - ContentReceiver out; + ContentReceiverWithProgress out; if (req.is_multipart_form_data()) { const auto &content_type = req.get_header_value("Content-Type"); @@ -4218,7 +4297,7 @@ inline bool Server::read_content_core(Stream &strm, Request &req, Response &res, } multipart_form_data_parser.set_boundary(std::move(boundary)); - out = [&](const char *buf, size_t n) { + out = [&](const char *buf, size_t n, uint64_t /*off*/, uint64_t /*len*/) { /* For debug size_t pos = 0; while (pos < n) { @@ -4234,7 +4313,8 @@ inline bool Server::read_content_core(Stream &strm, Request &req, Response &res, mulitpart_header); }; } else { - out = receiver; + out = [receiver](const char *buf, size_t n, uint64_t /*off*/, + uint64_t /*len*/) { return receiver(buf, n); }; } if (req.method == "DELETE" && !req.has_header("Content-Length")) { @@ -4271,7 +4351,7 @@ inline bool Server::handle_file_request(Request &req, Response &res, auto type = detail::find_content_type(path, file_extension_and_mimetype_map_); if (type) { res.set_header("Content-Type", type); } - for (const auto& kv : entry.headers) { + for (const auto &kv : entry.headers) { res.set_header(kv.first.c_str(), kv.second); } res.status = 200; @@ -4290,7 +4370,7 @@ inline socket_t Server::create_server_socket(const char *host, int port, int socket_flags, SocketOptions socket_options) const { return detail::create_socket( - host, port, socket_flags, tcp_nodelay_, socket_options, + host, port, socket_flags, tcp_nodelay_, std::move(socket_options), [](socket_t sock, struct addrinfo &ai) -> bool { if (::bind(sock, ai.ai_addr, static_cast<socklen_t>(ai.ai_addrlen))) { return false; @@ -4392,32 +4472,37 @@ inline bool Server::routing(Request &req, Response &res, Stream &strm) { { ContentReader reader( [&](ContentReceiver receiver) { - return read_content_with_content_receiver(strm, req, res, receiver, - nullptr, nullptr); + return read_content_with_content_receiver( + strm, req, res, std::move(receiver), nullptr, nullptr); }, [&](MultipartContentHeader header, ContentReceiver receiver) { return read_content_with_content_receiver(strm, req, res, nullptr, - header, receiver); + std::move(header), + std::move(receiver)); }); if (req.method == "POST") { if (dispatch_request_for_content_reader( - req, res, reader, post_handlers_for_content_reader_)) { + req, res, std::move(reader), + post_handlers_for_content_reader_)) { return true; } } else if (req.method == "PUT") { if (dispatch_request_for_content_reader( - req, res, reader, put_handlers_for_content_reader_)) { + req, res, std::move(reader), + put_handlers_for_content_reader_)) { return true; } } else if (req.method == "PATCH") { if (dispatch_request_for_content_reader( - req, res, reader, patch_handlers_for_content_reader_)) { + req, res, std::move(reader), + patch_handlers_for_content_reader_)) { return true; } } else if (req.method == "DELETE") { if (dispatch_request_for_content_reader( - req, res, reader, delete_handlers_for_content_reader_)) { + req, res, std::move(reader), + delete_handlers_for_content_reader_)) { return true; } } @@ -4448,7 +4533,6 @@ inline bool Server::routing(Request &req, Response &res, Stream &strm) { inline bool Server::dispatch_request(Request &req, Response &res, const Handlers &handlers) { - try { for (const auto &x : handlers) { const auto &pattern = x.first; @@ -4531,7 +4615,8 @@ Server::process_request(Stream &strm, bool close_connection, if (req.has_header("Range")) { const auto &range_header_value = req.get_header_value("Range"); if (!detail::parse_range_header(range_header_value, req.ranges)) { - // TODO: error + res.status = 416; + return write_response(strm, close_connection, req, res); } } @@ -4588,11 +4673,11 @@ inline ClientImpl::ClientImpl(const std::string &host, int port) inline ClientImpl::ClientImpl(const std::string &host, int port, const std::string &client_cert_path, const std::string &client_key_path) - : host_(host), port_(port), + : error_(Error::Success), host_(host), port_(port), host_and_port_(host_ + ":" + std::to_string(port_)), client_cert_path_(client_cert_path), client_key_path_(client_key_path) {} -inline ClientImpl::~ClientImpl() { stop_core(); } +inline ClientImpl::~ClientImpl() { lock_socket_and_shutdown_and_close(); } inline bool ClientImpl::is_valid() const { return true; } @@ -4653,15 +4738,47 @@ inline bool ClientImpl::create_and_connect_socket(Socket &socket) { return true; } -inline void ClientImpl::close_socket(Socket &socket, - bool /*process_socket_ret*/) { - detail::close_socket(socket.sock); - socket_.sock = INVALID_SOCKET; +inline void ClientImpl::shutdown_ssl(Socket &socket, bool shutdown_gracefully) { + (void)socket; + (void)shutdown_gracefully; + //If there are any requests in flight from threads other than us, then it's + //a thread-unsafe race because individual ssl* objects are not thread-safe. + assert(socket_requests_in_flight_ == 0 || + socket_requests_are_from_thread_ == std::this_thread::get_id()); +} + +inline void ClientImpl::shutdown_socket(Socket &socket) { + if (socket.sock == INVALID_SOCKET) + return; + detail::shutdown_socket(socket.sock); +} + +inline void ClientImpl::close_socket(Socket &socket) { + // If there are requests in flight in another thread, usually closing + // the socket will be fine and they will simply receive an error when + // using the closed socket, but it is still a bug since rarely the OS + // may reassign the socket id to be used for a new socket, and then + // suddenly they will be operating on a live socket that is different + // than the one they intended! + assert(socket_requests_in_flight_ == 0 || + socket_requests_are_from_thread_ == std::this_thread::get_id()); + // It is also a bug if this happens while SSL is still active #ifdef CPPHTTPLIB_OPENSSL_SUPPORT - socket_.ssl = nullptr; + assert(socket.ssl == nullptr); #endif + if (socket.sock == INVALID_SOCKET) + return; + detail::close_socket(socket.sock); + socket.sock = INVALID_SOCKET; } +inline void ClientImpl::lock_socket_and_shutdown_and_close() { + std::lock_guard<std::mutex> guard(socket_mutex_); + shutdown_ssl(socket_, true); + shutdown_socket(socket_); + close_socket(socket_); +} + inline bool ClientImpl::read_response_line(Stream &strm, Response &res) { std::array<char, 2048> buf; @@ -4696,11 +4813,23 @@ inline bool ClientImpl::send(const Request &req, Response &res) { { std::lock_guard<std::mutex> guard(socket_mutex_); + // Set this to false immediately - if it ever gets set to true by the end of the + // request, we know another thread instructed us to close the socket. + socket_should_be_closed_when_request_is_done_ = false; auto is_alive = false; if (socket_.is_open()) { is_alive = detail::select_write(socket_.sock, 0, 0) > 0; - if (!is_alive) { close_socket(socket_, false); } + if (!is_alive) { + // Attempt to avoid sigpipe by shutting down nongracefully if it seems like + // the other side has already closed the connection + // Also, there cannot be any requests in flight from other threads since we locked + // request_mutex_, so safe to close everything immediately + const bool shutdown_gracefully = false; + shutdown_ssl(socket_, shutdown_gracefully); + shutdown_socket(socket_); + close_socket(socket_); + } } if (!is_alive) { @@ -4721,15 +4850,38 @@ inline bool ClientImpl::send(const Request &req, Response &res) { } #endif } + + // Mark the current socket as being in use so that it cannot be closed by anyone + // else while this request is ongoing, even though we will be releasing the mutex. + if (socket_requests_in_flight_ > 1) { + assert(socket_requests_are_from_thread_ == std::this_thread::get_id()); + } + socket_requests_in_flight_ += 1; + socket_requests_are_from_thread_ = std::this_thread::get_id(); } auto close_connection = !keep_alive_; - auto ret = process_socket(socket_, [&](Stream &strm) { return handle_request(strm, req, res, close_connection); }); - if (close_connection || !ret) { stop_core(); } + //Briefly lock mutex in order to mark that a request is no longer ongoing + { + std::lock_guard<std::mutex> guard(socket_mutex_); + socket_requests_in_flight_ -= 1; + if (socket_requests_in_flight_ <= 0) { + assert(socket_requests_in_flight_ == 0); + socket_requests_are_from_thread_ = std::thread::id(); + } + + if (socket_should_be_closed_when_request_is_done_ || + close_connection || + !ret ) { + shutdown_ssl(socket_, true); + shutdown_socket(socket_); + close_socket(socket_); + } + } if (!ret) { if (error_ == Error::Success) { error_ = Error::Unknown; } @@ -4973,7 +5125,7 @@ inline bool ClientImpl::write_request(Stream &strm, const Request &req, return true; } -inline std::shared_ptr<Response> ClientImpl::send_with_content_provider( +inline std::unique_ptr<Response> ClientImpl::send_with_content_provider( const char *method, const char *path, const Headers &headers, const std::string &body, size_t content_length, ContentProvider content_provider, const char *content_type) { @@ -5036,15 +5188,15 @@ inline std::shared_ptr<Response> ClientImpl::send_with_content_provider( { if (content_provider) { req.content_length = content_length; - req.content_provider = content_provider; + req.content_provider = std::move(content_provider); } else { req.body = body; } } - auto res = std::make_shared<Response>(); + auto res = detail::make_unique<Response>(); - return send(req, *res) ? res : nullptr; + return send(req, *res) ? std::move(res) : nullptr; } inline bool ClientImpl::process_request(Stream &strm, const Request &req, @@ -5070,16 +5222,21 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req, if (req.method != "HEAD" && req.method != "CONNECT") { auto out = req.content_receiver - ? static_cast<ContentReceiver>([&](const char *buf, size_t n) { - auto ret = req.content_receiver(buf, n); - if (!ret) { error_ = Error::Canceled; } - return ret; - }) - : static_cast<ContentReceiver>([&](const char *buf, size_t n) { - if (res.body.size() + n > res.body.max_size()) { return false; } - res.body.append(buf, n); - return true; - }); + ? static_cast<ContentReceiverWithProgress>( + [&](const char *buf, size_t n, uint64_t off, uint64_t len) { + auto ret = req.content_receiver(buf, n, off, len); + if (!ret) { error_ = Error::Canceled; } + return ret; + }) + : static_cast<ContentReceiverWithProgress>( + [&](const char *buf, size_t n, uint64_t /*off*/, + uint64_t /*len*/) { + if (res.body.size() + n > res.body.max_size()) { + return false; + } + res.body.append(buf, n); + return true; + }); auto progress = [&](uint64_t current, uint64_t total) { if (!req.progress) { return true; } @@ -5090,7 +5247,8 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req, int dummy_status; if (!detail::read_content(strm, res, (std::numeric_limits<size_t>::max)(), - dummy_status, progress, out, decompress_)) { + dummy_status, std::move(progress), std::move(out), + decompress_)) { if (error_ != Error::Canceled) { error_ = Error::Read; } return false; } @@ -5098,7 +5256,16 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req, if (res.get_header_value("Connection") == "close" || (res.version == "HTTP/1.0" && res.reason != "Connection established")) { - stop_core(); + // TODO this requires a not-entirely-obvious chain of calls to be correct + // for this to be safe. Maybe a code refactor (such as moving this out to + // the send function and getting rid of the recursiveness of the mutex) + // could make this more obvious. + + // This is safe to call because process_request is only called by handle_request + // which is only called by send, which locks the request mutex during the process. + // It would be a bug to call it from a different thread since it's a thread-safety + // issue to do these things to the socket if another thread is using the socket. + lock_socket_and_shutdown_and_close(); } // Log @@ -5108,11 +5275,11 @@ inline bool ClientImpl::process_request(Stream &strm, const Request &req, } inline bool -ClientImpl::process_socket(Socket &socket, +ClientImpl::process_socket(const Socket &socket, std::function<bool(Stream &strm)> callback) { - return detail::process_client_socket(socket.sock, read_timeout_sec_, - read_timeout_usec_, write_timeout_sec_, - write_timeout_usec_, callback); + return detail::process_client_socket( + socket.sock, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, std::move(callback)); } inline bool ClientImpl::is_ssl() const { return false; } @@ -5138,9 +5305,9 @@ inline Result ClientImpl::Get(const char *path, const Headers &headers, req.headers.insert(headers.begin(), headers.end()); req.progress = std::move(progress); - auto res = std::make_shared<Response>(); + auto res = detail::make_unique<Response>(); auto ret = send(req, *res); - return Result{ret ? res : nullptr, get_last_error()}; + return Result{ret ? std::move(res) : nullptr, get_last_error()}; } inline Result ClientImpl::Get(const char *path, @@ -5170,23 +5337,23 @@ inline Result ClientImpl::Get(const char *path, const Headers &headers, inline Result ClientImpl::Get(const char *path, ResponseHandler response_handler, ContentReceiver content_receiver) { - return Get(path, Headers(), std::move(response_handler), content_receiver, - nullptr); + return Get(path, Headers(), std::move(response_handler), + std::move(content_receiver), nullptr); } inline Result ClientImpl::Get(const char *path, const Headers &headers, ResponseHandler response_handler, ContentReceiver content_receiver) { - return Get(path, headers, std::move(response_handler), content_receiver, - nullptr); + return Get(path, headers, std::move(response_handler), + std::move(content_receiver), nullptr); } inline Result ClientImpl::Get(const char *path, ResponseHandler response_handler, ContentReceiver content_receiver, Progress progress) { - return Get(path, Headers(), std::move(response_handler), content_receiver, - progress); + return Get(path, Headers(), std::move(response_handler), + std::move(content_receiver), std::move(progress)); } inline Result ClientImpl::Get(const char *path, const Headers &headers, @@ -5199,12 +5366,16 @@ inline Result ClientImpl::Get(const char *path, const Headers &headers, req.headers = default_headers_; req.headers.insert(headers.begin(), headers.end()); req.response_handler = std::move(response_handler); - req.content_receiver = std::move(content_receiver); + req.content_receiver = + [content_receiver](const char *data, size_t data_length, + uint64_t /*offset*/, uint64_t /*total_length*/) { + return content_receiver(data, data_length); + }; req.progress = std::move(progress); - auto res = std::make_shared<Response>(); + auto res = detail::make_unique<Response>(); auto ret = send(req, *res); - return Result{ret ? res : nullptr, get_last_error()}; + return Result{ret ? std::move(res) : nullptr, get_last_error()}; } inline Result ClientImpl::Head(const char *path) { @@ -5218,9 +5389,9 @@ inline Result ClientImpl::Head(const char *path, const Headers &headers) { req.headers.insert(headers.begin(), headers.end()); req.path = path; - auto res = std::make_shared<Response>(); + auto res = detail::make_unique<Response>(); auto ret = send(req, *res); - return Result{ret ? res : nullptr, get_last_error()}; + return Result{ret ? std::move(res) : nullptr, get_last_error()}; } inline Result ClientImpl::Post(const char *path) { @@ -5237,7 +5408,7 @@ inline Result ClientImpl::Post(const char *path, const Headers &headers, const char *content_type) { auto ret = send_with_content_provider("POST", path, headers, body, 0, nullptr, content_type); - return Result{ret, get_last_error()}; + return Result{std::move(ret), get_last_error()}; } inline Result ClientImpl::Post(const char *path, const Params ¶ms) { @@ -5247,17 +5418,18 @@ inline Result ClientImpl::Post(const char *path, const Params ¶ms) { inline Result ClientImpl::Post(const char *path, size_t content_length, ContentProvider content_provider, const char *content_type) { - return Post(path, Headers(), content_length, content_provider, content_type); + return Post(path, Headers(), content_length, std::move(content_provider), + content_type); } inline Result ClientImpl::Post(const char *path, const Headers &headers, size_t content_length, ContentProvider content_provider, const char *content_type) { - auto ret = send_with_content_provider("POST", path, headers, std::string(), - content_length, content_provider, - content_type); - return Result{ret, get_last_error()}; + auto ret = send_with_content_provider( + "POST", path, headers, std::string(), content_length, + std::move(content_provider), content_type); + return Result{std::move(ret), get_last_error()}; } inline Result ClientImpl::Post(const char *path, const Headers &headers, @@ -5273,7 +5445,18 @@ inline Result ClientImpl::Post(const char *path, inline Result ClientImpl::Post(const char *path, const Headers &headers, const MultipartFormDataItems &items) { - auto boundary = detail::make_multipart_data_boundary(); + return Post(path, headers, items, detail::make_multipart_data_boundary()); +} +inline Result ClientImpl::Post(const char *path, const Headers &headers, + const MultipartFormDataItems &items, + const std::string &boundary) { + for (size_t i = 0; i < boundary.size(); i++) { + char c = boundary[i]; + if (!std::isalnum(c) && c != '-' && c != '_') { + error_ = Error::UnsupportedMultipartBoundaryChars; + return Result{nullptr, error_}; + } + } std::string body; @@ -5311,23 +5494,24 @@ inline Result ClientImpl::Put(const char *path, const Headers &headers, const char *content_type) { auto ret = send_with_content_provider("PUT", path, headers, body, 0, nullptr, content_type); - return Result{ret, get_last_error()}; + return Result{std::move(ret), get_last_error()}; } inline Result ClientImpl::Put(const char *path, size_t content_length, ContentProvider content_provider, const char *content_type) { - return Put(path, Headers(), content_length, content_provider, content_type); + return Put(path, Headers(), content_length, std::move(content_provider), + content_type); } inline Result ClientImpl::Put(const char *path, const Headers &headers, size_t content_length, ContentProvider content_provider, const char *content_type) { - auto ret = send_with_content_provider("PUT", path, headers, std::string(), - content_length, content_provider, - content_type); - return Result{ret, get_last_error()}; + auto ret = send_with_content_provider( + "PUT", path, headers, std::string(), content_length, + std::move(content_provider), content_type); + return Result{std::move(ret), get_last_error()}; } inline Result ClientImpl::Put(const char *path, const Params ¶ms) { @@ -5350,23 +5534,24 @@ inline Result ClientImpl::Patch(const char *path, const Headers &headers, const char *content_type) { auto ret = send_with_content_provider("PATCH", path, headers, body, 0, nullptr, content_type); - return Result{ret, get_last_error()}; + return Result{std::move(ret), get_last_error()}; } inline Result ClientImpl::Patch(const char *path, size_t content_length, ContentProvider content_provider, const char *content_type) { - return Patch(path, Headers(), content_length, content_provider, content_type); + return Patch(path, Headers(), content_length, std::move(content_provider), + content_type); } inline Result ClientImpl::Patch(const char *path, const Headers &headers, size_t content_length, ContentProvider content_provider, const char *content_type) { - auto ret = send_with_content_provider("PATCH", path, headers, std::string(), - content_length, content_provider, - content_type); - return Result{ret, get_last_error()}; + auto ret = send_with_content_provider( + "PATCH", path, headers, std::string(), content_length, + std::move(content_provider), content_type); + return Result{std::move(ret), get_last_error()}; } inline Result ClientImpl::Delete(const char *path) { @@ -5394,9 +5579,9 @@ inline Result ClientImpl::Delete(const char *path, const Headers &headers, if (content_type) { req.headers.emplace("Content-Type", content_type); } req.body = body; - auto res = std::make_shared<Response>(); + auto res = detail::make_unique<Response>(); auto ret = send(req, *res); - return Result{ret ? res : nullptr, get_last_error()}; + return Result{ret ? std::move(res) : nullptr, get_last_error()}; } inline Result ClientImpl::Options(const char *path) { @@ -5410,9 +5595,9 @@ inline Result ClientImpl::Options(const char *path, const Headers &headers) { req.headers.insert(headers.begin(), headers.end()); req.path = path; - auto res = std::make_shared<Response>(); + auto res = detail::make_unique<Response>(); auto ret = send(req, *res); - return Result{ret ? res : nullptr, get_last_error()}; + return Result{ret ? std::move(res) : nullptr, get_last_error()}; } inline size_t ClientImpl::is_socket_open() const { @@ -5421,18 +5606,27 @@ inline size_t ClientImpl::is_socket_open() const { } inline void ClientImpl::stop() { - stop_core(); - error_ = Error::Canceled; -} - -inline void ClientImpl::stop_core() { std::lock_guard<std::mutex> guard(socket_mutex_); - if (socket_.is_open()) { - detail::shutdown_socket(socket_.sock); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - close_socket(socket_, true); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + // There is no guarantee that this doesn't get overwritten later, but set it so that + // there is a good chance that any threads stopping as a result pick up this error. + error_ = Error::Canceled; + + // If there is anything ongoing right now, the ONLY thread-safe thing we can do + // is to shutdown_socket, so that threads using this socket suddenly discover + // they can't read/write any more and error out. + // Everything else (closing the socket, shutting ssl down) is unsafe because these + // actions are not thread-safe. + if (socket_requests_in_flight_ > 0) { + shutdown_socket(socket_); + // Aside from that, we set a flag for the socket to be closed when we're done. + socket_should_be_closed_when_request_is_done_ = true; + return; } + + //Otherwise, sitll holding the mutex, we can shut everything down ourselves + shutdown_ssl(socket_, true); + shutdown_socket(socket_); + close_socket(socket_); } inline void ClientImpl::set_connection_timeout(time_t sec, time_t usec) { @@ -5479,7 +5673,7 @@ inline void ClientImpl::set_default_headers(Headers headers) { inline void ClientImpl::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; } inline void ClientImpl::set_socket_options(SocketOptions socket_options) { - socket_options_ = socket_options; + socket_options_ = std::move(socket_options); } inline void ClientImpl::set_compress(bool on) { compress_ = on; } @@ -5554,9 +5748,12 @@ inline SSL *ssl_new(socket_t sock, SSL_CTX *ctx, std::mutex &ctx_mutex, } inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl, - bool process_socket_ret) { - if (process_socket_ret) { - SSL_shutdown(ssl); // shutdown only if not already closed by remote + bool shutdown_gracefully) { + // sometimes we may want to skip this to try to avoid SIGPIPE if we know + // the remote has closed the network connection + // Note that it is not always possible to avoid SIGPIPE, this is merely a best-efforts. + if (shutdown_gracefully) { + SSL_shutdown(ssl); } std::lock_guard<std::mutex> guard(ctx_mutex); @@ -5650,22 +5847,7 @@ inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl, read_timeout_usec_(read_timeout_usec), write_timeout_sec_(write_timeout_sec), write_timeout_usec_(write_timeout_usec) { - { - timeval tv; - tv.tv_sec = static_cast<long>(read_timeout_sec); - tv.tv_usec = static_cast<decltype(tv.tv_usec)>(read_timeout_usec); - - setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char *>(&tv), - sizeof(tv)); - } - { - timeval tv; - tv.tv_sec = static_cast<long>(write_timeout_sec); - tv.tv_usec = static_cast<decltype(tv.tv_usec)>(write_timeout_usec); - - setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, reinterpret_cast<char *>(&tv), - sizeof(tv)); - } + SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY); } inline SSLSocketStream::~SSLSocketStream() {} @@ -5680,8 +5862,27 @@ inline bool SSLSocketStream::is_writable() const { } inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { - if (SSL_pending(ssl_) > 0 || is_readable()) { + if (SSL_pending(ssl_) > 0) { return SSL_read(ssl_, ptr, static_cast<int>(size)); + } else if (is_readable()) { + auto ret = SSL_read(ssl_, ptr, static_cast<int>(size)); + if (ret < 0) { + auto err = SSL_get_error(ssl_, ret); + while (err == SSL_ERROR_WANT_READ) { + if (SSL_pending(ssl_) > 0) { + return SSL_read(ssl_, ptr, static_cast<int>(size)); + } else if (is_readable()) { + ret = SSL_read(ssl_, ptr, static_cast<int>(size)); + if (ret >= 0) { + return ret; + } + err = SSL_get_error(ssl_, ret); + } else { + return -1; + } + } + } + return ret; } return -1; } @@ -5788,9 +5989,12 @@ inline bool SSLServer::process_and_close_socket(socket_t sock) { }); detail::ssl_delete(ctx_mutex_, ssl, ret); + detail::shutdown_socket(sock); + detail::close_socket(sock); return ret; } + detail::shutdown_socket(sock); detail::close_socket(sock); return false; } @@ -5843,6 +6047,10 @@ inline SSLClient::SSLClient(const std::string &host, int port, inline SSLClient::~SSLClient() { if (ctx_) { SSL_CTX_free(ctx_); } + // Make sure to shut down SSL since shutdown_ssl will resolve to the + // base function rather than the derived function once we get to the + // base class destructor, and won't free the SSL (causing a leak). + SSLClient::shutdown_ssl(socket_, true); } inline bool SSLClient::is_valid() const { return ctx_; } @@ -5876,11 +6084,11 @@ inline bool SSLClient::create_and_connect_socket(Socket &socket) { return is_valid() && ClientImpl::create_and_connect_socket(socket); } +// Assumes that socket_mutex_ is locked and that there are no requests in flight inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, bool &success) { success = true; Response res2; - if (!detail::process_client_socket( socket.sock, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) { @@ -5889,7 +6097,10 @@ inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, req2.path = host_and_port_; return process_request(strm, req2, res2, false); })) { - close_socket(socket, true); + // Thread-safe to close everything because we are assuming there are no requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); success = false; return false; } @@ -5912,7 +6123,10 @@ inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, true)); return process_request(strm, req3, res3, false); })) { - close_socket(socket, true); + // Thread-safe to close everything because we are assuming there are no requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); success = false; return false; } @@ -6005,26 +6219,30 @@ inline bool SSLClient::initialize_ssl(Socket &socket) { return true; } - close_socket(socket, false); + shutdown_socket(socket); + close_socket(socket); return false; } -inline void SSLClient::close_socket(Socket &socket, bool process_socket_ret) { - detail::close_socket(socket.sock); - socket_.sock = INVALID_SOCKET; +inline void SSLClient::shutdown_ssl(Socket &socket, bool shutdown_gracefully) { + if (socket.sock == INVALID_SOCKET) { + assert(socket.ssl == nullptr); + return; + } if (socket.ssl) { - detail::ssl_delete(ctx_mutex_, socket.ssl, process_socket_ret); - socket_.ssl = nullptr; + detail::ssl_delete(ctx_mutex_, socket.ssl, shutdown_gracefully); + socket.ssl = nullptr; } + assert(socket.ssl == nullptr); } inline bool -SSLClient::process_socket(Socket &socket, +SSLClient::process_socket(const Socket &socket, std::function<bool(Stream &strm)> callback) { assert(socket.ssl); return detail::process_client_socket_ssl( socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, callback); + write_timeout_sec_, write_timeout_usec_, std::move(callback)); } inline bool SSLClient::is_ssl() const { return true; } @@ -6175,6 +6393,8 @@ inline Client::Client(const char *scheme_host_port, #else if (!scheme.empty() && scheme != "http") { #endif + std::string msg = "'" + scheme + "' scheme is not supported."; + throw std::invalid_argument(msg); return; } @@ -6187,28 +6407,28 @@ inline Client::Client(const char *scheme_host_port, if (is_ssl) { #ifdef CPPHTTPLIB_OPENSSL_SUPPORT - cli_ = std::make_shared<SSLClient>(host.c_str(), port, client_cert_path, - client_key_path); + cli_ = detail::make_unique<SSLClient>(host.c_str(), port, + client_cert_path, client_key_path); is_ssl_ = is_ssl; #endif } else { - cli_ = std::make_shared<ClientImpl>(host.c_str(), port, client_cert_path, - client_key_path); + cli_ = detail::make_unique<ClientImpl>(host.c_str(), port, + client_cert_path, client_key_path); } } else { - cli_ = std::make_shared<ClientImpl>(scheme_host_port, 80, client_cert_path, - client_key_path); + cli_ = detail::make_unique<ClientImpl>(scheme_host_port, 80, + client_cert_path, client_key_path); } } inline Client::Client(const std::string &host, int port) - : cli_(std::make_shared<ClientImpl>(host, port)) {} + : cli_(detail::make_unique<ClientImpl>(host, port)) {} inline Client::Client(const std::string &host, int port, const std::string &client_cert_path, const std::string &client_key_path) - : cli_(std::make_shared<ClientImpl>(host, port, client_cert_path, - client_key_path)) {} + : cli_(detail::make_unique<ClientImpl>(host, port, client_cert_path, + client_key_path)) {} inline Client::~Client() {} @@ -6221,11 +6441,11 @@ inline Result Client::Get(const char *path, const Headers &headers) { return cli_->Get(path, headers); } inline Result Client::Get(const char *path, Progress progress) { - return cli_->Get(path, progress); + return cli_->Get(path, std::move(progress)); } inline Result Client::Get(const char *path, const Headers &headers, Progress progress) { - return cli_->Get(path, headers, progress); + return cli_->Get(path, headers, std::move(progress)); } inline Result Client::Get(const char *path, ContentReceiver content_receiver) { return cli_->Get(path, std::move(content_receiver)); @@ -6262,7 +6482,8 @@ inline Result Client::Get(const char *path, ResponseHandler response_handler, inline Result Client::Get(const char *path, const Headers &headers, ResponseHandler response_handler, ContentReceiver content_receiver, Progress progress) { - return cli_->Get(path, headers, response_handler, content_receiver, progress); + return cli_->Get(path, headers, std::move(response_handler), + std::move(content_receiver), std::move(progress)); } inline Result Client::Head(const char *path) { return cli_->Head(path); } @@ -6282,13 +6503,14 @@ inline Result Client::Post(const char *path, const Headers &headers, inline Result Client::Post(const char *path, size_t content_length, ContentProvider content_provider, const char *content_type) { - return cli_->Post(path, content_length, content_provider, content_type); + return cli_->Post(path, content_length, std::move(content_provider), + content_type); } inline Result Client::Post(const char *path, const Headers &headers, size_t content_length, ContentProvider content_provider, const char *content_type) { - return cli_->Post(path, headers, content_length, content_provider, + return cli_->Post(path, headers, content_length, std::move(content_provider), content_type); } inline Result Client::Post(const char *path, const Params ¶ms) { @@ -6306,6 +6528,11 @@ inline Result Client::Post(const char *path, const Headers &headers, const MultipartFormDataItems &items) { return cli_->Post(path, headers, items); } +inline Result Client::Post(const char *path, const Headers &headers, + const MultipartFormDataItems &items, + const std::string &boundary) { + return cli_->Post(path, headers, items, boundary); +} inline Result Client::Put(const char *path) { return cli_->Put(path); } inline Result Client::Put(const char *path, const std::string &body, const char *content_type) { @@ -6318,13 +6545,14 @@ inline Result Client::Put(const char *path, const Headers &headers, inline Result Client::Put(const char *path, size_t content_length, ContentProvider content_provider, const char *content_type) { - return cli_->Put(path, content_length, content_provider, content_type); + return cli_->Put(path, content_length, std::move(content_provider), + content_type); } inline Result Client::Put(const char *path, const Headers &headers, size_t content_length, ContentProvider content_provider, const char *content_type) { - return cli_->Put(path, headers, content_length, content_provider, + return cli_->Put(path, headers, content_length, std::move(content_provider), content_type); } inline Result Client::Put(const char *path, const Params ¶ms) { @@ -6345,13 +6573,14 @@ inline Result Client::Patch(const char *path, const Headers &headers, inline Result Client::Patch(const char *path, size_t content_length, ContentProvider content_provider, const char *content_type) { - return cli_->Patch(path, content_length, content_provider, content_type); + return cli_->Patch(path, content_length, std::move(content_provider), + content_type); } inline Result Client::Patch(const char *path, const Headers &headers, size_t content_length, ContentProvider content_provider, const char *content_type) { - return cli_->Patch(path, headers, content_length, content_provider, + return cli_->Patch(path, headers, content_length, std::move(content_provider), content_type); } inline Result Client::Delete(const char *path) { return cli_->Delete(path); } @@ -6386,7 +6615,7 @@ inline void Client::set_default_headers(Headers headers) { inline void Client::set_tcp_nodelay(bool on) { cli_->set_tcp_nodelay(on); } inline void Client::set_socket_options(SocketOptions socket_options) { - cli_->set_socket_options(socket_options); + cli_->set_socket_options(std::move(socket_options)); } inline void Client::set_connection_timeout(time_t sec, time_t usec) { diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h index 85d5bd5de..a06f6457d 100644 --- a/externals/microprofile/microprofile.h +++ b/externals/microprofile/microprofile.h @@ -902,8 +902,10 @@ inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t) #include <windows.h> #define snprintf _snprintf +#ifdef _MSC_VER #pragma warning(push) #pragma warning(disable: 4244) +#endif int64_t MicroProfileTicksPerSecondCpu() { static int64_t nTicksPerSecond = 0; @@ -946,7 +948,11 @@ typedef HANDLE MicroProfileThread; DWORD _stdcall ThreadTrampoline(void* pFunc) { MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc; - return (uint32_t)F(0); + + // The return value of F will always return a void*, however, this is for + // compatibility with pthreads. The underlying "address" of the pointer + // is always a 32-bit value, so this cast is safe to perform. + return static_cast<DWORD>(reinterpret_cast<uint64_t>(F(0))); } inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func) @@ -1742,10 +1748,10 @@ void MicroProfileFlip() } } } - for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) { - pLog->nGroupTicks[i] += nGroupTicks[i]; - pFrameGroup[i] += nGroupTicks[i]; + pLog->nGroupTicks[j] += nGroupTicks[j]; + pFrameGroup[j] += nGroupTicks[j]; } pLog->nStackPos = nStackPos; } @@ -3328,7 +3334,7 @@ bool MicroProfileIsLocalThread(uint32_t nThreadId) #endif #else -bool MicroProfileIsLocalThread(uint32_t nThreadId){return false;} +bool MicroProfileIsLocalThread([[maybe_unused]] uint32_t nThreadId) { return false; } void MicroProfileStopContextSwitchTrace(){} void MicroProfileStartContextSwitchTrace(){} @@ -3576,7 +3582,7 @@ int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu) #undef S -#ifdef _WIN32 +#ifdef _MSC_VER #pragma warning(pop) #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 71efbb40d..dbda528ce 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,7 +32,6 @@ if (MSVC) # /Zc:inline - Let codegen omit inline functions in object files # /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null add_compile_options( - /W3 /MP /Zi /Zo @@ -43,6 +42,13 @@ if (MSVC) /Zc:externConstexpr /Zc:inline /Zc:throwingNew + + # Warnings + /W3 + /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect + /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'? + /we4555 # Expression has no effect; expected expression with side-effect + /we4834 # Discarding return value of function with 'nodiscard' attribute ) # /GS- - No stack buffer overflow checks @@ -56,6 +62,7 @@ else() -Werror=implicit-fallthrough -Werror=missing-declarations -Werror=reorder + -Werror=unused-result -Wextra -Wmissing-declarations -Wno-attributes diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e50ab2922..207c7a0a6 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -190,6 +190,22 @@ if(ARCHITECTURE_x86_64) ) endif() +if (MSVC) + target_compile_definitions(common PRIVATE + # The standard library doesn't provide any replacement for codecvt yet + # so we can disable this deprecation warning for the time being. + _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING + ) + target_compile_options(common PRIVATE + /W4 + /WX + ) +else() + target_compile_options(common PRIVATE + -Werror + ) +endif() + create_target_directory_groups(common) find_package(Boost 1.71 COMPONENTS context headers REQUIRED) diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index e186ed880..b209f52fc 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -79,9 +79,9 @@ void Fiber::Exit() { released = true; } -void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) { +void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) { rewind_point = std::move(rewind_func); - rewind_parameter = start_parameter; + rewind_parameter = rewind_param; } void Fiber::Rewind() { @@ -161,9 +161,9 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); } -void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) { +void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) { rewind_point = std::move(rewind_func); - rewind_parameter = start_parameter; + rewind_parameter = rewind_param; } Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {} diff --git a/src/common/fiber.h b/src/common/fiber.h index cefd61df9..699286ee2 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -49,7 +49,7 @@ public: static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to); [[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber(); - void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter); + void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param); void Rewind(); diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 16c3713e0..18fbfa25b 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -472,13 +472,14 @@ u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry, } bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) { - const auto callback = [recursion](u64* num_entries_out, const std::string& directory, - const std::string& virtual_name) -> bool { - std::string new_path = directory + DIR_SEP_CHR + virtual_name; + const auto callback = [recursion](u64*, const std::string& directory, + const std::string& virtual_name) { + const std::string new_path = directory + DIR_SEP_CHR + virtual_name; if (IsDirectory(new_path)) { - if (recursion == 0) + if (recursion == 0) { return false; + } return DeleteDirRecursively(new_path, recursion - 1); } return Delete(new_path); @@ -492,7 +493,8 @@ bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) return true; } -void CopyDir(const std::string& source_path, const std::string& dest_path) { +void CopyDir([[maybe_unused]] const std::string& source_path, + [[maybe_unused]] const std::string& dest_path) { #ifndef _WIN32 if (source_path == dest_path) { return; @@ -553,7 +555,7 @@ std::optional<std::string> GetCurrentDir() { std::string strDir = dir; #endif free(dir); - return std::move(strDir); + return strDir; } bool SetCurrentDir(const std::string& directory) { @@ -772,21 +774,23 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s void SplitFilename83(const std::string& filename, std::array<char, 9>& short_name, std::array<char, 4>& extension) { - const std::string forbidden_characters = ".\"/\\[]:;=, "; + static constexpr std::string_view forbidden_characters = ".\"/\\[]:;=, "; // On a FAT32 partition, 8.3 names are stored as a 11 bytes array, filled with spaces. short_name = {{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}}; extension = {{' ', ' ', ' ', '\0'}}; - std::string::size_type point = filename.rfind('.'); - if (point == filename.size() - 1) + auto point = filename.rfind('.'); + if (point == filename.size() - 1) { point = filename.rfind('.', point); + } // Get short name. int j = 0; for (char letter : filename.substr(0, point)) { - if (forbidden_characters.find(letter, 0) != std::string::npos) + if (forbidden_characters.find(letter, 0) != std::string::npos) { continue; + } if (j == 8) { // TODO(Link Mauve): also do that for filenames containing a space. // TODO(Link Mauve): handle multiple files having the same short name. @@ -794,14 +798,15 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam short_name[7] = '1'; break; } - short_name[j++] = toupper(letter); + short_name[j++] = static_cast<char>(std::toupper(letter)); } // Get extension. if (point != std::string::npos) { j = 0; - for (char letter : filename.substr(point + 1, 3)) - extension[j++] = toupper(letter); + for (char letter : filename.substr(point + 1, 3)) { + extension[j++] = static_cast<char>(std::toupper(letter)); + } } } diff --git a/src/common/file_util.h b/src/common/file_util.h index 8b587320f..840cde2a6 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h @@ -232,7 +232,7 @@ public: void Swap(IOFile& other) noexcept; - [[nodiscard]] bool Open(const std::string& filename, const char openmode[], int flags = 0); + bool Open(const std::string& filename, const char openmode[], int flags = 0); bool Close(); template <typename T> diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 62cfde397..90dfa22ca 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -274,7 +274,6 @@ const char* GetLogClassName(Class log_class) { case Class::Count: break; } - UNREACHABLE(); return "Invalid"; } @@ -293,7 +292,6 @@ const char* GetLevelName(Level log_level) { break; } #undef LVL - UNREACHABLE(); return "Invalid"; } diff --git a/src/common/misc.cpp b/src/common/misc.cpp index 68cb86cd1..1d5393597 100644 --- a/src/common/misc.cpp +++ b/src/common/misc.cpp @@ -16,16 +16,23 @@ // Call directly after the command or use the error num. // This function might change the error code. std::string GetLastErrorMsg() { - static const std::size_t buff_size = 255; + static constexpr std::size_t buff_size = 255; char err_str[buff_size]; #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), err_str, buff_size, nullptr); + return std::string(err_str, buff_size); +#elif defined(__GLIBC__) && (_GNU_SOURCE || (_POSIX_C_SOURCE < 200112L && _XOPEN_SOURCE < 600)) + // Thread safe (GNU-specific) + const char* str = strerror_r(errno, err_str, buff_size); + return std::string(str); #else // Thread safe (XSI-compliant) - strerror_r(errno, err_str, buff_size); + const int success = strerror_r(errno, err_str, buff_size); + if (success != 0) { + return {}; + } + return std::string(err_str); #endif - - return std::string(err_str, buff_size); } diff --git a/src/common/stream.h b/src/common/stream.h index 2585c16af..0e40692de 100644 --- a/src/common/stream.h +++ b/src/common/stream.h @@ -21,6 +21,12 @@ public: explicit Stream(); ~Stream(); + Stream(const Stream&) = delete; + Stream& operator=(const Stream&) = delete; + + Stream(Stream&&) = default; + Stream& operator=(Stream&&) = default; + /// Reposition bitstream "cursor" to the specified offset from origin void Seek(s32 offset, SeekOrigin origin); @@ -30,15 +36,15 @@ public: /// Writes byte at current position void WriteByte(u8 byte); - std::size_t GetPosition() const { + [[nodiscard]] std::size_t GetPosition() const { return position; } - std::vector<u8>& GetBuffer() { + [[nodiscard]] std::vector<u8>& GetBuffer() { return buffer; } - const std::vector<u8>& GetBuffer() const { + [[nodiscard]] const std::vector<u8>& GetBuffer() const { return buffer; } diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 84883a1d3..4cba2aaa4 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -8,6 +8,7 @@ #include <cstdlib> #include <locale> #include <sstream> + #include "common/common_paths.h" #include "common/logging/log.h" #include "common/string_util.h" @@ -21,14 +22,14 @@ namespace Common { /// Make a string lowercase std::string ToLower(std::string str) { std::transform(str.begin(), str.end(), str.begin(), - [](unsigned char c) { return std::tolower(c); }); + [](unsigned char c) { return static_cast<char>(std::tolower(c)); }); return str; } /// Make a string uppercase std::string ToUpper(std::string str) { std::transform(str.begin(), str.end(), str.begin(), - [](unsigned char c) { return std::toupper(c); }); + [](unsigned char c) { return static_cast<char>(std::toupper(c)); }); return str; } diff --git a/src/common/timer.cpp b/src/common/timer.cpp index 2dc15e434..d17dc2a50 100644 --- a/src/common/timer.cpp +++ b/src/common/timer.cpp @@ -142,20 +142,18 @@ std::string Timer::GetTimeFormatted() { // ---------------- double Timer::GetDoubleTime() { // Get continuous timestamp - u64 TmpSeconds = static_cast<u64>(Common::Timer::GetTimeSinceJan1970().count()); - double ms = static_cast<u64>(GetTimeMs().count()) % 1000; + auto tmp_seconds = static_cast<u64>(GetTimeSinceJan1970().count()); + const auto ms = static_cast<double>(static_cast<u64>(GetTimeMs().count()) % 1000); // Remove a few years. We only really want enough seconds to make // sure that we are detecting actual actions, perhaps 60 seconds is // enough really, but I leave a year of seconds anyway, in case the // user's clock is incorrect or something like that. - TmpSeconds = TmpSeconds - (38 * 365 * 24 * 60 * 60); + tmp_seconds = tmp_seconds - (38 * 365 * 24 * 60 * 60); // Make a smaller integer that fits in the double - u32 Seconds = static_cast<u32>(TmpSeconds); - double TmpTime = Seconds + ms; - - return TmpTime; + const auto seconds = static_cast<u32>(tmp_seconds); + return seconds + ms; } } // Namespace Common diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 7a20e95b7..452a2837e 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -53,7 +53,7 @@ public: return Common::Divide128On32(temporary, 1000000000).first; } - void Pause(bool is_paused) override { + void Pause([[maybe_unused]] bool is_paused) override { // Do nothing in this clock type. } diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 7c503df26..97aab6ac9 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -34,7 +34,7 @@ private: /// value used to reduce the native clocks accuracy as some apss rely on /// undefined behavior where the level of accuracy in the clock shouldn't /// be higher. - static constexpr u64 inaccuracy_mask = ~(0x400 - 1); + static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); SpinLock rtsc_serialize{}; u64 last_measure{}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e0f207f3e..9a983e81d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -454,6 +454,8 @@ add_library(core STATIC hle/service/nvdrv/nvdrv.h hle/service/nvdrv/nvmemp.cpp hle/service/nvdrv/nvmemp.h + hle/service/nvdrv/syncpoint_manager.cpp + hle/service/nvdrv/syncpoint_manager.h hle/service/nvflinger/buffer_queue.cpp hle/service/nvflinger/buffer_queue.h hle/service/nvflinger/nvflinger.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index fde2ccc09..242796008 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -179,16 +179,18 @@ struct System::Impl { arp_manager.ResetAll(); telemetry_session = std::make_unique<Core::TelemetrySession>(); + + gpu_core = VideoCore::CreateGPU(emu_window, system); + if (!gpu_core) { + return ResultStatus::ErrorVideoCore; + } + service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); Service::Init(service_manager, system); GDBStub::DeferStart(); interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); - gpu_core = VideoCore::CreateGPU(emu_window, system); - if (!gpu_core) { - return ResultStatus::ErrorVideoCore; - } // Initialize time manager, which must happen after kernel is created time_manager.Initialize(); diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index ff9d9248b..b17529dee 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <bitset> +#include <ctime> #include <memory> #include <random> #include "common/alignment.h" @@ -123,7 +124,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name, : kernel.CreateNewUserProcessID(); process->capabilities.InitializeForMetadatalessProcess(); - std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(0)); + std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr))); std::uniform_int_distribution<u64> distribution; std::generate(process->random_entropy.begin(), process->random_entropy.end(), [&] { return distribution(rng); }); diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 9ad5bbf0d..eeaca44b6 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -166,7 +166,7 @@ public: {0, &RelocatableObject::LoadNro, "LoadNro"}, {1, &RelocatableObject::UnloadNro, "UnloadNro"}, {2, &RelocatableObject::LoadNrr, "LoadNrr"}, - {3, nullptr, "UnloadNrr"}, + {3, &RelocatableObject::UnloadNrr, "UnloadNrr"}, {4, &RelocatableObject::Initialize, "Initialize"}, {10, nullptr, "LoadNrrEx"}, }; @@ -272,6 +272,20 @@ public: rb.Push(RESULT_SUCCESS); } + void UnloadNrr(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto pid = rp.Pop<u64>(); + const auto nrr_address = rp.Pop<VAddr>(); + + LOG_DEBUG(Service_LDR, "called with pid={}, nrr_address={:016X}", pid, nrr_address); + + nrr.erase(nrr_address); + + IPC::ResponseBuilder rb{ctx, 2}; + + rb.Push(RESULT_SUCCESS); + } + bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start, std::size_t size) const { constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b27ee0502..8356a8139 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -15,8 +15,9 @@ namespace Service::Nvidia::Devices { -nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) - : nvdevice(system), events_interface{events_interface} {} +nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface, + SyncpointManager& syncpoint_manager) + : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} nvhost_ctrl::~nvhost_ctrl() = default; u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -36,8 +37,8 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::v return IocCtrlEventRegister(input, output); case IoctlCommand::IocCtrlEventUnregisterCommand: return IocCtrlEventUnregister(input, output); - case IoctlCommand::IocCtrlEventSignalCommand: - return IocCtrlEventSignal(input, output); + case IoctlCommand::IocCtrlClearEventWaitCommand: + return IocCtrlClearEventWait(input, output); default: UNIMPLEMENTED_MSG("Unimplemented ioctl"); return 0; @@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& return NvResult::BadParameter; } + if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { + params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id); + std::memcpy(output.data(), ¶ms, sizeof(params)); + return NvResult::Success; + } + + if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id); + syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { + params.value = new_value; + std::memcpy(output.data(), ¶ms, sizeof(params)); + return NvResult::Success; + } + auto event = events_interface.events[event_id]; auto& gpu = system.GPU(); + // This is mostly to take into account unimplemented features. As synced // gpu is always synced. if (!gpu.IsAsync()) { - event.writable->Signal(); + event.event.writable->Signal(); return NvResult::Success; } auto lock = gpu.LockSync(); - const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); + const u32 current_syncpoint_value = event.fence.value; const s32 diff = current_syncpoint_value - params.threshold; if (diff >= 0) { - event.writable->Signal(); + event.event.writable->Signal(); params.value = current_syncpoint_value; std::memcpy(output.data(), ¶ms, sizeof(params)); return NvResult::Success; @@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - event.writable->Clear(); + event.event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); if (!is_async && ctrl.fresh_call) { ctrl.must_delay = true; @@ -154,24 +169,22 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto return NvResult::Success; } -u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) { +u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { IocCtrlEventSignalParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); - // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization - // It is believed from RE to cancel the GPU Event. However, better research is required - u32 event_id = params.user_event_id & 0x00FF; - LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id); + + u32 event_id = params.event_id & 0x00FF; + LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); + if (event_id >= MaxNvEvents) { return NvResult::BadParameter; } if (events_interface.status[event_id] == EventState::Waiting) { - auto& gpu = system.GPU(); - if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id], - events_interface.assigned_value[event_id])) { - events_interface.LiberateEvent(event_id); - events_interface.events[event_id].writable->Signal(); - } + events_interface.LiberateEvent(event_id); } + + syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id); + return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 9898623de..24ad96cb9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices { class nvhost_ctrl final : public nvdevice { public: - explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); + explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface, + SyncpointManager& syncpoint_manager); ~nvhost_ctrl() override; u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -31,7 +32,7 @@ private: IocSyncptWaitexCommand = 0xC0100019, IocSyncptReadMaxCommand = 0xC008001A, IocGetConfigCommand = 0xC183001B, - IocCtrlEventSignalCommand = 0xC004001C, + IocCtrlClearEventWaitCommand = 0xC004001C, IocCtrlEventWaitCommand = 0xC010001D, IocCtrlEventWaitAsyncCommand = 0xC010001E, IocCtrlEventRegisterCommand = 0xC004001F, @@ -94,7 +95,7 @@ private: static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size"); struct IocCtrlEventSignalParams { - u32_le user_event_id; + u32_le event_id; }; static_assert(sizeof(IocCtrlEventSignalParams) == 4, "IocCtrlEventSignalParams is incorrect size"); @@ -142,9 +143,10 @@ private: u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); - u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output); + u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); EventInterface& events_interface; + SyncpointManager& syncpoint_manager; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index f1966ac0e..152019548 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -7,14 +7,20 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" namespace Service::Nvidia::Devices { -nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) - : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} { + channel_fence.id = syncpoint_manager.AllocateSyncpoint(); + channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); +} + nvhost_gpu::~nvhost_gpu() = default; u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, params.unk3); - auto& gpu = system.GPU(); - params.fence_out.id = assigned_syncpoints; - params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); - assigned_syncpoints++; + channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); + + params.fence_out = channel_fence; + std::memcpy(output.data(), ¶ms, output.size()); return 0; } @@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< return 0; } -u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { - if (input.size() < sizeof(IoctlSubmitGpfifo)) { - UNIMPLEMENTED(); +static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) { + return { + Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, + Tegra::SubmissionMode::Increasing), + {fence.value}, + Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, + Tegra::SubmissionMode::Increasing), + Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), + }; +} + +static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) { + std::vector<Tegra::CommandHeader> result{ + Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, + Tegra::SubmissionMode::Increasing), + {}}; + + for (u32 count = 0; count < add_increment; ++count) { + result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, + Tegra::SubmissionMode::Increasing)); + result.emplace_back( + Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id)); } - IoctlSubmitGpfifo params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); + + return result; +} + +static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence, + u32 add_increment) { + std::vector<Tegra::CommandHeader> result{ + Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, + Tegra::SubmissionMode::Increasing), + {}}; + const std::vector<Tegra::CommandHeader> increment{ + BuildIncrementCommandList(fence, add_increment)}; + + result.insert(result.end(), increment.begin(), increment.end()); + + return result; +} + +u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, + Tegra::CommandList&& entries) { LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, params.num_entries, params.flags.raw); - ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + - params.num_entries * sizeof(Tegra::CommandListHeader), - "Incorrect input size"); + auto& gpu = system.GPU(); - Tegra::CommandList entries(params.num_entries); - std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], - params.num_entries * sizeof(Tegra::CommandListHeader)); + params.fence_out.id = channel_fence.id; - UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); - UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); + if (params.flags.add_wait.Value() && + !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { + gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); + } - auto& gpu = system.GPU(); - u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); - if (params.flags.increment.Value()) { - params.fence_out.value += current_syncpoint_value; + if (params.flags.add_increment.Value() || params.flags.increment.Value()) { + const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; + params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( + params.fence_out.id, params.AddIncrementValue() + increment_value); } else { - params.fence_out.value = current_syncpoint_value; + params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); } + + entries.RefreshIntegrityChecks(gpu); gpu.PushGPUEntries(std::move(entries)); + if (params.flags.add_increment.Value()) { + if (params.flags.suppress_wfi) { + gpu.PushGPUEntries(Tegra::CommandList{ + BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())}); + } else { + gpu.PushGPUEntries(Tegra::CommandList{ + BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())}); + } + } + std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmitGpfifo)); return 0; } +u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { + if (input.size() < sizeof(IoctlSubmitGpfifo)) { + UNIMPLEMENTED(); + } + IoctlSubmitGpfifo params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); + + Tegra::CommandList entries(params.num_entries); + std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], + params.num_entries * sizeof(Tegra::CommandListHeader)); + + return SubmitGPFIFOImpl(params, output, std::move(entries)); +} + u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, const std::vector<u8>& input2, IoctlVersion version) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { @@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, } IoctlSubmitGpfifo params{}; std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); - LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, - params.num_entries, params.flags.raw); Tegra::CommandList entries(params.num_entries); if (version == IoctlVersion::Version2) { - std::memcpy(entries.data(), input2.data(), + std::memcpy(entries.command_lists.data(), input2.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); } else { - system.Memory().ReadBlock(params.address, entries.data(), + system.Memory().ReadBlock(params.address, entries.command_lists.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); } - UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); - UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); - - auto& gpu = system.GPU(); - u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); - if (params.flags.increment.Value()) { - params.fence_out.value += current_syncpoint_value; - } else { - params.fence_out.value = current_syncpoint_value; - } - gpu.PushGPUEntries(std::move(entries)); - std::memcpy(output.data(), ¶ms, output.size()); - return 0; + return SubmitGPFIFOImpl(params, output, std::move(entries)); } u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 2ac74743f..a252fc06d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -11,6 +11,11 @@ #include "common/swap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/nvdata.h" +#include "video_core/dma_pusher.h" + +namespace Service::Nvidia { +class SyncpointManager; +} namespace Service::Nvidia::Devices { @@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); class nvhost_gpu final : public nvdevice { public: - explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_gpu() override; u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -162,10 +168,15 @@ private: u32_le raw; BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list BitField<1, 1, u32_le> add_increment; // append an increment to the list - BitField<2, 1, u32_le> new_hw_format; // Mostly ignored + BitField<2, 1, u32_le> new_hw_format; // mostly ignored + BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt BitField<8, 1, u32_le> increment; // increment the returned fence } flags; Fence fence_out; // returned new fence object for others to wait on + + u32 AddIncrementValue() const { + return flags.add_increment.Value() << 1; + } }; static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), "IoctlSubmitGpfifo is incorrect size"); @@ -190,6 +201,8 @@ private: u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); + u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, + Tegra::CommandList&& entries); u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, const std::vector<u8>& input2, IoctlVersion version); @@ -198,7 +211,8 @@ private: u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); std::shared_ptr<nvmap> nvmap_dev; - u32 assigned_syncpoints{}; + SyncpointManager& syncpoint_manager; + Fence channel_fence; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 85792495f..30f03f845 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -38,7 +38,7 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s namespace NvErrCodes { constexpr u32 Success{}; -constexpr u32 OutOfMemory{static_cast<u32>(-12)}; +[[maybe_unused]] constexpr u32 OutOfMemory{static_cast<u32>(-12)}; constexpr u32 InvalidInput{static_cast<u32>(-22)}; } // namespace NvErrCodes diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 803c1a984..a46755cdc 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -21,6 +21,7 @@ #include "core/hle/service/nvdrv/interface.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvdrv/nvmemp.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/hle/service/nvflinger/nvflinger.h" namespace Service::Nvidia { @@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger nvflinger.SetNVDrvInstance(module_); } -Module::Module(Core::System& system) { +Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); - events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label); + events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)}; events_interface.status[i] = EventState::Free; events_interface.registered[i] = false; } auto nvmap_dev = std::make_shared<Devices::nvmap>(system); devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); - devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); + devices["/dev/nvhost-gpu"] = + std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager); devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); - devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); + devices["/dev/nvhost-ctrl"] = + std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); @@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { if (events_interface.assigned_syncpt[i] == syncpoint_id && events_interface.assigned_value[i] == value) { events_interface.LiberateEvent(i); - events_interface.events[i].writable->Signal(); + events_interface.events[i].event.writable->Signal(); } } } std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { - return events_interface.events[event_id].readable; + return events_interface.events[event_id].event.readable; } std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { - return events_interface.events[event_id].writable; + return events_interface.events[event_id].event.writable; } } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 7706a5590..f3d863dac 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "core/hle/kernel/writable_event.h" #include "core/hle/service/nvdrv/nvdata.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/hle/service/service.h" namespace Core { @@ -22,15 +23,23 @@ class NVFlinger; namespace Service::Nvidia { +class SyncpointManager; + namespace Devices { class nvdevice; } +/// Represents an Nvidia event +struct NvEvent { + Kernel::EventPair event; + Fence fence{}; +}; + struct EventInterface { // Mask representing currently busy events u64 events_mask{}; // Each kernel event associated to an NV event - std::array<Kernel::EventPair, MaxNvEvents> events; + std::array<NvEvent, MaxNvEvents> events; // The status of the current NVEvent std::array<EventState, MaxNvEvents> status{}; // Tells if an NVEvent is registered or not @@ -119,6 +128,9 @@ public: std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; private: + /// Manages syncpoints on the host + SyncpointManager syncpoint_manager; + /// Id to use for the next open file descriptor. u32 next_fd = 1; diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp new file mode 100644 index 000000000..0151a03b7 --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" +#include "video_core/gpu.h" + +namespace Service::Nvidia { + +SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {} + +SyncpointManager::~SyncpointManager() = default; + +u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { + syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id); + return GetSyncpointMin(syncpoint_id); +} + +u32 SyncpointManager::AllocateSyncpoint() { + for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { + if (!syncpoints[syncpoint_id].is_allocated) { + syncpoints[syncpoint_id].is_allocated = true; + return syncpoint_id; + } + } + UNREACHABLE_MSG("No more available syncpoints!"); + return {}; +} + +u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { + for (u32 index = 0; index < value; ++index) { + syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); + } + + return GetSyncpointMax(syncpoint_id); +} + +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h new file mode 100644 index 000000000..4168b6c7e --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.h @@ -0,0 +1,85 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <atomic> + +#include "common/common_types.h" +#include "core/hle/service/nvdrv/nvdata.h" + +namespace Tegra { +class GPU; +} + +namespace Service::Nvidia { + +class SyncpointManager final { +public: + explicit SyncpointManager(Tegra::GPU& gpu); + ~SyncpointManager(); + + /** + * Returns true if the specified syncpoint is expired for the given value. + * @param syncpoint_id Syncpoint ID to check. + * @param value Value to check against the specified syncpoint. + * @returns True if the specified syncpoint is expired for the given value, otherwise False. + */ + bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const { + return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value); + } + + /** + * Gets the lower bound for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to get the lower bound for. + * @returns The lower bound for the specified syncpoint. + */ + u32 GetSyncpointMin(u32 syncpoint_id) const { + return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed); + } + + /** + * Gets the uper bound for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to get the upper bound for. + * @returns The upper bound for the specified syncpoint. + */ + u32 GetSyncpointMax(u32 syncpoint_id) const { + return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed); + } + + /** + * Refreshes the minimum value for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to be refreshed. + * @returns The new syncpoint minimum value. + */ + u32 RefreshSyncpoint(u32 syncpoint_id); + + /** + * Allocates a new syncoint. + * @returns The syncpoint ID for the newly allocated syncpoint. + */ + u32 AllocateSyncpoint(); + + /** + * Increases the maximum value for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to be increased. + * @param value Value to increase the specified syncpoint by. + * @returns The new syncpoint maximum value. + */ + u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value); + +private: + struct Syncpoint { + std::atomic<u32> min; + std::atomic<u32> max; + std::atomic<bool> is_allocated; + }; + + std::array<Syncpoint, MaxSyncPoints> syncpoints{}; + + Tegra::GPU& gpu; +}; + +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index c64673dba..44aa2bdae 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -242,6 +242,10 @@ void NVFlinger::Compose() { const auto& igbp_buffer = buffer->get().igbp_buffer; + if (!system.IsPoweredOn()) { + return; // We are likely shutting down + } + auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; guard->unlock(); diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp index c95feb0d7..b912188b6 100644 --- a/src/input_common/gcadapter/gc_adapter.cpp +++ b/src/input_common/gcadapter/gc_adapter.cpp @@ -21,26 +21,6 @@ namespace GCAdapter { -// Used to loop through and assign button in poller -constexpr std::array<PadButton, 12> PadButtonArray{ - PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, PadButton::PAD_BUTTON_DOWN, - PadButton::PAD_BUTTON_UP, PadButton::PAD_TRIGGER_Z, PadButton::PAD_TRIGGER_R, - PadButton::PAD_TRIGGER_L, PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, - PadButton::PAD_BUTTON_X, PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_START, -}; - -static void PadToState(const GCPadStatus& pad, GCState& out_state) { - for (const auto& button : PadButtonArray) { - const auto button_key = static_cast<u16>(button); - const auto button_value = (pad.button & button_key) != 0; - out_state.buttons.insert_or_assign(static_cast<s32>(button_key), button_value); - } - - for (std::size_t i = 0; i < pad.axis_values.size(); ++i) { - out_state.axes.insert_or_assign(static_cast<u32>(i), pad.axis_values[i]); - } -} - Adapter::Adapter() { if (usb_adapter_handle != nullptr) { return; @@ -49,168 +29,263 @@ Adapter::Adapter() { const int init_res = libusb_init(&libusb_ctx); if (init_res == LIBUSB_SUCCESS) { - Setup(); + adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this); } else { LOG_ERROR(Input, "libusb could not be initialized. failed with error = {}", init_res); } } -GCPadStatus Adapter::GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload) { - GCPadStatus pad = {}; - const std::size_t offset = 1 + (9 * port); +Adapter::~Adapter() { + Reset(); +} + +void Adapter::AdapterInputThread() { + LOG_DEBUG(Input, "GC Adapter input thread started"); + s32 payload_size{}; + AdapterPayload adapter_payload{}; + + if (adapter_scan_thread.joinable()) { + adapter_scan_thread.join(); + } + + while (adapter_input_thread_running) { + libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(), + static_cast<s32>(adapter_payload.size()), &payload_size, 16); + if (IsPayloadCorrect(adapter_payload, payload_size)) { + UpdateControllers(adapter_payload); + UpdateVibrations(); + } + std::this_thread::yield(); + } - adapter_controllers_status[port] = static_cast<ControllerTypes>(adapter_payload[offset] >> 4); + if (restart_scan_thread) { + adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this); + restart_scan_thread = false; + } +} + +bool Adapter::IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size) { + if (payload_size != static_cast<s32>(adapter_payload.size()) || + adapter_payload[0] != LIBUSB_DT_HID) { + LOG_DEBUG(Input, "Error reading payload (size: {}, type: {:02x})", payload_size, + adapter_payload[0]); + if (input_error_counter++ > 20) { + LOG_ERROR(Input, "GC adapter timeout, Is the adapter connected?"); + adapter_input_thread_running = false; + restart_scan_thread = true; + } + return false; + } + + input_error_counter = 0; + return true; +} + +void Adapter::UpdateControllers(const AdapterPayload& adapter_payload) { + for (std::size_t port = 0; port < pads.size(); ++port) { + const std::size_t offset = 1 + (9 * port); + const auto type = static_cast<ControllerTypes>(adapter_payload[offset] >> 4); + UpdatePadType(port, type); + if (DeviceConnected(port)) { + const u8 b1 = adapter_payload[offset + 1]; + const u8 b2 = adapter_payload[offset + 2]; + UpdateStateButtons(port, b1, b2); + UpdateStateAxes(port, adapter_payload); + if (configuring) { + UpdateYuzuSettings(port); + } + } + } +} + +void Adapter::UpdatePadType(std::size_t port, ControllerTypes pad_type) { + if (pads[port].type == pad_type) { + return; + } + // Device changed reset device and set new type + ResetDevice(port); + pads[port].type = pad_type; +} + +void Adapter::UpdateStateButtons(std::size_t port, u8 b1, u8 b2) { + if (port >= pads.size()) { + return; + } static constexpr std::array<PadButton, 8> b1_buttons{ - PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, PadButton::PAD_BUTTON_X, - PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, - PadButton::PAD_BUTTON_DOWN, PadButton::PAD_BUTTON_UP, + PadButton::ButtonA, PadButton::ButtonB, PadButton::ButtonX, PadButton::ButtonY, + PadButton::ButtonLeft, PadButton::ButtonRight, PadButton::ButtonDown, PadButton::ButtonUp, }; static constexpr std::array<PadButton, 4> b2_buttons{ - PadButton::PAD_BUTTON_START, - PadButton::PAD_TRIGGER_Z, - PadButton::PAD_TRIGGER_R, - PadButton::PAD_TRIGGER_L, + PadButton::ButtonStart, + PadButton::TriggerZ, + PadButton::TriggerR, + PadButton::TriggerL, }; + pads[port].buttons = 0; + for (std::size_t i = 0; i < b1_buttons.size(); ++i) { + if ((b1 & (1U << i)) != 0) { + pads[port].buttons = + static_cast<u16>(pads[port].buttons | static_cast<u16>(b1_buttons[i])); + pads[port].last_button = b1_buttons[i]; + } + } + for (std::size_t j = 0; j < b2_buttons.size(); ++j) { + if ((b2 & (1U << j)) != 0) { + pads[port].buttons = + static_cast<u16>(pads[port].buttons | static_cast<u16>(b2_buttons[j])); + pads[port].last_button = b2_buttons[j]; + } + } +} + +void Adapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload) { + if (port >= pads.size()) { + return; + } + + const std::size_t offset = 1 + (9 * port); static constexpr std::array<PadAxes, 6> axes{ PadAxes::StickX, PadAxes::StickY, PadAxes::SubstickX, PadAxes::SubstickY, PadAxes::TriggerLeft, PadAxes::TriggerRight, }; - if (adapter_controllers_status[port] == ControllerTypes::None && !get_origin[port]) { - // Controller may have been disconnected, recalibrate if reconnected. - get_origin[port] = true; + for (const PadAxes axis : axes) { + const auto index = static_cast<std::size_t>(axis); + const u8 axis_value = adapter_payload[offset + 3 + index]; + if (pads[port].axis_origin[index] == 255) { + pads[port].axis_origin[index] = axis_value; + } + pads[port].axis_values[index] = + static_cast<s16>(axis_value - pads[port].axis_origin[index]); } +} - if (adapter_controllers_status[port] != ControllerTypes::None) { - const u8 b1 = adapter_payload[offset + 1]; - const u8 b2 = adapter_payload[offset + 2]; +void Adapter::UpdateYuzuSettings(std::size_t port) { + if (port >= pads.size()) { + return; + } - for (std::size_t i = 0; i < b1_buttons.size(); ++i) { - if ((b1 & (1U << i)) != 0) { - pad.button = static_cast<u16>(pad.button | static_cast<u16>(b1_buttons[i])); - } - } + constexpr u8 axis_threshold = 50; + GCPadStatus pad_status = {.port = port}; - for (std::size_t j = 0; j < b2_buttons.size(); ++j) { - if ((b2 & (1U << j)) != 0) { - pad.button = static_cast<u16>(pad.button | static_cast<u16>(b2_buttons[j])); - } - } - for (PadAxes axis : axes) { - const auto index = static_cast<std::size_t>(axis); - pad.axis_values[index] = adapter_payload[offset + 3 + index]; - } + if (pads[port].buttons != 0) { + pad_status.button = pads[port].last_button; + pad_queue.Push(pad_status); + } + + // Accounting for a threshold here to ensure an intentional press + for (std::size_t i = 0; i < pads[port].axis_values.size(); ++i) { + const s16 value = pads[port].axis_values[i]; - if (get_origin[port]) { - origin_status[port].axis_values = pad.axis_values; - get_origin[port] = false; + if (value > axis_threshold || value < -axis_threshold) { + pad_status.axis = static_cast<PadAxes>(i); + pad_status.axis_value = value; + pad_status.axis_threshold = axis_threshold; + pad_queue.Push(pad_status); } } - return pad; } -void Adapter::Read() { - LOG_DEBUG(Input, "GC Adapter Read() thread started"); +void Adapter::UpdateVibrations() { + // Use 8 states to keep the switching between on/off fast enough for + // a human to not notice the difference between switching from on/off + // More states = more rumble strengths = slower update time + constexpr u8 vibration_states = 8; - int payload_size; - std::array<u8, 37> adapter_payload; - std::array<GCPadStatus, 4> pads; - - while (adapter_thread_running) { - libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(), - sizeof(adapter_payload), &payload_size, 16); - - if (payload_size != sizeof(adapter_payload) || adapter_payload[0] != LIBUSB_DT_HID) { - LOG_ERROR(Input, - "Error reading payload (size: {}, type: {:02x}) Is the adapter connected?", - payload_size, adapter_payload[0]); - adapter_thread_running = false; // error reading from adapter, stop reading. - break; - } - for (std::size_t port = 0; port < pads.size(); ++port) { - pads[port] = GetPadStatus(port, adapter_payload); - if (DeviceConnected(port) && configuring) { - if (pads[port].button != 0) { - pad_queue[port].Push(pads[port]); - } + vibration_counter = (vibration_counter + 1) % vibration_states; - // Accounting for a threshold here to ensure an intentional press - for (size_t i = 0; i < pads[port].axis_values.size(); ++i) { - const u8 value = pads[port].axis_values[i]; - const u8 origin = origin_status[port].axis_values[i]; - - if (value > origin + pads[port].THRESHOLD || - value < origin - pads[port].THRESHOLD) { - pads[port].axis = static_cast<PadAxes>(i); - pads[port].axis_value = pads[port].axis_values[i]; - pad_queue[port].Push(pads[port]); - } - } - } - PadToState(pads[port], state[port]); - } - std::this_thread::yield(); + for (GCController& pad : pads) { + const bool vibrate = pad.rumble_amplitude > vibration_counter; + vibration_changed |= vibrate != pad.enable_vibration; + pad.enable_vibration = vibrate; } + SendVibrations(); } -void Adapter::Setup() { - // Initialize all controllers as unplugged - adapter_controllers_status.fill(ControllerTypes::None); - // Initialize all ports to store axis origin values - get_origin.fill(true); - - // pointer to list of connected usb devices - libusb_device** devices{}; - - // populate the list of devices, get the count - const ssize_t device_count = libusb_get_device_list(libusb_ctx, &devices); - if (device_count < 0) { - LOG_ERROR(Input, "libusb_get_device_list failed with error: {}", device_count); +void Adapter::SendVibrations() { + if (!rumble_enabled || !vibration_changed) { return; } - - if (devices != nullptr) { - for (std::size_t index = 0; index < static_cast<std::size_t>(device_count); ++index) { - if (CheckDeviceAccess(devices[index])) { - // GC Adapter found and accessible, registering it - GetGCEndpoint(devices[index]); - break; - } + s32 size{}; + constexpr u8 rumble_command = 0x11; + const u8 p1 = pads[0].enable_vibration; + const u8 p2 = pads[1].enable_vibration; + const u8 p3 = pads[2].enable_vibration; + const u8 p4 = pads[3].enable_vibration; + std::array<u8, 5> payload = {rumble_command, p1, p2, p3, p4}; + const int err = libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, payload.data(), + static_cast<s32>(payload.size()), &size, 16); + if (err) { + LOG_DEBUG(Input, "Adapter libusb write failed: {}", libusb_error_name(err)); + if (output_error_counter++ > 5) { + LOG_ERROR(Input, "GC adapter output timeout, Rumble disabled"); + rumble_enabled = false; } - libusb_free_device_list(devices, 1); + return; } + output_error_counter = 0; + vibration_changed = false; } -bool Adapter::CheckDeviceAccess(libusb_device* device) { - libusb_device_descriptor desc; - const int get_descriptor_error = libusb_get_device_descriptor(device, &desc); - if (get_descriptor_error) { - // could not acquire the descriptor, no point in trying to use it. - LOG_ERROR(Input, "libusb_get_device_descriptor failed with error: {}", - get_descriptor_error); - return false; +bool Adapter::RumblePlay(std::size_t port, f32 amplitude) { + amplitude = std::clamp(amplitude, 0.0f, 1.0f); + const auto raw_amp = static_cast<u8>(amplitude * 0x8); + pads[port].rumble_amplitude = raw_amp; + + return rumble_enabled; +} + +void Adapter::AdapterScanThread() { + adapter_scan_thread_running = true; + adapter_input_thread_running = false; + if (adapter_input_thread.joinable()) { + adapter_input_thread.join(); + } + ClearLibusbHandle(); + ResetDevices(); + while (adapter_scan_thread_running && !adapter_input_thread_running) { + Setup(); + std::this_thread::sleep_for(std::chrono::seconds(1)); } +} - if (desc.idVendor != 0x057e || desc.idProduct != 0x0337) { - // This isn't the device we are looking for. - return false; +void Adapter::Setup() { + usb_adapter_handle = libusb_open_device_with_vid_pid(libusb_ctx, 0x057e, 0x0337); + + if (usb_adapter_handle == NULL) { + return; + } + if (!CheckDeviceAccess()) { + ClearLibusbHandle(); + return; } - const int open_error = libusb_open(device, &usb_adapter_handle); - if (open_error == LIBUSB_ERROR_ACCESS) { - LOG_ERROR(Input, "Yuzu can not gain access to this device: ID {:04X}:{:04X}.", - desc.idVendor, desc.idProduct); - return false; + libusb_device* device = libusb_get_device(usb_adapter_handle); + + LOG_INFO(Input, "GC adapter is now connected"); + // GC Adapter found and accessible, registering it + if (GetGCEndpoint(device)) { + adapter_scan_thread_running = false; + adapter_input_thread_running = true; + rumble_enabled = true; + input_error_counter = 0; + output_error_counter = 0; + adapter_input_thread = std::thread(&Adapter::AdapterInputThread, this); } - if (open_error) { - LOG_ERROR(Input, "libusb_open failed to open device with error = {}", open_error); - return false; +} + +bool Adapter::CheckDeviceAccess() { + // This fixes payload problems from offbrand GCAdapters + const s32 control_transfer_error = + libusb_control_transfer(usb_adapter_handle, 0x21, 11, 0x0001, 0, nullptr, 0, 1000); + if (control_transfer_error < 0) { + LOG_ERROR(Input, "libusb_control_transfer failed with error= {}", control_transfer_error); } - int kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0); + s32 kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0); if (kernel_driver_error == 1) { kernel_driver_error = libusb_detach_kernel_driver(usb_adapter_handle, 0); if (kernel_driver_error != 0 && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) { @@ -236,13 +311,13 @@ bool Adapter::CheckDeviceAccess(libusb_device* device) { return true; } -void Adapter::GetGCEndpoint(libusb_device* device) { +bool Adapter::GetGCEndpoint(libusb_device* device) { libusb_config_descriptor* config = nullptr; const int config_descriptor_return = libusb_get_config_descriptor(device, 0, &config); if (config_descriptor_return != LIBUSB_SUCCESS) { LOG_ERROR(Input, "libusb_get_config_descriptor failed with error = {}", config_descriptor_return); - return; + return false; } for (u8 ic = 0; ic < config->bNumInterfaces; ic++) { @@ -264,31 +339,51 @@ void Adapter::GetGCEndpoint(libusb_device* device) { unsigned char clear_payload = 0x13; libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, &clear_payload, sizeof(clear_payload), nullptr, 16); - - adapter_thread_running = true; - adapter_input_thread = std::thread(&Adapter::Read, this); + return true; } -Adapter::~Adapter() { - Reset(); -} +void Adapter::JoinThreads() { + restart_scan_thread = false; + adapter_input_thread_running = false; + adapter_scan_thread_running = false; -void Adapter::Reset() { - if (adapter_thread_running) { - adapter_thread_running = false; + if (adapter_scan_thread.joinable()) { + adapter_scan_thread.join(); } + if (adapter_input_thread.joinable()) { adapter_input_thread.join(); } +} - adapter_controllers_status.fill(ControllerTypes::None); - get_origin.fill(true); - +void Adapter::ClearLibusbHandle() { if (usb_adapter_handle) { libusb_release_interface(usb_adapter_handle, 1); libusb_close(usb_adapter_handle); usb_adapter_handle = nullptr; } +} + +void Adapter::ResetDevices() { + for (std::size_t i = 0; i < pads.size(); ++i) { + ResetDevice(i); + } +} + +void Adapter::ResetDevice(std::size_t port) { + pads[port].type = ControllerTypes::None; + pads[port].enable_vibration = false; + pads[port].rumble_amplitude = 0; + pads[port].buttons = 0; + pads[port].last_button = PadButton::Undefined; + pads[port].axis_values.fill(0); + pads[port].axis_origin.fill(255); +} + +void Adapter::Reset() { + JoinThreads(); + ClearLibusbHandle(); + ResetDevices(); if (libusb_ctx) { libusb_exit(libusb_ctx); @@ -297,11 +392,11 @@ void Adapter::Reset() { std::vector<Common::ParamPackage> Adapter::GetInputDevices() const { std::vector<Common::ParamPackage> devices; - for (std::size_t port = 0; port < state.size(); ++port) { + for (std::size_t port = 0; port < pads.size(); ++port) { if (!DeviceConnected(port)) { continue; } - std::string name = fmt::format("Gamecube Controller {}", port); + std::string name = fmt::format("Gamecube Controller {}", port + 1); devices.emplace_back(Common::ParamPackage{ {"class", "gcpad"}, {"display", std::move(name)}, @@ -318,18 +413,18 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice( // This list also excludes any button that can't be really mapped static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 12> switch_to_gcadapter_button = { - std::pair{Settings::NativeButton::A, PadButton::PAD_BUTTON_A}, - {Settings::NativeButton::B, PadButton::PAD_BUTTON_B}, - {Settings::NativeButton::X, PadButton::PAD_BUTTON_X}, - {Settings::NativeButton::Y, PadButton::PAD_BUTTON_Y}, - {Settings::NativeButton::Plus, PadButton::PAD_BUTTON_START}, - {Settings::NativeButton::DLeft, PadButton::PAD_BUTTON_LEFT}, - {Settings::NativeButton::DUp, PadButton::PAD_BUTTON_UP}, - {Settings::NativeButton::DRight, PadButton::PAD_BUTTON_RIGHT}, - {Settings::NativeButton::DDown, PadButton::PAD_BUTTON_DOWN}, - {Settings::NativeButton::SL, PadButton::PAD_TRIGGER_L}, - {Settings::NativeButton::SR, PadButton::PAD_TRIGGER_R}, - {Settings::NativeButton::R, PadButton::PAD_TRIGGER_Z}, + std::pair{Settings::NativeButton::A, PadButton::ButtonA}, + {Settings::NativeButton::B, PadButton::ButtonB}, + {Settings::NativeButton::X, PadButton::ButtonX}, + {Settings::NativeButton::Y, PadButton::ButtonY}, + {Settings::NativeButton::Plus, PadButton::ButtonStart}, + {Settings::NativeButton::DLeft, PadButton::ButtonLeft}, + {Settings::NativeButton::DUp, PadButton::ButtonUp}, + {Settings::NativeButton::DRight, PadButton::ButtonRight}, + {Settings::NativeButton::DDown, PadButton::ButtonDown}, + {Settings::NativeButton::SL, PadButton::TriggerL}, + {Settings::NativeButton::SR, PadButton::TriggerR}, + {Settings::NativeButton::R, PadButton::TriggerZ}, }; if (!params.Has("port")) { return {}; @@ -352,8 +447,10 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice( for (const auto& [switch_button, gcadapter_axis] : switch_to_gcadapter_axis) { Common::ParamPackage button_params({{"engine", "gcpad"}}); button_params.Set("port", params.Get("port", 0)); - button_params.Set("button", static_cast<int>(PadButton::PAD_STICK)); - button_params.Set("axis", static_cast<int>(gcadapter_axis)); + button_params.Set("button", static_cast<s32>(PadButton::Stick)); + button_params.Set("axis", static_cast<s32>(gcadapter_axis)); + button_params.Set("threshold", 0.5f); + button_params.Set("direction", "+"); mapping.insert_or_assign(switch_button, std::move(button_params)); } return mapping; @@ -382,46 +479,33 @@ InputCommon::AnalogMapping Adapter::GetAnalogMappingForDevice( } bool Adapter::DeviceConnected(std::size_t port) const { - return adapter_controllers_status[port] != ControllerTypes::None; -} - -void Adapter::ResetDeviceType(std::size_t port) { - adapter_controllers_status[port] = ControllerTypes::None; + return pads[port].type != ControllerTypes::None; } void Adapter::BeginConfiguration() { - get_origin.fill(true); - for (auto& pq : pad_queue) { - pq.Clear(); - } + pad_queue.Clear(); configuring = true; } void Adapter::EndConfiguration() { - for (auto& pq : pad_queue) { - pq.Clear(); - } + pad_queue.Clear(); configuring = false; } -std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() { +Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() { return pad_queue; } -const std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() const { +const Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() const { return pad_queue; } -std::array<GCState, 4>& Adapter::GetPadState() { - return state; -} - -const std::array<GCState, 4>& Adapter::GetPadState() const { - return state; +GCController& Adapter::GetPadState(std::size_t port) { + return pads.at(port); } -int Adapter::GetOriginValue(u32 port, u32 axis) const { - return origin_status[port].axis_values[axis]; +const GCController& Adapter::GetPadState(std::size_t port) const { + return pads.at(port); } } // namespace GCAdapter diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h index 4f5f3de8e..d28dcfad3 100644 --- a/src/input_common/gcadapter/gc_adapter.h +++ b/src/input_common/gcadapter/gc_adapter.h @@ -19,24 +19,23 @@ struct libusb_device_handle; namespace GCAdapter { enum class PadButton { - PAD_BUTTON_LEFT = 0x0001, - PAD_BUTTON_RIGHT = 0x0002, - PAD_BUTTON_DOWN = 0x0004, - PAD_BUTTON_UP = 0x0008, - PAD_TRIGGER_Z = 0x0010, - PAD_TRIGGER_R = 0x0020, - PAD_TRIGGER_L = 0x0040, - PAD_BUTTON_A = 0x0100, - PAD_BUTTON_B = 0x0200, - PAD_BUTTON_X = 0x0400, - PAD_BUTTON_Y = 0x0800, - PAD_BUTTON_START = 0x1000, + Undefined = 0x0000, + ButtonLeft = 0x0001, + ButtonRight = 0x0002, + ButtonDown = 0x0004, + ButtonUp = 0x0008, + TriggerZ = 0x0010, + TriggerR = 0x0020, + TriggerL = 0x0040, + ButtonA = 0x0100, + ButtonB = 0x0200, + ButtonX = 0x0400, + ButtonY = 0x0800, + ButtonStart = 0x1000, // Below is for compatibility with "AxisButton" type - PAD_STICK = 0x2000, + Stick = 0x2000, }; -extern const std::array<PadButton, 12> PadButtonArray; - enum class PadAxes : u8 { StickX, StickY, @@ -47,87 +46,122 @@ enum class PadAxes : u8 { Undefined, }; +enum class ControllerTypes { + None, + Wired, + Wireless, +}; + struct GCPadStatus { - u16 button{}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits + std::size_t port{}; - std::array<u8, 6> axis_values{}; // Triggers and sticks, following indices defined in PadAxes - static constexpr u8 THRESHOLD = 50; // Threshold for axis press for polling + PadButton button{PadButton::Undefined}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits - u8 port{}; PadAxes axis{PadAxes::Undefined}; - u8 axis_value{255}; + s16 axis_value{}; + u8 axis_threshold{50}; }; -struct GCState { - std::unordered_map<int, bool> buttons; - std::unordered_map<u32, u16> axes; +struct GCController { + ControllerTypes type{}; + bool enable_vibration{}; + u8 rumble_amplitude{}; + u16 buttons{}; + PadButton last_button{}; + std::array<s16, 6> axis_values{}; + std::array<u8, 6> axis_origin{}; }; -enum class ControllerTypes { None, Wired, Wireless }; - class Adapter { public: - /// Initialize the GC Adapter capture and read sequence Adapter(); - - /// Close the adapter read thread and release the adapter ~Adapter(); + + /// Request a vibration for a controlelr + bool RumblePlay(std::size_t port, f32 amplitude); + /// Used for polling void BeginConfiguration(); void EndConfiguration(); + Common::SPSCQueue<GCPadStatus>& GetPadQueue(); + const Common::SPSCQueue<GCPadStatus>& GetPadQueue() const; + + GCController& GetPadState(std::size_t port); + const GCController& GetPadState(std::size_t port) const; + + /// Returns true if there is a device connected to port + bool DeviceConnected(std::size_t port) const; + + /// Used for automapping features std::vector<Common::ParamPackage> GetInputDevices() const; InputCommon::ButtonMapping GetButtonMappingForDevice(const Common::ParamPackage& params) const; InputCommon::AnalogMapping GetAnalogMappingForDevice(const Common::ParamPackage& params) const; - /// Returns true if there is a device connected to port - bool DeviceConnected(std::size_t port) const; +private: + using AdapterPayload = std::array<u8, 37>; - std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue(); - const std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue() const; + void UpdatePadType(std::size_t port, ControllerTypes pad_type); + void UpdateControllers(const AdapterPayload& adapter_payload); + void UpdateYuzuSettings(std::size_t port); + void UpdateStateButtons(std::size_t port, u8 b1, u8 b2); + void UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload); + void UpdateVibrations(); - std::array<GCState, 4>& GetPadState(); - const std::array<GCState, 4>& GetPadState() const; + void AdapterInputThread(); - int GetOriginValue(u32 port, u32 axis) const; + void AdapterScanThread(); -private: - GCPadStatus GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload); + bool IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size); + + // Updates vibration state of all controllers + void SendVibrations(); + + /// For use in initialization, querying devices to find the adapter + void Setup(); - void Read(); + /// Resets status of all GC controller devices to a disconected state + void ResetDevices(); - /// Resets status of device connected to port - void ResetDeviceType(std::size_t port); + /// Resets status of device connected to a disconected state + void ResetDevice(std::size_t port); /// Returns true if we successfully gain access to GC Adapter - bool CheckDeviceAccess(libusb_device* device); + bool CheckDeviceAccess(); - /// Captures GC Adapter endpoint address, - void GetGCEndpoint(libusb_device* device); + /// Captures GC Adapter endpoint address + /// Returns true if the endpoind was set correctly + bool GetGCEndpoint(libusb_device* device); /// For shutting down, clear all data, join all threads, release usb void Reset(); - /// For use in initialization, querying devices to find the adapter - void Setup(); + // Join all threads + void JoinThreads(); + + // Release usb handles + void ClearLibusbHandle(); libusb_device_handle* usb_adapter_handle = nullptr; + std::array<GCController, 4> pads; + Common::SPSCQueue<GCPadStatus> pad_queue; std::thread adapter_input_thread; - bool adapter_thread_running; + std::thread adapter_scan_thread; + bool adapter_input_thread_running; + bool adapter_scan_thread_running; + bool restart_scan_thread; libusb_context* libusb_ctx; - u8 input_endpoint = 0; - u8 output_endpoint = 0; - - bool configuring = false; + u8 input_endpoint{0}; + u8 output_endpoint{0}; + u8 input_error_counter{0}; + u8 output_error_counter{0}; + int vibration_counter{0}; - std::array<GCState, 4> state; - std::array<bool, 4> get_origin; - std::array<GCPadStatus, 4> origin_status; - std::array<Common::SPSCQueue<GCPadStatus>, 4> pad_queue; - std::array<ControllerTypes, 4> adapter_controllers_status{}; + bool configuring{false}; + bool rumble_enabled{true}; + bool vibration_changed{true}; }; - } // namespace GCAdapter diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp index 893556916..6bd6f57fc 100644 --- a/src/input_common/gcadapter/gc_poller.cpp +++ b/src/input_common/gcadapter/gc_poller.cpp @@ -15,22 +15,30 @@ namespace InputCommon { class GCButton final : public Input::ButtonDevice { public: - explicit GCButton(u32 port_, int button_, const GCAdapter::Adapter* adapter) + explicit GCButton(u32 port_, s32 button_, GCAdapter::Adapter* adapter) : port(port_), button(button_), gcadapter(adapter) {} ~GCButton() override; bool GetStatus() const override { if (gcadapter->DeviceConnected(port)) { - return gcadapter->GetPadState()[port].buttons.at(button); + return (gcadapter->GetPadState(port).buttons & button) != 0; } return false; } + bool SetRumblePlay(f32 amp_high, f32 amp_low, f32 freq_high, f32 freq_low) const override { + const float amplitude = amp_high + amp_low > 2.0f ? 1.0f : (amp_high + amp_low) * 0.5f; + const auto new_amp = + static_cast<f32>(pow(amplitude, 0.5f) * (3.0f - 2.0f * pow(amplitude, 0.15f))); + + return gcadapter->RumblePlay(port, new_amp); + } + private: const u32 port; - const int button; - const GCAdapter::Adapter* gcadapter; + const s32 button; + GCAdapter::Adapter* gcadapter; }; class GCAxisButton final : public Input::ButtonDevice { @@ -38,13 +46,12 @@ public: explicit GCAxisButton(u32 port_, u32 axis_, float threshold_, bool trigger_if_greater_, const GCAdapter::Adapter* adapter) : port(port_), axis(axis_), threshold(threshold_), trigger_if_greater(trigger_if_greater_), - gcadapter(adapter), - origin_value(static_cast<float>(adapter->GetOriginValue(port_, axis_))) {} + gcadapter(adapter) {} bool GetStatus() const override { if (gcadapter->DeviceConnected(port)) { - const float current_axis_value = gcadapter->GetPadState()[port].axes.at(axis); - const float axis_value = (current_axis_value - origin_value) / 128.0f; + const float current_axis_value = gcadapter->GetPadState(port).axis_values.at(axis); + const float axis_value = current_axis_value / 128.0f; if (trigger_if_greater) { // TODO: Might be worthwile to set a slider for the trigger threshold. It is // currently always set to 0.5 in configure_input_player.cpp ZL/ZR HandleClick @@ -61,7 +68,6 @@ private: float threshold; bool trigger_if_greater; const GCAdapter::Adapter* gcadapter; - const float origin_value; }; GCButtonFactory::GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_) @@ -73,7 +79,7 @@ std::unique_ptr<Input::ButtonDevice> GCButtonFactory::Create(const Common::Param const auto button_id = params.Get("button", 0); const auto port = static_cast<u32>(params.Get("port", 0)); - constexpr int PAD_STICK_ID = static_cast<u16>(GCAdapter::PadButton::PAD_STICK); + constexpr s32 PAD_STICK_ID = static_cast<s32>(GCAdapter::PadButton::Stick); // button is not an axis/stick button if (button_id != PAD_STICK_ID) { @@ -106,32 +112,25 @@ Common::ParamPackage GCButtonFactory::GetNextInput() const { Common::ParamPackage params; GCAdapter::GCPadStatus pad; auto& queue = adapter->GetPadQueue(); - for (std::size_t port = 0; port < queue.size(); ++port) { - while (queue[port].Pop(pad)) { - // This while loop will break on the earliest detected button - params.Set("engine", "gcpad"); - params.Set("port", static_cast<int>(port)); - for (const auto& button : GCAdapter::PadButtonArray) { - const u16 button_value = static_cast<u16>(button); - if (pad.button & button_value) { - params.Set("button", button_value); - break; - } - } + while (queue.Pop(pad)) { + // This while loop will break on the earliest detected button + params.Set("engine", "gcpad"); + params.Set("port", static_cast<s32>(pad.port)); + if (pad.button != GCAdapter::PadButton::Undefined) { + params.Set("button", static_cast<u16>(pad.button)); + } - // For Axis button implementation - if (pad.axis != GCAdapter::PadAxes::Undefined) { - params.Set("axis", static_cast<u8>(pad.axis)); - params.Set("button", static_cast<u16>(GCAdapter::PadButton::PAD_STICK)); - if (pad.axis_value > 128) { - params.Set("direction", "+"); - params.Set("threshold", "0.25"); - } else { - params.Set("direction", "-"); - params.Set("threshold", "-0.25"); - } - break; + // For Axis button implementation + if (pad.axis != GCAdapter::PadAxes::Undefined) { + params.Set("axis", static_cast<u8>(pad.axis)); + params.Set("button", static_cast<u16>(GCAdapter::PadButton::Stick)); + params.Set("threshold", "0.25"); + if (pad.axis_value > 0) { + params.Set("direction", "+"); + } else { + params.Set("direction", "-"); } + break; } } return params; @@ -152,17 +151,14 @@ public: explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, const GCAdapter::Adapter* adapter, float range_) : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter), - origin_value_x(static_cast<float>(adapter->GetOriginValue(port_, axis_x_))), - origin_value_y(static_cast<float>(adapter->GetOriginValue(port_, axis_y_))), range(range_) {} float GetAxis(u32 axis) const { if (gcadapter->DeviceConnected(port)) { std::lock_guard lock{mutex}; - const auto origin_value = axis % 2 == 0 ? origin_value_x : origin_value_y; const auto axis_value = - static_cast<float>(gcadapter->GetPadState()[port].axes.at(axis)); - return (axis_value - origin_value) / (100.0f * range); + static_cast<float>(gcadapter->GetPadState(port).axis_values.at(axis)); + return (axis_value) / (100.0f * range); } return 0.0f; } @@ -215,8 +211,6 @@ private: const u32 axis_y; const float deadzone; const GCAdapter::Adapter* gcadapter; - const float origin_value_x; - const float origin_value_y; const float range; mutable std::mutex mutex; }; @@ -254,26 +248,44 @@ void GCAnalogFactory::EndConfiguration() { Common::ParamPackage GCAnalogFactory::GetNextInput() { GCAdapter::GCPadStatus pad; + Common::ParamPackage params; auto& queue = adapter->GetPadQueue(); - for (std::size_t port = 0; port < queue.size(); ++port) { - while (queue[port].Pop(pad)) { - if (pad.axis == GCAdapter::PadAxes::Undefined || - std::abs((static_cast<float>(pad.axis_value) - 128.0f) / 128.0f) < 0.1f) { - continue; - } - // An analog device needs two axes, so we need to store the axis for later and wait for - // a second input event. The axes also must be from the same joystick. - const u8 axis = static_cast<u8>(pad.axis); - if (analog_x_axis == -1) { - analog_x_axis = axis; - controller_number = static_cast<int>(port); - } else if (analog_y_axis == -1 && analog_x_axis != axis && - controller_number == static_cast<int>(port)) { - analog_y_axis = axis; - } + while (queue.Pop(pad)) { + if (pad.button != GCAdapter::PadButton::Undefined) { + params.Set("engine", "gcpad"); + params.Set("port", static_cast<s32>(pad.port)); + params.Set("button", static_cast<u16>(pad.button)); + return params; + } + if (pad.axis == GCAdapter::PadAxes::Undefined || + std::abs(static_cast<float>(pad.axis_value) / 128.0f) < 0.1f) { + continue; + } + // An analog device needs two axes, so we need to store the axis for later and wait for + // a second input event. The axes also must be from the same joystick. + const u8 axis = static_cast<u8>(pad.axis); + if (axis == 0 || axis == 1) { + analog_x_axis = 0; + analog_y_axis = 1; + controller_number = static_cast<s32>(pad.port); + break; + } + if (axis == 2 || axis == 3) { + analog_x_axis = 2; + analog_y_axis = 3; + controller_number = static_cast<s32>(pad.port); + break; + } + + if (analog_x_axis == -1) { + analog_x_axis = axis; + controller_number = static_cast<s32>(pad.port); + } else if (analog_y_axis == -1 && analog_x_axis != axis && + controller_number == static_cast<s32>(pad.port)) { + analog_y_axis = axis; + break; } } - Common::ParamPackage params; if (analog_x_axis != -1 && analog_y_axis != -1) { params.Set("engine", "gcpad"); params.Set("port", controller_number); diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 2df410be8..1adf3cd13 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -4,6 +4,7 @@ #include <cstring> #include <fstream> +#include <vector> #include "common/assert.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 2e56daf29..5bbe6a332 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -5,8 +5,6 @@ #pragma once #include <memory> -#include <vector> -#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -44,11 +42,11 @@ public: void Decode(); /// Returns most recently decoded frame - AVFrame* GetCurrentFrame(); - const AVFrame* GetCurrentFrame() const; + [[nodiscard]] AVFrame* GetCurrentFrame(); + [[nodiscard]] const AVFrame* GetCurrentFrame() const; /// Returns the value of current_codec - NvdecCommon::VideoCodec GetCurrentCodec() const; + [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; private: bool initialized{}; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 1a39f7b23..33e063e20 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -18,17 +18,33 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // +#include <array> #include "common/bit_util.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" namespace Tegra::Decoder { +namespace { +// ZigZag LUTs from libavcodec. +constexpr std::array<u8, 64> zig_zag_direct{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, + 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, + 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +}; + +constexpr std::array<u8, 16> zig_zag_scan{ + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, +}; +} // Anonymous namespace + H264::H264(GPU& gpu_) : gpu(gpu_) {} H264::~H264() = default; -std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { +const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, + bool is_first_frame) { H264DecoderContext context{}; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); @@ -48,7 +64,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo writer.WriteU(0, 8); writer.WriteU(31, 8); writer.WriteUe(0); - const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3; + const auto chroma_format_idc = + static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); writer.WriteUe(chroma_format_idc); if (chroma_format_idc == 3) { writer.WriteBit(false); @@ -59,8 +76,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag writer.WriteBit(false); // Scaling matrix present flag - const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf)); + const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); + writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); writer.WriteUe(order_cnt_type); if (order_cnt_type == 0) { writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); @@ -100,7 +117,7 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo writer.WriteUe(0); writer.WriteUe(0); - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag); + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); writer.WriteBit(false); writer.WriteUe(0); writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); @@ -172,8 +189,8 @@ void H264BitWriter::WriteSe(s32 value) { WriteExpGolombCodedInt(value); } -void H264BitWriter::WriteUe(s32 value) { - WriteExpGolombCodedUInt((u32)value); +void H264BitWriter::WriteUe(u32 value) { + WriteExpGolombCodedUInt(value); } void H264BitWriter::End() { diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 21752dd90..273449495 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -38,7 +38,7 @@ public: /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax void WriteU(s32 value, s32 value_sz); void WriteSe(s32 value); - void WriteUe(s32 value); + void WriteUe(u32 value); /// Finalize the bitstream void End(); @@ -51,26 +51,14 @@ public: void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); /// Return the bitstream as a vector. - std::vector<u8>& GetByteArray(); - const std::vector<u8>& GetByteArray() const; + [[nodiscard]] std::vector<u8>& GetByteArray(); + [[nodiscard]] const std::vector<u8>& GetByteArray() const; private: - // ZigZag LUTs from libavcodec. - static constexpr std::array<u8, 64> zig_zag_direct{ - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, - 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, - 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, - }; - - static constexpr std::array<u8, 16> zig_zag_scan{ - 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, - 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, - }; - void WriteBits(s32 value, s32 bit_count); void WriteExpGolombCodedInt(s32 value); void WriteExpGolombCodedUInt(u32 value); - s32 GetFreeBufferBits(); + [[nodiscard]] s32 GetFreeBufferBits(); void Flush(); s32 buffer_size{8}; @@ -86,8 +74,8 @@ public: ~H264(); /// Compose the H264 header of the frame for FFmpeg decoding - std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, - bool is_first_frame = false); + [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); private: struct H264ParameterSet { diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index d205a8f5d..ab44fdc9e 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -9,7 +9,7 @@ #include "video_core/memory_manager.h" namespace Tegra::Decoder { - +namespace { // Default compressed header probabilities once frame context resets constexpr Vp9EntropyProbs default_probs{ .y_mode_prob{ @@ -170,6 +170,89 @@ constexpr Vp9EntropyProbs default_probs{ .high_precision{128, 128}, }; +constexpr std::array<s32, 256> norm_lut{ + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +constexpr std::array<s32, 254> map_lut{ + 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, + 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 19, +}; + +// 6.2.14 Tile size calculation + +[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) { + const s32 sb64_cols = (frame_width + 63) / 64; + s32 min_log2 = 0; + + while ((64 << min_log2) < sb64_cols) { + min_log2++; + } + + return min_log2; +} + +[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) { + const s32 sb64_cols = (frame_width + 63) / 64; + s32 max_log2 = 1; + + while ((sb64_cols >> max_log2) >= 4) { + max_log2++; + } + + return max_log2 - 1; +} + +// Recenters probability. Based on section 6.3.6 of VP9 Specification +[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) { + if (new_prob > old_prob * 2) { + return new_prob; + } + + if (new_prob >= old_prob) { + return (new_prob - old_prob) * 2; + } + + return (old_prob - new_prob) * 2 - 1; +} + +// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification +[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) { + new_prob--; + old_prob--; + + std::size_t index{}; + + if (old_prob * 2 <= 0xff) { + index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); + } else { + index = static_cast<std::size_t>( + std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); + } + + return map_lut[index]; +} +} // Anonymous namespace + VP9::VP9(GPU& gpu) : gpu(gpu) {} VP9::~VP9() = default; @@ -207,32 +290,6 @@ void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_pro EncodeTermSubExp(writer, delta); } -s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) { - new_prob--; - old_prob--; - - std::size_t index{}; - - if (old_prob * 2 <= 0xff) { - index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); - } else { - index = static_cast<std::size_t>( - std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); - } - - return map_lut[index]; -} - -s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) { - if (new_prob > old_prob * 2) { - return new_prob; - } else if (new_prob >= old_prob) { - return (new_prob - old_prob) * 2; - } else { - return (old_prob - new_prob) * 2 - 1; - } -} - void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) { if (WriteLessThan(writer, value, 16)) { writer.Write(value, 4); @@ -332,28 +389,6 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } } -s32 VP9::CalcMinLog2TileCols(s32 frame_width) { - const s32 sb64_cols = (frame_width + 63) / 64; - s32 min_log2 = 0; - - while ((64 << min_log2) < sb64_cols) { - min_log2++; - } - - return min_log2; -} - -s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) { - const s32 sb64_cols = (frameWidth + 63) / 64; - s32 max_log2 = 1; - - while ((sb64_cols >> max_log2) >= 4) { - max_log2++; - } - - return max_log2 - 1; -} - Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { PictureInfo picture_info{}; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); @@ -379,14 +414,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) Vp9FrameContainer frame{}; { gpu.SyncGuestHost(); - frame.info = std::move(GetVp9PictureInfo(state)); + frame.info = GetVp9PictureInfo(state); frame.bit_stream.resize(frame.info.bitstream_size); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), frame.info.bitstream_size); } // Buffer two frames, saving the last show frame info - if (next_next_frame.bit_stream.size() != 0) { + if (!next_next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ .info = frame.info, .bit_stream = frame.bit_stream, @@ -396,15 +431,15 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) frame.bit_stream = next_next_frame.bit_stream; next_next_frame = std::move(temp); - if (next_frame.bit_stream.size() != 0) { - Vp9FrameContainer temp{ + if (!next_frame.bit_stream.empty()) { + Vp9FrameContainer temp2{ .info = frame.info, .bit_stream = frame.bit_stream, }; next_frame.info.show_frame = frame.info.last_frame_shown; frame.info = next_frame.info; frame.bit_stream = next_frame.bit_stream; - next_frame = std::move(temp); + next_frame = std::move(temp2); } else { next_frame.info = frame.info; next_frame.bit_stream = frame.bit_stream; @@ -605,12 +640,6 @@ std::vector<u8> VP9::ComposeCompressedHeader() { writer.End(); return writer.GetBuffer(); - - const auto writer_bytearray = writer.GetBuffer(); - - std::vector<u8> compressed_header(writer_bytearray.size()); - std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size()); - return compressed_header; } VpxBitStreamWriter VP9::ComposeUncompressedHeader() { @@ -648,7 +677,6 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { current_frame_info.intra_only = true; } else { - std::array<s32, 3> ref_frame_index; if (!current_frame_info.show_frame) { uncomp_writer.WriteBit(current_frame_info.intra_only); @@ -663,9 +691,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { } // Last, Golden, Altref frames - ref_frame_index = std::array<s32, 3>{0, 1, 2}; + std::array<s32, 3> ref_frame_index{0, 1, 2}; - // set when next frame is hidden + // Set when next frame is hidden // altref and golden references are swapped if (swap_next_golden) { ref_frame_index = std::array<s32, 3>{0, 2, 1}; @@ -754,17 +782,19 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { const s8 old_deltas = loop_filter_ref_deltas[index]; const s8 new_deltas = current_frame_info.ref_deltas[index]; + const bool differing_delta = old_deltas != new_deltas; - loop_filter_delta_update |= - (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas); + update_loop_filter_ref_deltas[index] = differing_delta; + loop_filter_delta_update |= differing_delta; } for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { const s8 old_deltas = loop_filter_mode_deltas[index]; const s8 new_deltas = current_frame_info.mode_deltas[index]; + const bool differing_delta = old_deltas != new_deltas; - loop_filter_delta_update |= - (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas); + update_loop_filter_mode_deltas[index] = differing_delta; + loop_filter_delta_update |= differing_delta; } uncomp_writer.WriteBit(loop_filter_delta_update); @@ -824,12 +854,12 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { return uncomp_writer; } -std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { +const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { std::vector<u8> bitstream; { Vp9FrameContainer curr_frame = GetCurrentFrame(state); current_frame_info = curr_frame.info; - bitstream = curr_frame.bit_stream; + bitstream = std::move(curr_frame.bit_stream); } // The uncompressed header routine sets PrevProb parameters needed for the compressed header diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 748e11bae..e2504512c 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -4,9 +4,9 @@ #pragma once -#include <unordered_map> +#include <array> #include <vector> -#include "common/common_funcs.h" + #include "common/common_types.h" #include "common/stream.h" #include "video_core/command_classes/codecs/vp9_types.h" @@ -25,6 +25,12 @@ public: VpxRangeEncoder(); ~VpxRangeEncoder(); + VpxRangeEncoder(const VpxRangeEncoder&) = delete; + VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete; + + VpxRangeEncoder(VpxRangeEncoder&&) = default; + VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default; + /// Writes the rightmost value_size bits from value into the stream void Write(s32 value, s32 value_size); @@ -37,11 +43,11 @@ public: /// Signal the end of the bitstream void End(); - std::vector<u8>& GetBuffer() { + [[nodiscard]] std::vector<u8>& GetBuffer() { return base_stream.GetBuffer(); } - const std::vector<u8>& GetBuffer() const { + [[nodiscard]] const std::vector<u8>& GetBuffer() const { return base_stream.GetBuffer(); } @@ -52,17 +58,6 @@ private: u32 range{0xff}; s32 count{-24}; s32 half_probability{128}; - static constexpr std::array<s32, 256> norm_lut{ - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; }; class VpxBitStreamWriter { @@ -70,6 +65,12 @@ public: VpxBitStreamWriter(); ~VpxBitStreamWriter(); + VpxBitStreamWriter(const VpxBitStreamWriter&) = delete; + VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete; + + VpxBitStreamWriter(VpxBitStreamWriter&&) = default; + VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default; + /// Write an unsigned integer value void WriteU(u32 value, u32 value_size); @@ -86,10 +87,10 @@ public: void Flush(); /// Returns byte_array - std::vector<u8>& GetByteArray(); + [[nodiscard]] std::vector<u8>& GetByteArray(); /// Returns const byte_array - const std::vector<u8>& GetByteArray() const; + [[nodiscard]] const std::vector<u8>& GetByteArray() const; private: /// Write bit_count bits from value into buffer @@ -110,12 +111,18 @@ public: explicit VP9(GPU& gpu); ~VP9(); + VP9(const VP9&) = delete; + VP9& operator=(const VP9&) = delete; + + VP9(VP9&&) = default; + VP9& operator=(VP9&&) = delete; + /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec /// documentation - std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); + [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); /// Returns true if the most recent frame was a hidden frame. - bool WasFrameHidden() const { + [[nodiscard]] bool WasFrameHidden() const { return hidden; } @@ -132,12 +139,6 @@ private: /// Generates compressed header probability deltas in the bitstream writer void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); - /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification - s32 RemapProbability(s32 new_prob, s32 old_prob); - - /// Recenters probability. Based on section 6.3.6 of VP9 Specification - s32 RecenterNonNeg(s32 new_prob, s32 old_prob); - /// Inverse of 6.3.4 Decode term subexp void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value); @@ -157,22 +158,18 @@ private: /// Write motion vector probability updates. 6.3.17 in the spec void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); - /// 6.2.14 Tile size calculation - s32 CalcMinLog2TileCols(s32 frame_width); - s32 CalcMaxLog2TileCols(s32 frame_width); - /// Returns VP9 information from NVDEC provided offset and size - Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); /// Returns frame to be decoded after buffering - Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); /// Use NVDEC providied information to compose the headers for the current frame - std::vector<u8> ComposeCompressedHeader(); - VpxBitStreamWriter ComposeUncompressedHeader(); + [[nodiscard]] std::vector<u8> ComposeCompressedHeader(); + [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); GPU& gpu; std::vector<u8> frame; @@ -180,7 +177,7 @@ private: std::array<s8, 4> loop_filter_ref_deltas{}; std::array<s8, 2> loop_filter_mode_deltas{}; - bool hidden; + bool hidden = false; s64 current_frame_number = -2; // since we buffer 2 frames s32 grace_period = 6; // frame offsets need to stabilize std::array<FrameContexts, 4> frame_ctxs{}; @@ -193,23 +190,6 @@ private: s32 diff_update_probability = 252; s32 frame_sync_code = 0x498342; - static constexpr std::array<s32, 254> map_lut = { - 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, - 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, - 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6, - 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, - 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160, - 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, - 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, - 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19, - }; }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 8688fdac0..4f0b05d22 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -4,13 +4,11 @@ #pragma once -#include <algorithm> -#include <list> +#include <array> +#include <cstring> #include <vector> -#include "common/cityhash.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/command_classes/nvdec_common.h" namespace Tegra { class GPU; @@ -233,9 +231,8 @@ struct PictureInfo { u32 surface_params{}; INSERT_PADDING_WORDS(3); - Vp9PictureInfo Convert() const { - - return Vp9PictureInfo{ + [[nodiscard]] Vp9PictureInfo Convert() const { + return { .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index a5234ee47..c4dd4881a 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -15,7 +15,7 @@ void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { std::memcpy(state_offset, &arguments, sizeof(u32)); } -void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) { +void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) { StateWrite(static_cast<u32>(method), arguments[0]); switch (method) { case Method::WaitSyncpt: diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 501a5ed2e..013eaa0c1 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h @@ -61,7 +61,7 @@ public: ~Host1x(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments); + void ProcessMethod(Method method, const std::vector<u32>& arguments); private: /// For Host1x, execute is waiting on a syncpoint previously written into the state diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index ede9466eb..8ca7a7b06 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -2,13 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <bitset> #include "common/assert.h" -#include "common/bit_util.h" -#include "core/memory.h" #include "video_core/command_classes/nvdec.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Tegra { @@ -16,7 +12,7 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) { +void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) { if (method == Method::SetVideoCodec) { codec->StateWrite(static_cast<u32>(method), arguments[0]); } else { diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index c1a9d843e..eec4443f9 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -4,8 +4,8 @@ #pragma once +#include <memory> #include <vector> -#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/codecs/codec.h" @@ -23,17 +23,17 @@ public: ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments); + void ProcessMethod(Method method, const std::vector<u32>& arguments); /// Return most recently decoded frame - AVFrame* GetFrame(); - const AVFrame* GetFrame() const; + [[nodiscard]] AVFrame* GetFrame(); + [[nodiscard]] const AVFrame* GetFrame() const; private: /// Invoke codec to decode a frame void Execute(); GPU& gpu; - std::unique_ptr<Tegra::Codec> codec; + std::unique_ptr<Codec> codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index a0ab44855..19dc9e0ab 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -27,22 +27,22 @@ SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} SyncptIncrManager::~SyncptIncrManager() = default; void SyncptIncrManager::Increment(u32 id) { - increments.push_back(SyncptIncr{0, id, true}); + increments.emplace_back(0, 0, id, true); IncrementAllDone(); } u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { const u32 handle = current_id++; - increments.push_back(SyncptIncr{handle, class_id, id}); + increments.emplace_back(handle, class_id, id); return handle; } void SyncptIncrManager::SignalDone(u32 handle) { - auto done_incr = std::find_if(increments.begin(), increments.end(), - [handle](SyncptIncr incr) { return incr.id == handle; }); - if (done_incr != increments.end()) { - const SyncptIncr incr = *done_incr; - *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true}; + const auto done_incr = + std::find_if(increments.begin(), increments.end(), + [handle](const SyncptIncr& incr) { return incr.id == handle; }); + if (done_incr != increments.cend()) { + done_incr->complete = true; } IncrementAllDone(); } diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h index 353b67573..2c321ec58 100644 --- a/src/video_core/command_classes/sync_manager.h +++ b/src/video_core/command_classes/sync_manager.h @@ -32,8 +32,8 @@ struct SyncptIncr { u32 syncpt_id; bool complete; - SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false) - : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} + SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false) + : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} }; class SyncptIncrManager { diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e15a1a8..5b52da277 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -26,7 +26,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) { std::memcpy(state_offset, &arguments, sizeof(u32)); } -void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) { +void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) { LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method)); VicStateWrite(static_cast<u32>(method), arguments[0]); const u64 arg = static_cast<u64>(arguments[0]) << 8; diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h index dd0a2aed8..8c4e284a1 100644 --- a/src/video_core/command_classes/vic.h +++ b/src/video_core/command_classes/vic.h @@ -63,11 +63,11 @@ public: SetOutputSurfaceChromaVOffset = 0x1ca }; - explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor); + explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor); ~Vic(); /// Write to the device state. - void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments); + void ProcessMethod(Method method, const std::vector<u32>& arguments); private: void Execute(); diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index f2f96ac33..105b85a92 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/cityhash.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -12,6 +13,20 @@ namespace Tegra { +void CommandList::RefreshIntegrityChecks(GPU& gpu) { + command_list_hashes.resize(command_lists.size()); + + for (std::size_t index = 0; index < command_lists.size(); ++index) { + const CommandListHeader command_list_header = command_lists[index]; + std::vector<CommandHeader> command_headers(command_list_header.size); + gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), + command_list_header.size * sizeof(u32)); + command_list_hashes[index] = + Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), + command_list_header.size * sizeof(u32)); + } +} + DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -45,32 +60,51 @@ bool DmaPusher::Step() { return false; } - const CommandList& command_list{dma_pushbuffer.front()}; - ASSERT_OR_EXECUTE(!command_list.empty(), { - // Somehow the command_list is empty, in order to avoid a crash - // We ignore it and assume its size is 0. - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - return true; - }); - const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; - const GPUVAddr dma_get = command_list_header.addr; - - if (dma_pushbuffer_subindex >= command_list.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - } + CommandList& command_list{dma_pushbuffer.front()}; - if (command_list_header.size == 0) { - return true; - } + ASSERT_OR_EXECUTE( + command_list.command_lists.size() || command_list.prefetch_command_list.size(), { + // Somehow the command_list is empty, in order to avoid a crash + // We ignore it and assume its size is 0. + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + return true; + }); - // Push buffer non-empty, read a word - command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + if (command_list.prefetch_command_list.size()) { + // Prefetched command list from nvdrv, used for things like synchronization + command_headers = std::move(command_list.prefetch_command_list); + dma_pushbuffer.pop(); + } else { + const CommandListHeader command_list_header{ + command_list.command_lists[dma_pushbuffer_subindex]}; + const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; + const GPUVAddr dma_get = command_list_header.addr; + + if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } + if (command_list_header.size == 0) { + return true; + } + + // Push buffer non-empty, read a word + command_headers.resize(command_list_header.size); + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + + // Integrity check + const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), + command_list_header.size * sizeof(u32)); + if (new_hash != next_hash) { + LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get); + dma_pushbuffer.pop(); + return true; + } + } for (std::size_t index = 0; index < command_headers.size();) { const CommandHeader& command_header = command_headers[index]; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index efa90d170..8496ba2da 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -27,6 +27,31 @@ enum class SubmissionMode : u32 { IncreaseOnce = 5 }; +// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence +// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. +// So the values you see in docs might be multiplied by 4. +enum class BufferMethods : u32 { + BindObject = 0x0, + Nop = 0x2, + SemaphoreAddressHigh = 0x4, + SemaphoreAddressLow = 0x5, + SemaphoreSequence = 0x6, + SemaphoreTrigger = 0x7, + NotifyIntr = 0x8, + WrcacheFlush = 0x9, + Unk28 = 0xA, + UnkCacheFlush = 0xB, + RefCnt = 0x14, + SemaphoreAcquire = 0x1A, + SemaphoreRelease = 0x1B, + FenceValue = 0x1C, + FenceAction = 0x1D, + WaitForInterrupt = 0x1E, + Unk7c = 0x1F, + Yield = 0x20, + NonPullerMethods = 0x40, +}; + struct CommandListHeader { union { u64 raw; @@ -49,9 +74,29 @@ union CommandHeader { static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); +static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, + SubmissionMode mode) { + CommandHeader result{}; + result.method.Assign(static_cast<u32>(method)); + result.arg_count.Assign(arg_count); + result.mode.Assign(mode); + return result; +} + class GPU; -using CommandList = std::vector<Tegra::CommandListHeader>; +struct CommandList final { + CommandList() = default; + explicit CommandList(std::size_t size) : command_lists(size) {} + explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) + : prefetch_command_list{std::move(prefetch_command_list)} {} + + void RefreshIntegrityChecks(GPU& gpu); + + std::vector<Tegra::CommandListHeader> command_lists; + std::vector<u64> command_list_hashes; + std::vector<Tegra::CommandHeader> prefetch_command_list; +}; /** * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the @@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for * details on this implementation. */ -class DmaPusher { +class DmaPusher final { public: explicit DmaPusher(Core::System& system, GPU& gpu); ~DmaPusher(); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d374b73cf..a3c05d1b0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1893,6 +1893,7 @@ public: ICMP_IMM, FCMP_RR, FCMP_RC, + FCMP_IMMR, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -2205,6 +2206,7 @@ private: INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), + INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 171f78183..ebd149c3a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -194,30 +194,6 @@ void GPU::SyncGuestHost() { void GPU::OnCommandListEnd() { renderer->Rasterizer().ReleaseFences(); } -// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence -// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. -// So the values you see in docs might be multiplied by 4. -enum class BufferMethods { - BindObject = 0x0, - Nop = 0x2, - SemaphoreAddressHigh = 0x4, - SemaphoreAddressLow = 0x5, - SemaphoreSequence = 0x6, - SemaphoreTrigger = 0x7, - NotifyIntr = 0x8, - WrcacheFlush = 0x9, - Unk28 = 0xA, - UnkCacheFlush = 0xB, - RefCnt = 0x14, - SemaphoreAcquire = 0x1A, - SemaphoreRelease = 0x1B, - FenceValue = 0x1C, - FenceAction = 0x1D, - Unk78 = 0x1E, - Unk7c = 0x1F, - Yield = 0x20, - NonPullerMethods = 0x40, -}; enum class GpuSemaphoreOperation { AcquireEqual = 0x1, @@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::UnkCacheFlush: case BufferMethods::WrcacheFlush: case BufferMethods::FenceValue: + break; case BufferMethods::FenceAction: + ProcessFenceActionMethod(); + break; + case BufferMethods::WaitForInterrupt: + ProcessWaitForInterruptMethod(); break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); @@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { } } +void GPU::ProcessFenceActionMethod() { + switch (regs.fence_action.op) { + case FenceOperation::Acquire: + WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); + break; + case FenceOperation::Increment: + IncrementSyncPoint(regs.fence_action.syncpoint_id); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented operation {}", + static_cast<u32>(regs.fence_action.op.Value())); + } +} + +void GPU::ProcessWaitForInterruptMethod() { + // TODO(bunnei) ImplementMe + LOG_WARNING(HW_GPU, "(STUBBED) called"); +} + void GPU::ProcessSemaphoreTriggerMethod() { const auto semaphoreOperationMask = 0xF; const auto op = diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b8c613b11..5444b49f3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -263,6 +263,24 @@ public: return use_nvdec; } + enum class FenceOperation : u32 { + Acquire = 0, + Increment = 1, + }; + + union FenceAction { + u32 raw; + BitField<0, 1, FenceOperation> op; + BitField<8, 24, u32> syncpoint_id; + + static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { + FenceAction result{}; + result.op.Assign(op); + result.syncpoint_id.Assign(syncpoint_id); + return {result.raw}; + } + }; + struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -291,10 +309,7 @@ public: u32 semaphore_acquire; u32 semaphore_release; u32 fence_value; - union { - BitField<4, 4, u32> operation; - BitField<8, 8, u32> id; - } fence_action; + FenceAction fence_action; INSERT_UNION_PADDING_WORDS(0xE2); // Puller state @@ -342,6 +357,8 @@ protected: private: void ProcessBindMethod(const MethodCall& method_call); + void ProcessFenceActionMethod(); + void ProcessWaitForInterruptMethod(); void ProcessSemaphoreTriggerMethod(); void ProcessSemaphoreRelease(); void ProcessSemaphoreAcquire(); diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index aabd62c5c..39cc3b869 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() { } void AsyncShaders::AllocateWorkers() { - // Max worker threads we should allow - constexpr u32 MAX_THREADS = 4; - // Deduce how many threads we can use - const u32 threads_used = std::thread::hardware_concurrency() / 4; - // Always allow at least 1 thread regardless of our settings - const auto max_worker_count = std::max(1U, threads_used); - // Don't use more than MAX_THREADS - const auto num_workers = std::min(max_worker_count, MAX_THREADS); + // Use at least one thread + u32 num_workers = 1; + + // Deduce how many more threads we can use + const u32 thread_count = std::thread::hardware_concurrency(); + if (thread_count >= 8) { + // Increase async workers by 1 for every 2 threads >= 8 + num_workers += 1 + (thread_count - 8) / 2; + } // If we already have workers queued, ignore if (num_workers == worker_threads.size()) { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 4db329fa5..afef5948d 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: { + case OpCode::Id::FCMP_RC: + case OpCode::Id::FCMP_IMMR: { UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); Node op_c = GetRegister(instr.gpr39); Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index f264b98a0..67183e64c 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -71,11 +71,6 @@ struct Client::Impl { return {}; } - if (!cli->is_socket_open()) { - LOG_ERROR(WebService, "Failed to open socket, skipping request!"); - return {}; - } - cli->set_connection_timeout(TIMEOUT_SECONDS); cli->set_read_timeout(TIMEOUT_SECONDS); cli->set_write_timeout(TIMEOUT_SECONDS); |