From 958df6ad9192285e0a19caa362f9f0c9e63422d4 Mon Sep 17 00:00:00 2001 From: Mattes D Date: Fri, 25 Dec 2015 18:50:25 +0100 Subject: Added the cUrlParser class, exported to Lua API. --- src/HTTPServer/UrlParser.cpp | 200 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 src/HTTPServer/UrlParser.cpp (limited to 'src/HTTPServer/UrlParser.cpp') diff --git a/src/HTTPServer/UrlParser.cpp b/src/HTTPServer/UrlParser.cpp new file mode 100644 index 000000000..05db3e413 --- /dev/null +++ b/src/HTTPServer/UrlParser.cpp @@ -0,0 +1,200 @@ + +// UrlParser.cpp + +// Implements the cUrlParser class that parses string URL into individual parts + +#include "Globals.h" +#include "UrlParser.h" + + + + + +UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme) +{ + if (a_Scheme == "http") + { + return 80; + } + else if (a_Scheme == "https") + { + return 443; + } + else if (a_Scheme == "ftp") + { + return 21; + } + else if (a_Scheme == "mailto") + { + return 25; + } + return 0; +} + + + + + +std::pair cUrlParser::ParseAuthorityPart( + const AString & a_AuthorityPart, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port +) +{ + /* + a_AuthorityPart format: + [user:password@]host[:port] + host can be an IPv4, hostname, or an IPv6 enclosed in brackets + Assume only the password can contain an additional at-sign + */ + + // Split the authority on the last at-sign, if present: + auto idxLastAtSign = a_AuthorityPart.find_last_of('@'); + auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign); + auto srvrPart = (idxLastAtSign == AString::npos) ? a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1); + + // User credentials are completely optional: + auto idxCredColon = credPart.find(':'); + a_Username = credPart.substr(0, idxCredColon); + a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1); + + // Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first + if (srvrPart.empty()) + { + // No host information at all. Bail out with success + a_Host.clear(); + return std::make_pair(true, AString()); + } + if (srvrPart[0] == '[') + { + // [IPv6] host, search for the closing bracket + auto idxClosingBracket = srvrPart.find(']'); + if (idxClosingBracket == AString::npos) + { + return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket"); + } + a_Host = srvrPart.substr(0, idxClosingBracket); + auto portPart = srvrPart.substr(idxClosingBracket + 1); + if (portPart.empty()) + { + // No port was specified, return success + return std::make_pair(true, AString()); + } + if (portPart[0] != ':') + { + return std::make_pair(false, "Invalid port format after IPv6 address, mising colon"); + } + if (!StringToInteger(portPart.substr(2), a_Port)) + { + return std::make_pair(false, "Failed to parse port number after IPv6 address"); + } + return std::make_pair(true, AString()); + } + + // Not an [IPv6] address, split on the last colon: + auto idxLastColon = srvrPart.find_last_of(':'); + a_Host = srvrPart.substr(0, idxLastColon); + if (idxLastColon == AString::npos) + { + // No port was specified, return success + return std::make_pair(true, AString()); + } + auto portPart = srvrPart.substr(idxLastColon + 1); + if (!StringToInteger(portPart, a_Port)) + { + return std::make_pair(false, "Failed to parse port number after hostname"); + } + return std::make_pair(true, AString()); +} + + + + + +std::pair cUrlParser::Parse( + const AString & a_Url, + AString & a_Scheme, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port, + AString & a_Path, + AString & a_Query, + AString & a_Fragment +) +{ + // Find the scheme - the text before the first colon: + auto idxColon = a_Url.find(':'); + if (idxColon == AString::npos) + { + return std::make_pair(false, "Cannot parse the Scheme part of the URL"); + } + a_Scheme = StrToLower(a_Url.substr(0, idxColon)); + a_Port = GetDefaultPort(a_Scheme); + if (a_Port == 0) + { + return std::make_pair(false, Printf("Unknown URL scheme: \"%s\"", a_Scheme.c_str())); + } + + // If the next two chars are a double-slash, skip them: + auto authStart = idxColon + 1; + if (a_Url.substr(authStart, 2) == "//") + { + authStart += 2; + } + + // The Authority part follows the Scheme, until the first slash: + auto idxFirstSlash = a_Url.find('/', authStart + 1); + if (idxFirstSlash == AString::npos) + { + // No slash, the whole end of the Url is the authority part + idxFirstSlash = a_Url.size(); + } + + // Parse the Authority part into individual components: + auto res = ParseAuthorityPart( + a_Url.substr(authStart, idxFirstSlash - authStart), + a_Username, a_Password, + a_Host, a_Port + ); + if (!res.first) + { + return res; + } + + // Parse the rest into a path, query and fragment: + a_Path.clear(); + a_Query.clear(); + a_Fragment.clear(); + if (idxFirstSlash == a_Url.size()) + { + // No additional data, bail out with success + return std::make_pair(true, AString()); + } + auto idxPathEnd = a_Url.find_first_of("?#", idxFirstSlash + 1); + if (idxPathEnd == AString::npos) + { + a_Path = a_Url.substr(idxFirstSlash); + return std::make_pair(true, AString()); + } + a_Path = a_Url.substr(idxFirstSlash, idxPathEnd - idxFirstSlash); + auto idxHash = a_Url.find('#', idxPathEnd); + if (idxHash == AString::npos) + { + a_Query = a_Url.substr(idxPathEnd + 1); + return std::make_pair(true, AString()); + } + if (idxHash > idxPathEnd) + { + a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1); + } + a_Fragment = a_Url.substr(idxHash + 1); + return std::make_pair(true, AString()); +} + + + + + -- cgit v1.2.3