1 files changed, 766 insertions, 0 deletions
diff --git a/src/StringUtils.cpp b/src/StringUtils.cpp
new file mode 100644
index 000000000..f7aeeed26
--- /dev/null
+++ b/src/StringUtils.cpp
@@ -0,0 +1,766 @@
+
+// StringUtils.cpp
+
+// Implements the various string helper functions:
+
+#include "Globals.h"
+
+#if defined(ANDROID_NDK)
+#include <ctype.h>
+#endif
+
+#ifdef _MSC_VER
+	// Under MSVC, link to WinSock2 (needed by RawBEToUTF8's byteswapping)
+	#pragma comment(lib, "ws2_32.lib")
+#endif
+
+
+
+
+
+#ifndef va_copy
+	// Pre-C99 toolchains do not provide va_copy.
+	// NOTE(review): this fallback assumes va_list is a plain assignable type on such toolchains - confirm
+	#define va_copy(a_Dst, a_Src) ((a_Dst) = (a_Src))
+#endif
+
+
+
+
+
+/**
+Formats into a_Buffer (truncating if the text doesn't fit) without consuming a_Args.
+Returns the length of the complete formatted text, excluding the terminating NUL, or a negative value on error.
+*/
+static int VPrintfToBuffer(char * a_Buffer, size_t a_BufferSize, const char * a_Format, va_list a_Args)
+{
+	// Work on a copy, a va_list must not be reused after a v*printf call has walked it:
+	va_list argsCopy;
+	va_copy(argsCopy, a_Args);
+	#ifdef _MSC_VER
+	// MS CRT provides secure printf that doesn't behave like in the C99 standard, it reports -1 on truncation
+	int len = _vsnprintf_s(a_Buffer, a_BufferSize, _TRUNCATE, a_Format, argsCopy);
+	#else  // _MSC_VER
+	int len = vsnprintf(a_Buffer, a_BufferSize, a_Format, argsCopy);
+	#endif  // else _MSC_VER
+	va_end(argsCopy);
+	
+	#ifdef _MSC_VER
+	if (len < 0)
+	{
+		// for MS CRT, we need to calculate the result length separately
+		va_copy(argsCopy, a_Args);
+		len = _vscprintf(a_Format, argsCopy);
+		va_end(argsCopy);
+	}
+	#endif  // _MSC_VER
+	return len;
+}
+
+
+
+
+
+/**
+Appends the printf-style formatted text to the end of str and returns a reference to str.
+The previous contents of str are always kept. If the formatting fails, str is returned unmodified.
+format must not be NULL; args must have been initialized by the caller (va_start) and is consumed at most once.
+*/
+AString & AppendVPrintf(AString & str, const char *format, va_list args)
+{
+	ASSERT(format != NULL);
+	
+	// Try a stack buffer first, most messages are short and need no extra formatting pass:
+	char buffer[2048];
+	const int len = VPrintfToBuffer(buffer, ARRAYCOUNT(buffer), format, args);
+	if (len < 0)
+	{
+		// Formatting error, nothing sensible to append
+		return str;
+	}
+	const size_t textLen = (size_t)len;
+	if (textLen < ARRAYCOUNT(buffer))
+	{
+		// The result did fit into the static buffer
+		str.append(buffer, textLen);
+		return str;
+	}
+	
+	// The result did not fit into the static buffer, format directly behind the current contents of str.
+	// One extra char is needed for the NUL that printf writes; it is cut off again afterwards.
+	const size_t oldSize = str.size();
+	str.resize(oldSize + textLen + 1);
+	#ifdef _MSC_VER
+	vsprintf_s(&str[oldSize], textLen + 1, format, args);
+	#else  // _MSC_VER
+	vsnprintf(&str[oldSize], textLen + 1, format, args);
+	#endif  // else _MSC_VER
+	str.resize(oldSize + textLen);
+	return str;
+}
+
+
+
+
+
+/** Replaces the contents of str with the printf-style formatted text and returns a reference to str. */
+AString & Printf(AString & str, const char * format, ...)
+{
+	va_list argList;
+	str.clear();  // Start from an empty string, the formatted text is then appended to it
+	va_start(argList, format);
+	AString & result = AppendVPrintf(str, format, argList);
+	va_end(argList);
+	return result;
+}
+
+
+
+
+
+/** Returns a new string holding the printf-style formatted text. */
+AString Printf(const char * format, ...)
+{
+	va_list argList;
+	va_start(argList, format);
+	AString formatted;
+	AppendVPrintf(formatted, format, argList);
+	va_end(argList);
+	return formatted;
+}
+
+
+
+
+
+/** Appends the printf-style formatted text to str (keeping its current contents) and returns a reference to str. */
+AString & AppendPrintf(AString &str, const char *format, ...)
+{
+	va_list argList;
+	va_start(argList, format);
+	AString & result = AppendVPrintf(str, format, argList);
+	va_end(argList);
+	return result;
+}
+
+
+
+
+
+/**
+Splits str at every occurrence of any of the characters in delim and returns the pieces.
+Empty pieces between two delimiters are kept; an empty piece at the very end of str is not added.
+*/
+AStringVector StringSplit(const AString & str, const AString & delim)
+{
+	AStringVector pieces;
+	size_t pieceStart = 0;
+	for (;;)
+	{
+		const size_t delimPos = str.find_first_of(delim, pieceStart);
+		if (delimPos == AString::npos)
+		{
+			break;
+		}
+		pieces.push_back(str.substr(pieceStart, delimPos - pieceStart));
+		pieceStart = delimPos + 1;
+	}
+	
+	// Whatever is left behind the last delimiter forms the final piece:
+	if (pieceStart < str.length())
+	{
+		pieces.push_back(str.substr(pieceStart));
+	}
+	return pieces;
+}
+
+
+
+
+
+/**
+Splits str at every occurrence of any of the characters in delim, passes each piece through TrimString()
+and returns the results. An empty piece at the very end of str is not added.
+*/
+AStringVector StringSplitAndTrim(const AString & str, const AString & delim)
+{
+	AStringVector pieces;
+	size_t pieceStart = 0;
+	for (;;)
+	{
+		const size_t delimPos = str.find_first_of(delim, pieceStart);
+		if (delimPos == AString::npos)
+		{
+			break;
+		}
+		pieces.push_back(TrimString(str.substr(pieceStart, delimPos - pieceStart)));
+		pieceStart = delimPos + 1;
+	}
+	
+	// Whatever is left behind the last delimiter forms the final piece:
+	if (pieceStart < str.length())
+	{
+		pieces.push_back(TrimString(str.substr(pieceStart)));
+	}
+	return pieces;
+}
+
+
+
+
+/**
+Returns a copy of str with all leading and trailing bytes of value 32 (space) or lower removed.
+Bytes of value 0x80 and above (such as UTF-8 sequences) are regular content and are never trimmed.
+Returns an empty string if nothing but such whitespace / control bytes is present.
+*/
+AString TrimString(const AString & str)
+{
+	const size_t len = str.length();
+	
+	// Skip the leading blanks; the bytes are compared as unsigned so that values >= 0x80 don't turn negative:
+	size_t start = 0;
+	while ((start < len) && ((unsigned char)str[start] <= 32))
+	{
+		++start;
+	}
+	if (start == len)
+	{
+		return "";
+	}
+	
+	// Skip the trailing blanks; end is one past the last kept byte.
+	// str[start] is known to be non-blank, so the loop always stops before end reaches start.
+	size_t end = len;
+	while ((unsigned char)str[end - 1] <= 32)
+	{
+		--end;
+	}
+	
+	return str.substr(start, end - start);
+}
+
+
+
+
+
+/** Converts s to uppercase in-place, using toupper() on each byte; returns a reference to s. */
+AString & StrToUpper(AString & s)
+{
+	for (AString::iterator itr = s.begin(), end = s.end(); itr != end; ++itr)
+	{
+		// toupper() requires a value representable as unsigned char; a negative char would be undefined behavior
+		*itr = (char)toupper((unsigned char)(*itr));
+	}
+	return s;
+}
+
+
+
+
+
+/** Converts s to lowercase in-place, using tolower() on each byte; returns a reference to s. */
+AString & StrToLower(AString & s)
+{
+	for (AString::iterator itr = s.begin(), end = s.end(); itr != end; ++itr)
+	{
+		// tolower() requires a value representable as unsigned char; a negative char would be undefined behavior
+		*itr = (char)tolower((unsigned char)(*itr));
+	}
+	return s;
+}
+
+
+
+
+
+/**
+Compares the two strings while ignoring the letter case.
+Returns zero if they are equal, a negative / positive value if s1 sorts before / after s2.
+*/
+int NoCaseCompare(const AString & s1, const AString & s2)
+{
+	#ifdef _MSC_VER
+	// The MS CRT has a ready-made case-insensitive comparison of C strings:
+	const char * lhs = s1.c_str();
+	const char * rhs = s2.c_str();
+	return _stricmp(lhs, rhs);
+	#else
+	// No such function here, compare uppercased copies of the two strings instead:
+	AString upper1(s1);
+	AString upper2(s2);
+	StrToUpper(upper1);
+	StrToUpper(upper2);
+	return upper1.compare(upper2);
+	#endif  // else _MSC_VER
+}
+
+
+
+
+
+/**
+Returns the number of leading characters of s1 that match s2 when the letter case is ignored.
+Returns 0 if s1 is longer than s2, because then s1 cannot be a prefix of s2.
+*/
+unsigned int RateCompareString(const AString & s1, const AString & s2 )
+{
+	const size_t s1Length = s1.length();
+	if (s1Length > s2.length())
+	{
+		return 0;  // Definitely not a match
+	}
+
+	// Count the matching prefix; the bytes go through unsigned char so that toupper() never sees a negative value
+	size_t MatchedLetters = 0;
+	while (
+		(MatchedLetters < s1Length) &&
+		(toupper((unsigned char)s1[MatchedLetters]) == toupper((unsigned char)s2[MatchedLetters]))
+	)
+	{
+		++MatchedLetters;
+	}
+	return (unsigned int)MatchedLetters;
+}
+
+
+
+
+
+/**
+Replaces, in-place, each occurrence of iNeedle in iHayStack with iReplaceWith.
+The text inserted by a replacement is never searched again, so iReplaceWith may safely contain iNeedle.
+An empty iNeedle matches nothing and leaves iHayStack unchanged.
+*/
+void ReplaceString(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
+{
+	if (iNeedle.empty())
+	{
+		// find("") succeeds at every position, replacing it would never terminate
+		return;
+	}
+	size_t pos1 = iHayStack.find(iNeedle);
+	while (pos1 != AString::npos)
+	{
+		iHayStack.replace(pos1, iNeedle.size(), iReplaceWith);
+		// Continue behind the inserted text, otherwise a replacement containing the needle is found over and over:
+		pos1 = iHayStack.find(iNeedle, pos1 + iReplaceWith.size());
+	}
+}
+
+
+
+
+/**
+Converts a stream of big-endian 16-bit units (UTF-16BE) into a UTF-8 string; returns a ref to a_UTF8.
+a_UTF8 is cleared first. A high surrogate followed by a low surrogate is combined into a single code point
+and written as a 4-byte UTF-8 sequence; unpaired surrogates are silently dropped.
+A NULL a_RawData or a non-positive a_NumShorts produces an empty string.
+*/
+AString & RawBEToUTF8(short * a_RawData, int a_NumShorts, AString & a_UTF8)
+{
+	a_UTF8.clear();
+	if ((a_RawData == NULL) || (a_NumShorts <= 0))
+	{
+		return a_UTF8;
+	}
+	a_UTF8.reserve(3 * (size_t)a_NumShorts / 2);  // a quick guess of the resulting size
+	
+	// Read the data bytewise, that gives the big-endian value regardless of the host byte order:
+	const unsigned char * bytes = (const unsigned char *)a_RawData;
+	const size_t numUnits = (size_t)a_NumShorts;
+	for (size_t i = 0; i < numUnits; i++)
+	{
+		unsigned int c = ((unsigned int)bytes[2 * i] << 8) | bytes[2 * i + 1];
+		if ((c >= 0xd800) && (c < 0xdc00))
+		{
+			// High surrogate, valid only when directly followed by a low surrogate
+			if (i + 1 >= numUnits)
+			{
+				// Truncated pair, silently drop
+				continue;
+			}
+			const unsigned int low = ((unsigned int)bytes[2 * i + 2] << 8) | bytes[2 * i + 3];
+			if ((low < 0xdc00) || (low > 0xdfff))
+			{
+				// Unpaired high surrogate, silently drop it; the following unit is processed on its own
+				continue;
+			}
+			c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
+			i++;  // The low surrogate has been consumed, too
+		}
+		else if ((c >= 0xdc00) && (c <= 0xdfff))
+		{
+			// Low surrogate without a preceding high surrogate, silently drop
+			continue;
+		}
+		
+		if (c < 0x80)
+		{
+			a_UTF8.push_back((char)c);
+		}
+		else if (c < 0x800)
+		{
+			a_UTF8.push_back((char)(192 + c / 64));
+			a_UTF8.push_back((char)(128 + c % 64));
+		}
+		else if (c < 0x10000)
+		{
+			a_UTF8.push_back((char)(224 + c / 4096));
+			a_UTF8.push_back((char)(128 + c / 64 % 64));
+			a_UTF8.push_back((char)(128 + c % 64));
+		}
+		else
+		{
+			// Only reachable through a combined surrogate pair, so c is at most 0x10ffff here
+			a_UTF8.push_back((char)(240 + c / 262144));
+			a_UTF8.push_back((char)(128 + c / 4096 % 64));
+			a_UTF8.push_back((char)(128 + c / 64 % 64));
+			a_UTF8.push_back((char)(128 + c % 64));
+		}
+	}
+	return a_UTF8;
+}
+
+
+
+
+// UTF-8 conversion code adapted from:
+//  http://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Begin of Unicode, Inc.'s code / information
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*
+Notice from the original file:
+* Copyright 2001-2004 Unicode, Inc.
+* 
+* Disclaimer
+* 
+* This source code is provided as is by Unicode, Inc. No claims are
+* made as to fitness for any particular purpose. No warranties of any
+* kind are expressed or implied. The recipient agrees to determine
+* applicability of information provided. If this file has been
+* purchased on magnetic or optical media from Unicode, Inc., the
+* sole remedy for any claim will be exchange of defective media
+* within 90 days of receipt.
+* 
+* Limitations on Rights to Redistribute This Code
+* 
+* Unicode, Inc. hereby grants the right to freely use the information
+* supplied in this file in the creation of products supporting the
+* Unicode Standard, and to make copies of this file in any form
+* for internal or external distribution as long as this notice
+* remains attached.
+*/
+
+// Code-point limits and UTF-16 surrogate ranges used by the UTF-8 -> UTF-16 conversion in this file:
+// Values up to UNI_MAX_BMP are emitted as a single 16-bit unit, values above it up to UNI_MAX_UTF16
+// are emitted as a surrogate pair built from UNI_SUR_HIGH_START and UNI_SUR_LOW_START.
+#define UNI_MAX_BMP         0x0000FFFF
+#define UNI_MAX_UTF16       0x0010FFFF
+#define UNI_MAX_UTF32       0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 0x0010FFFF
+// The range UNI_SUR_HIGH_START .. UNI_SUR_LOW_END is reserved for surrogates and is not a valid character value:
+#define UNI_SUR_HIGH_START  0xD800
+#define UNI_SUR_HIGH_END    0xDBFF
+#define UNI_SUR_LOW_START   0xDC00
+#define UNI_SUR_LOW_END     0xDFFF
+
+
+
+
+
+/**
+Lookup table indexed by the first byte of a UTF-8 sequence, giving the number of bytes that follow it:
+0x00 - 0xBF: 0, 0xC0 - 0xDF: 1, 0xE0 - 0xEF: 2, 0xF0 - 0xF7: 3, 0xF8 - 0xFB: 4, 0xFC - 0xFF: 5.
+The table itself does no validation; isLegalUTF8() rejects sequences longer than 4 bytes in total.
+*/
+static const char trailingBytesForUTF8[256] =
+{
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
+};
+
+
+
+
+
+/**
+Correction values indexed by the number of trailing bytes of a UTF-8 sequence.
+The decoder sums up the raw bytes of a sequence, shifting the sum left by 6 bits between the bytes; subtracting
+the matching entry removes the marker bits of the lead byte and of the continuation bytes from that sum.
+E. g. for one trailing byte: 0x3080 == (0xC0 << 6) + 0x80.
+*/
+static const unsigned int offsetsFromUTF8[6] =
+{
+	0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+	0x03C82080UL, 0xFA082080UL, 0x82082080UL
+};
+
+
+
+
+
+/**
+Returns true if the length bytes starting at source form a single well-formed UTF-8 sequence.
+length is the total size of the sequence, including the lead byte; anything outside 1 .. 4 is rejected.
+The caller must make sure that length bytes are readable at source.
+The bytes are checked from the last one towards the first one, each case falling through into the next.
+*/
+static bool isLegalUTF8(const unsigned char * source, int length)
+{
+	unsigned char a;
+	const unsigned char * srcptr = source + length;  // One past the last byte, pre-decremented before each read
+	switch (length)
+	{
+		default: return false;
+		// Everything else falls through when "true"...
+		// The 4th and 3rd byte must be plain continuation bytes, 0x80 .. 0xBF:
+		case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+		case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+		case 2:
+		{
+			// The allowed range of the 2nd byte depends on the lead byte:
+			if ((a = (*--srcptr)) > 0xBF) return false;
+			switch (*source)
+			{
+				// no fall-through in this inner switch
+				case 0xE0: if (a < 0xA0) return false; break;  // Lower values would encode less than 0x800 in 3 bytes
+				case 0xED: if (a > 0x9F) return false; break;  // Higher values would encode 0xD800 and above (surrogates)
+				case 0xF0: if (a < 0x90) return false; break;  // Lower values would encode less than 0x10000 in 4 bytes
+				case 0xF4: if (a > 0x8F) return false; break;  // Higher values would encode more than 0x10FFFF
+				default:   if (a < 0x80) return false;
+			}
+		}
+		// The lead byte must not be a continuation byte (0x80 .. 0xBF), nor 0xC0 / 0xC1:
+		case 1: if (*source >= 0x80 && *source < 0xC2) return false;
+	}
+	// Lead bytes above 0xF4 are never valid
+	if (*source > 0xF4) return false;
+	return true;
+}
+
+
+
+
+
+/**
+Converts the a_UTF8Length bytes of UTF-8 text at a_UTF8 into big-endian UTF-16, stored as raw bytes in a_UTF16.
+Returns a reference to a_UTF16, which is cleared first.
+The conversion stops at the first truncated or malformed sequence; whatever was converted up to then is returned.
+*/
+AString & UTF8ToRawBEUTF16(const char * a_UTF8, size_t a_UTF8Length, AString & a_UTF16)
+{
+	a_UTF16.clear();
+	a_UTF16.reserve(a_UTF8Length * 3);
+
+	// Constants for splitting a value above 0xFFFF into a surrogate pair:
+	const unsigned int surrogateBase = 0x0010000UL;
+	const unsigned int surrogateMask = 0x3FFUL;
+	const int surrogateShift = 10;
+
+	const unsigned char * cursor = (const unsigned char*)a_UTF8;
+	const unsigned char * const limit = cursor + a_UTF8Length;
+	while (cursor < limit)
+	{
+		const unsigned short numTrailing = trailingBytesForUTF8[*cursor];
+		
+		// Bail out on a sequence that is cut off by the end of input, or that is not well-formed:
+		if ((cursor + numTrailing >= limit) || !isLegalUTF8(cursor, numTrailing + 1))
+		{
+			return a_UTF16;
+		}
+		
+		// Sum up all the bytes of the sequence, shifting by 6 bits between them (see "Note A" below),
+		// then remove the marker bits that have been summed up along with the payload:
+		unsigned int codePoint = 0;
+		for (unsigned short n = 0; n < numTrailing; n++)
+		{
+			codePoint += *cursor++;
+			codePoint <<= 6;
+		}
+		codePoint += *cursor++;
+		codePoint -= offsetsFromUTF8[numTrailing];
+
+		if (codePoint > UNI_MAX_UTF16)
+		{
+			// Invalid value, replace with a space
+			unsigned short unit = htons(' ');
+			a_UTF16.append((const char *)&unit, 2);
+		}
+		else if (codePoint > UNI_MAX_BMP)
+		{
+			// The value is in range 0x10000 - 0x10FFFF, it needs a surrogate pair
+			codePoint -= surrogateBase;
+			unsigned short high = htons((codePoint >> surrogateShift) + UNI_SUR_HIGH_START);
+			unsigned short low  = htons((codePoint & surrogateMask) + UNI_SUR_LOW_START);
+			a_UTF16.append((const char *)&high, 2);
+			a_UTF16.append((const char *)&low, 2);
+		}
+		else
+		{
+			// The value fits into a single 16-bit unit
+			if ((codePoint >= UNI_SUR_HIGH_START) && (codePoint <= UNI_SUR_LOW_END))
+			{
+				// The surrogate values are not characters, replace with a space
+				codePoint = ' ';
+			}
+			unsigned short unit = htons((unsigned short)codePoint);
+			a_UTF16.append((const char *)&unit, 2);
+		}
+	}
+	return a_UTF16;
+}
+
+/* ---------------------------------------------------------------------
+
+    Note A.
+    The fall-through switches in UTF-8 reading code save a
+    temp variable, some decrements & conditionals.  The switches
+    are equivalent to the following loop:
+    {
+        int tmpBytesToRead = extraBytesToRead+1;
+        do {
+        ch += *source++;
+        --tmpBytesToRead;
+        if (tmpBytesToRead) ch <<= 6;
+        } while (tmpBytesToRead > 0);
+    }
+
+   ---------------------------------------------------------------------
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// End of Unicode, Inc.'s code / information
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+#define HEX(x) ((x) > 9 ? (x) + 'A' - 10 : (x) + '0')
+
+/**
+Appends a hex dump of the a_Size bytes at a_Data to a_Out, a_LineLength bytes per line; returns a ref to a_Out.
+Each line holds the offset, the hex values and the text representation (bytes below 32 are shown as '.'):
+00001234: 31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66    1234567890abcdef
+The text column is at the same position in all lines, including a shorter last line.
+Nothing is appended if a_Data is NULL or if a_Size or a_LineLength is not positive.
+*/
+AString & CreateHexDump(AString & a_Out, const void * a_Data, int a_Size, int a_LineLength)
+{
+	if ((a_Data == NULL) || (a_Size <= 0) || (a_LineLength <= 0))
+	{
+		// Nothing to dump; also guards the division and the loop step below against a zero line length
+		return a_Out;
+	}
+	
+	const unsigned char * data = (const unsigned char *)a_Data;
+	const size_t size = (size_t)a_Size;
+	const size_t lineLength = (size_t)a_LineLength;
+	const size_t hexStart = 10;  // Behind the 8-digit offset, the colon and a space
+	const size_t textStart = hexStart + 2 + lineLength * 3 + 1;
+	const size_t numLines = (size + lineLength - 1) / lineLength;
+	
+	a_Out.reserve(a_Out.size() + numLines * (textStart + lineLength + 1));
+	AString line;  // Built in a string, so there's no fixed upper limit on a_LineLength
+	for (size_t offset = 0; offset < size; offset += lineLength)
+	{
+		const size_t count = (size - offset < lineLength) ? (size - offset) : lineLength;
+		
+		// Start with a line full of spaces and fill in the individual columns:
+		line.assign(textStart + count, ' ');
+		for (size_t digit = 0; digit < 8; digit++)
+		{
+			// The offset uses lowercase hex digits, same as the "%08x" format
+			line[7 - digit] = "0123456789abcdef"[(offset >> (4 * digit)) & 0x0f];
+		}
+		line[8] = ':';
+		for (size_t j = 0; j < count; j++)
+		{
+			const unsigned char c = data[offset + j];
+			line[hexStart + 3 * j]     = (char)HEX(c >> 4);
+			line[hexStart + 3 * j + 1] = (char)HEX(c & 0xf);
+			line[textStart + j] = (c >= ' ') ? (char)c : '.';
+		}  // for j
+		line.push_back('\n');
+		a_Out.append(line);
+	}  // for offset
+	return a_Out;
+}
+
+
+
+
+
+/** Returns a copy of a_Message in which each single quote, double quote and backslash is prefixed with a backslash. */
+AString EscapeString(const AString & a_Message)
+{
+	AString escaped;
+	escaped.reserve(a_Message.size());
+	for (AString::const_iterator itr = a_Message.begin(); itr != a_Message.end(); ++itr)
+	{
+		const char ch = *itr;
+		if ((ch == '\'') || (ch == '\"') || (ch == '\\'))
+		{
+			escaped.push_back('\\');
+		}
+		escaped.push_back(ch);
+	}  // for itr - a_Message[]
+	return escaped;
+}
+
+
+
+
+
+/**
+Returns a copy of a_Message with all color codes removed.
+A color code is the byte pair 0xC2 0xA7 together with the single byte that follows it.
+*/
+AString StripColorCodes(const AString & a_Message)
+{
+	AString stripped(a_Message);
+	size_t pos = stripped.find("\xc2\xa7");
+	while (pos != AString::npos)
+	{
+		stripped.erase(pos, 3);
+		// Continue at the same position, the text behind the code has moved there:
+		pos = stripped.find("\xc2\xa7", pos);
+	}
+	return stripped;
+}
+
+
+
+
+
+/// Stores the value of the hex digit a_Char into a_Value; returns false if a_Char is not a hex digit
+static bool URLDecodeHexDigit(char a_Char, unsigned char & a_Value)
+{
+	if ((a_Char >= '0') && (a_Char <= '9'))
+	{
+		a_Value = (unsigned char)(a_Char - '0');
+		return true;
+	}
+	if ((a_Char >= 'a') && (a_Char <= 'f'))
+	{
+		a_Value = (unsigned char)(a_Char - 'a' + 10);
+		return true;
+	}
+	if ((a_Char >= 'A') && (a_Char <= 'F'))
+	{
+		a_Value = (unsigned char)(a_Char - 'A' + 10);
+		return true;
+	}
+	return false;
+}
+
+
+
+
+
+/**
+Returns a_String with each "%XX" sequence (XX being two hex digits of either case) replaced by the byte of that value.
+A '%' that is not followed by two hex digits is copied to the output unchanged, as is everything else.
+*/
+AString URLDecode(const AString & a_String)
+{
+	AString res;
+	const size_t len = a_String.length();
+	res.reserve(len);
+	for (size_t i = 0; i < len; i++)
+	{
+		const char ch = a_String[i];
+		unsigned char hi = 0;
+		unsigned char lo = 0;
+		// (len - i) cannot underflow because i < len; it keeps the two lookaheads inside the string
+		if (
+			(ch == '%') && (len - i >= 3) &&
+			URLDecodeHexDigit(a_String[i + 1], hi) &&
+			URLDecodeHexDigit(a_String[i + 2], lo)
+		)
+		{
+			res.push_back((char)((hi << 4) | lo));
+			i += 2;  // Skip the two hex digits as well
+		}
+		else
+		{
+			res.push_back(ch);
+		}
+	}  // for i - a_String[]
+	return res;
+}
+
+
+
+
+
+/** Returns a copy of a_String in which each occurrence of the character a_From is replaced with a_To. */
+AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To)
+{
+	AString res(a_String);
+	for (AString::iterator itr = res.begin(); itr != res.end(); ++itr)
+	{
+		if (*itr == a_From)
+		{
+			*itr = a_To;
+		}
+	}
+	return res;
+}
+
+
+
+
+
+/// Converts one character of a Base64 encoding into its 6-bit value; returns -1 for the '=' padding and -2 for any other character
+static inline int UnBase64(char c)
+{
+	if ((c >= 'A') && (c <= 'Z'))
+	{
+		return c - 'A';
+	}
+	if ((c >= 'a') && (c <= 'z'))
+	{
+		return c - 'a' + 26;
+	}
+	if ((c >= '0') && (c <= '9'))
+	{
+		return c - '0' + 52;
+	}
+	switch (c)
+	{
+		case '+': return 62;
+		case '/': return 63;
+		case '=': return -1;
+	}
+	return -2;
+}
+
+
+
+
+
+/**
+Decodes the Base64-encoded a_Base64String and returns the resulting raw bytes.
+Characters for which UnBase64() reports -2 are skipped; the decoding ends at the first '=' character.
+Only the completely decoded bytes are returned.
+*/
+AString Base64Decode(const AString & a_Base64String)
+{
+	const size_t len = a_Base64String.size();
+	AString res;
+	res.resize((len * 4) / 3 + 5, 0);  // Approximate the upper bound on the result length
+	
+	int bitPos = 0;  // Number of bits written into res so far
+	for (size_t i = 0; i < len; i++)
+	{
+		const int c = UnBase64(a_Base64String[i]);
+		if (c == -1)
+		{
+			// The padding character, there's no more data behind it. Return as much as we've decoded
+			break;
+		}
+		if (c < 0)
+		{
+			// Not a Base64 character, skip it
+			continue;
+		}
+		
+		// OR the 6 bits into res at the current bit position; depending on the position they span one or two bytes:
+		switch (bitPos & 7)
+		{
+			case 0: res[bitPos >> 3] |= (c << 2); break;
+			case 6: res[bitPos >> 3] |= (c >> 4); res[(bitPos >> 3) + 1] |= (c << 4); break;
+			case 4: res[bitPos >> 3] |= (c >> 2); res[(bitPos >> 3) + 1] |= (c << 6); break;
+			case 2: res[bitPos >> 3] |= c; break;
+		}
+		bitPos += 6;
+	}
+	res.resize(bitPos >> 3);
+	return res;
+}
+
+
+
+