Cuberite
A lightweight, fast and extensible game server for Minecraft
StringUtils.cpp
Go to the documentation of this file.
1 
2 // StringUtils.cpp
3 
4 // Implements the various string helper functions:
5 
6 #include "Globals.h"
7 
8 #include "fmt/printf.h"
9 
10 #ifdef _MSC_VER
11  // Under MSVC, link to WinSock2 (needed by RawBEToUTF8's byteswapping)
12  #pragma comment(lib, "ws2_32.lib")
13 #endif
14 
15 
16 
17 
18 
/** Returns the numeric value (0 - 15) of a single hexadecimal digit; both letter cases are accepted.
Returns 0xff if a_HexChar is not a hexadecimal digit. */
static unsigned char HexToDec(char a_HexChar)
{
	if ((a_HexChar >= '0') && (a_HexChar <= '9'))
	{
		return static_cast<unsigned char>(a_HexChar - '0');
	}
	if ((a_HexChar >= 'a') && (a_HexChar <= 'f'))
	{
		return static_cast<unsigned char>(a_HexChar - 'a' + 10);
	}
	if ((a_HexChar >= 'A') && (a_HexChar <= 'F'))
	{
		return static_cast<unsigned char>(a_HexChar - 'A' + 10);
	}

	// Not a hex digit, report the failure through the sentinel value:
	return 0xff;
}
50 
51 
52 
53 
54 
55 AString & Printf(AString & str, const char * format, fmt::ArgList args)
56 {
57  ASSERT(format != nullptr);
58  str = fmt::sprintf(format, args);
59  return str;
60 }
61 
62 
63 
64 
65 
66 AString Printf(const char * format, fmt::ArgList args)
67 {
68  ASSERT(format != nullptr);
69  return fmt::sprintf(format, args);
70 }
71 
72 
73 
74 
75 
/** Splits str at every character that is listed in delim.
Empty pieces between adjacent delimiters are kept, but an empty piece after the last delimiter is not reported. */
AStringVector StringSplit(const AString & str, const AString & delim)
{
	AStringVector Pieces;
	size_t Begin = 0;
	for (;;)
	{
		const size_t End = str.find_first_of(delim, Begin);
		if (End == AString::npos)
		{
			break;
		}
		Pieces.push_back(str.substr(Begin, End - Begin));
		Begin = End + 1;
	}

	// Whatever follows the last delimiter is the final piece, unless it is empty:
	if (Begin < str.length())
	{
		Pieces.push_back(str.substr(Begin));
	}
	return Pieces;
}
92 
93 
94 
95 
96 
/** Splits str at any of the characters listed in delim, skipping empty pieces.
A piece that starts with a single or double quote extends up to the matching closing quote,
may contain delimiters, and is reported without the quotes.
If the closing quote is missing, the piece is split at the next delimiter and keeps its opening quote. */
AStringVector StringSplitWithQuotes(const AString & str, const AString & delim)
{
	AStringVector results;

	size_t Prev = 0;
	size_t cutAt = 0;
	while ((cutAt = str.find_first_of(delim, Prev)) != AString::npos)
	{
		if (cutAt == Prev)
		{
			// Empty piece due to repeated delimiters / a delimiter at the beginning of the input, skip it:
			++Prev;
			continue;
		}

		const char first = str[Prev];
		if ((first == '"') || (first == '\''))
		{
			const size_t closing = str.find(first, Prev + 1);
			if (closing != AString::npos)
			{
				results.push_back(str.substr(Prev + 1, closing - Prev - 1));

				// Continue right after the closing quote. A delimiter there is skipped by the empty-piece check above;
				// any other character starts the next piece instead of being silently dropped.
				Prev = closing + 1;
				continue;
			}
		}

		// Unquoted piece, or a quote that is never closed:
		results.push_back(str.substr(Prev, cutAt - Prev));
		Prev = cutAt + 1;
	}

	if (Prev < str.length())
	{
		AString current = str.substr(Prev);

		// If the remnant is wrapped in matching quotes, remove them:
		if (
			(current.length() >= 2) &&
			((current.front() == '"') || (current.front() == '\'')) &&
			(current.front() == current.back())
		)
		{
			current = current.substr(1, current.length() - 2);
		}

		results.push_back(std::move(current));
	}

	return results;
}
149 
150 
151 
152 
153 
/** Joins all the strings into a single string, putting a_Delimeter between each two neighbors.
Returns an empty string for an empty vector. */
AString StringJoin(const AStringVector & a_Strings, const AString & a_Delimeter)
{
	if (a_Strings.empty())
	{
		return {};
	}

	// Calculate the final size up front, so that the result is allocated only once:
	size_t TotalSize = a_Delimeter.size() * (a_Strings.size() - 1);
	for (const auto & Item : a_Strings)
	{
		TotalSize += Item.size();
	}

	AString Joined;
	Joined.reserve(TotalSize);
	auto itr = a_Strings.cbegin();
	Joined += *itr;
	for (++itr; itr != a_Strings.cend(); ++itr)
	{
		Joined.append(a_Delimeter).append(*itr);
	}
	return Joined;
}
185 
186 
187 
188 
189 
190 AStringVector StringSplitAndTrim(const AString & str, const AString & delim)
191 {
192  AStringVector results;
193  size_t cutAt = 0;
194  size_t Prev = 0;
195  while ((cutAt = str.find_first_of(delim, Prev)) != str.npos)
196  {
197  results.push_back(TrimString(str.substr(Prev, cutAt - Prev)));
198  Prev = cutAt + 1;
199  }
200  if (Prev < str.length())
201  {
202  results.push_back(TrimString(str.substr(Prev)));
203  }
204  return results;
205 }
206 
207 
208 
209 
210 
/** Returns a copy of str with the leading and trailing whitespace removed.
Every character with a byte value of 32 (space) or lower counts as whitespace.
Returns an empty string if str contains nothing but whitespace. */
AString TrimString(const AString & str)
{
	// Compare as unsigned, so that bytes >= 0x80 (UTF-8 sequences) are never mistaken for whitespace:
	const auto IsVisible = [](char a_Char)
	{
		return (static_cast<unsigned char>(a_Char) > 32);
	};

	const auto First = std::find_if(str.cbegin(), str.cend(), IsVisible);
	if (First == str.cend())
	{
		return AString();
	}

	// A visible character is known to exist, so the reverse search always succeeds;
	// base() converts its position into the iterator one past that character:
	const auto PastLast = std::find_if(str.crbegin(), str.crend(), IsVisible).base();
	return AString(First, PastLast);
}
240 
241 
242 
243 
244 
/** Converts s to lowercase in place; returns s. */
AString & InPlaceLowercase(AString & s)
{
	// tolower() must not be given a negative value, so each char goes through unsigned char first:
	std::transform(s.begin(), s.end(), s.begin(),
		[](unsigned char a_Char)
		{
			return static_cast<char>(::tolower(a_Char));
		}
	);
	return s;
}
250 
251 
252 
253 
254 
/** Converts s to uppercase in place; returns s. */
AString & InPlaceUppercase(AString & s)
{
	// toupper() must not be given a negative value, so each char goes through unsigned char first:
	std::transform(s.begin(), s.end(), s.begin(),
		[](unsigned char a_Char)
		{
			return static_cast<char>(::toupper(a_Char));
		}
	);
	return s;
}
260 
261 
262 
263 
264 
/** Returns a lowercased copy of s. */
AString StrToLower(const AString & s)
{
	AString res;
	res.resize(s.size());

	// tolower() must not be given a negative value, so each char goes through unsigned char first:
	std::transform(s.begin(), s.end(), res.begin(),
		[](unsigned char a_Char)
		{
			return static_cast<char>(::tolower(a_Char));
		}
	);
	return res;
}
272 
273 
274 
275 
276 
/** Returns an uppercased copy of s. */
AString StrToUpper(const AString & s)
{
	AString res;
	res.resize(s.size());

	// toupper() must not be given a negative value, so each char goes through unsigned char first:
	std::transform(s.begin(), s.end(), res.begin(),
		[](unsigned char a_Char)
		{
			return static_cast<char>(::toupper(a_Char));
		}
	);
	return res;
}
284 
285 
286 
287 
288 
/** Compares the two strings while ignoring the letter case.
Returns zero if they are equal, a negative value if s1 sorts before s2, a positive value otherwise.
The comparison works on the C-string representation, so it stops at the first NUL character. */
int NoCaseCompare(const AString & s1, const AString & s2)
{
	const char * Lhs = s1.c_str();
	const char * Rhs = s2.c_str();

	#ifdef _MSC_VER
		return _stricmp(Lhs, Rhs);
	#else
		return strcasecmp(Lhs, Rhs);
	#endif  // else _MSC_VER
}
297 
298 
299 
300 
301 
/** Rates how well s1 matches the beginning of s2, ignoring the letter case.
Returns the number of leading characters of s1 that equal the corresponding characters of s2.
Returns 0 if s1 is longer than s2. */
size_t RateCompareString(const AString & s1, const AString & s2)
{
	const size_t s1Length = s1.length();
	if (s1Length > s2.length())
	{
		// Definitely not a match
		return 0;
	}

	// toupper() must not be given a negative value, so each char goes through unsigned char first:
	const auto Upper = [](char a_Char)
	{
		return toupper(static_cast<unsigned char>(a_Char));
	};

	size_t MatchedLetters = 0;
	while ((MatchedLetters < s1Length) && (Upper(s1[MatchedLetters]) == Upper(s2[MatchedLetters])))
	{
		++MatchedLetters;
	}
	return MatchedLetters;
}
328 
329 
330 
331 
332 
/** Replaces each occurrence of iNeedle in iHayStack with iReplaceWith, in place.
The search continues after the inserted replacement, so the replacement text itself is never re-scanned.
Does nothing if iNeedle is empty. */
void ReplaceString(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
{
	// An empty needle would be "found" at every position, which would loop forever
	if (iNeedle.empty())
	{
		return;
	}

	for (
		size_t Pos = iHayStack.find(iNeedle);
		Pos != AString::npos;
		Pos = iHayStack.find(iNeedle, Pos + iReplaceWith.size())
	)
	{
		iHayStack.replace(Pos, iNeedle.size(), iReplaceWith);
	}
}
348 
349 
350 
351 
352 
353 AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UTF8)
354 {
355  a_UTF8.clear();
356  a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size
357  for (size_t i = 0; i < a_NumShorts; i++)
358  {
359  a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2])));
360  }
361  return a_UTF8;
362 }
363 
364 
365 
366 
367 
/** Converts a single Unicode code point into its UTF-8 representation (1 - 4 bytes).
Returns an empty string for the surrogate range (0xd800 - 0xdfff) and for values of 0x110000 and above. */
AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
{
	// Surrogates and values past the end of Unicode cannot be encoded:
	const bool IsSurrogate = (a_UnicodeChar >= 0xd800) && (a_UnicodeChar <= 0xdfff);
	if (IsSurrogate || (a_UnicodeChar >= 0x110000))
	{
		return AString();
	}

	// Produces a continuation byte carrying the six bits that start at bit a_Shift:
	const auto Continuation = [a_UnicodeChar](unsigned a_Shift)
	{
		return static_cast<char>(0x80 | ((a_UnicodeChar >> a_Shift) & 0x3f));
	};

	if (a_UnicodeChar < 0x80)
	{
		// Plain ASCII, single byte
		return AString{static_cast<char>(a_UnicodeChar)};
	}
	if (a_UnicodeChar < 0x800)
	{
		return AString
		{
			static_cast<char>(0xc0 | (a_UnicodeChar >> 6)),
			Continuation(0),
		};
	}
	if (a_UnicodeChar < 0x10000)
	{
		return AString
		{
			static_cast<char>(0xe0 | (a_UnicodeChar >> 12)),
			Continuation(6),
			Continuation(0),
		};
	}
	return AString
	{
		static_cast<char>(0xf0 | (a_UnicodeChar >> 18)),
		Continuation(12),
		Continuation(6),
		Continuation(0),
	};
}
412 
413 
414 
415 
416 
417 #ifdef __GNUC__
418 #pragma GCC diagnostic push
419 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
420 #endif
421 // UTF-8 conversion code adapted from:
422 // https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c
423 
425 // Begin of Unicode, Inc.'s code / information
427 
428 /*
429 Notice from the original file:
430 * Copyright 2001-2004 Unicode, Inc.
431 *
432 * Disclaimer
433 *
434 * This source code is provided as is by Unicode, Inc. No claims are
435 * made as to fitness for any particular purpose. No warranties of any
436 * kind are expressed or implied. The recipient agrees to determine
437 * applicability of information provided. If this file has been
438 * purchased on magnetic or optical media from Unicode, Inc., the
439 * sole remedy for any claim will be exchange of defective media
440 * within 90 days of receipt.
441 *
442 * Limitations on Rights to Redistribute This Code
443 *
444 * Unicode, Inc. hereby grants the right to freely use the information
445 * supplied in this file in the creation of products supporting the
446 * Unicode Standard, and to make copies of this file in any form
447 * for internal or external distribution as long as this notice
448 * remains attached.
449 */
450 
// Code points up to this value are written out as a single UTF-16 code unit:
#define UNI_MAX_BMP 0x0000FFFF
// Decoded values above this limit are replaced with a space by UTF8ToRawBEUTF16():
#define UNI_MAX_UTF16 0x0010FFFF
// Base value of the first (high) code unit of a UTF-16 surrogate pair; also the start of the surrogate range:
#define UNI_SUR_HIGH_START 0xD800
// Base value of the second (low) code unit of a UTF-16 surrogate pair:
#define UNI_SUR_LOW_START 0xDC00
// End of the surrogate range; decoded values in [UNI_SUR_HIGH_START, UNI_SUR_LOW_END] are replaced with a space:
#define UNI_SUR_LOW_END 0xDFFF
456 
457 
458 
459 
460 
/** Indexed by the first byte of a UTF-8 sequence, gives the number of bytes that follow it in that sequence.
The values 4 and 5 belong to the obsolete 5- and 6-byte forms; isLegalUTF8() rejects those lengths. */
static const Byte trailingBytesForUTF8[256] =
{
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x1f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x20 - 0x3f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x40 - 0x5f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x60 - 0x7f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x9f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xbf
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xc0 - 0xdf
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5   // 0xe0 - 0xff
};
472 
473 
474 
475 
476 
/** Indexed by the number of trailing bytes of a UTF-8 sequence.
UTF8ToRawBEUTF16() adds up the raw bytes of a sequence, shifting by 6 bits between them, without masking off
the marker bits of the lead and continuation bytes. Each value here is the accumulated sum of those marker bits
for the given sequence length (e. g. 0x3080 == (0xc0 << 6) + 0x80) and is subtracted to get the code point. */
static const unsigned int offsetsFromUTF8[6] =
{
	0x00000000UL, 0x00003080UL, 0x000E2080UL,
	0x03C82080UL, 0xFA082080UL, 0x82082080UL
};
482 
483 
484 
485 
486 
/** Returns true if the length bytes at source form one well-formed UTF-8 sequence.
Only lengths 1 - 4 are considered legal. The function does not verify that the lead byte announces
exactly length bytes; the caller is expected to derive length from the lead byte.
Rejected are: bad continuation bytes, overlong encodings (lead bytes 0xc0 / 0xc1, 0xe0 followed by < 0xa0,
0xf0 followed by < 0x90), UTF-16 surrogates (0xed followed by > 0x9f) and values above 0x10ffff
(0xf4 followed by > 0x8f, lead bytes above 0xf4). */
static bool isLegalUTF8(const unsigned char * source, int length)
{
	if ((length < 1) || (length > 4))
	{
		return false;
	}
	const unsigned char lead = source[0];

	// The third and fourth byte, if present, must be plain continuation bytes:
	for (int i = length - 1; i >= 2; --i)
	{
		if ((source[i] < 0x80) || (source[i] > 0xbf))
		{
			return false;
		}
	}

	if (length >= 2)
	{
		// The second byte must be a continuation byte for every lead byte,
		// some lead bytes then narrow the allowed range further:
		const unsigned char second = source[1];
		if ((second < 0x80) || (second > 0xbf))
		{
			return false;
		}
		switch (lead)
		{
			case 0xe0: if (second < 0xa0) return false; break;  // overlong 3-byte form
			case 0xed: if (second > 0x9f) return false; break;  // UTF-16 surrogates
			case 0xf0: if (second < 0x90) return false; break;  // overlong 4-byte form
			case 0xf4: if (second > 0x8f) return false; break;  // above 0x10ffff
			default: break;
		}
	}

	// A lead byte must not be a continuation byte nor the start of an overlong 2-byte form:
	if ((lead >= 0x80) && (lead < 0xc2))
	{
		return false;
	}
	return (lead <= 0xf4);
}
521 
522 
523 
524 
525 
526 std::u16string UTF8ToRawBEUTF16(const AString & a_UTF8)
527 {
528  std::u16string UTF16;
529  UTF16.reserve(a_UTF8.size() * 2);
530 
531  const unsigned char * source = reinterpret_cast<const unsigned char *>(a_UTF8.data());
532  const unsigned char * sourceEnd = source + a_UTF8.size();
533  const int halfShift = 10; // used for shifting by 10 bits
534  const unsigned int halfBase = 0x0010000UL;
535  const unsigned int halfMask = 0x3ffUL;
536 
537  while (source < sourceEnd)
538  {
539  unsigned int ch = 0;
540  unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
541  if (source + extraBytesToRead >= sourceEnd)
542  {
543  return UTF16;
544  }
545  // Do this check whether lenient or strict
546  if (!isLegalUTF8(source, extraBytesToRead + 1))
547  {
548  return UTF16;
549  }
550 
551  // The cases all fall through. See "Note A" below.
552  switch (extraBytesToRead)
553  {
554  case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
555  case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
556  case 3: ch += *source++; ch <<= 6;
557  case 2: ch += *source++; ch <<= 6;
558  case 1: ch += *source++; ch <<= 6;
559  case 0: ch += *source++;
560  }
561  ch -= offsetsFromUTF8[extraBytesToRead];
562 
563  if (ch <= UNI_MAX_BMP)
564  {
565  // Target is a character <= 0xFFFF
566  if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_LOW_END))
567  {
568  // UTF-16 surrogate values are illegal in UTF-32
569  ch = ' ';
570  }
571  unsigned short v = htons(static_cast<unsigned short>(ch));
572  UTF16.push_back(static_cast<char16_t>(v));
573  }
574  else if (ch > UNI_MAX_UTF16)
575  {
576  // Invalid value, replace with a space
577  unsigned short v = htons(' ');
578  UTF16.push_back(static_cast<char16_t>(v));
579  }
580  else
581  {
582  // target is a character in range 0xFFFF - 0x10FFFF.
583  ch -= halfBase;
584  auto v1 = htons(static_cast<uint16_t>((ch >> halfShift) + UNI_SUR_HIGH_START));
585  auto v2 = htons(static_cast<uint16_t>((ch & halfMask) + UNI_SUR_LOW_START));
586  UTF16.push_back(static_cast<char16_t>(v1));
587  UTF16.push_back(static_cast<char16_t>(v2));
588  }
589  }
590  return UTF16;
591 }
592 
593 /*
594 ---------------------------------------------------------------------
595 Note A.
596 The fall-through switches in UTF-8 reading code save a
597 temp variable, some decrements & conditionals. The switches
598 are equivalent to the following loop:
599 {
600  int tmpBytesToRead = extraBytesToRead + 1;
601  do
602  {
603  ch += *source++;
604  --tmpBytesToRead;
605  if (tmpBytesToRead)
606  {
607  ch <<= 6;
608  }
609  } while (tmpBytesToRead > 0);
610 }
611 ---------------------------------------------------------------------
612 */
613 
615 // End of Unicode, Inc.'s code / information
617 #ifdef __GNUC__
618 #pragma GCC diagnostic pop
619 #endif
620 
621 
622 
623 
624 
625 #define HEX(x) static_cast<char>((x) > 9 ? (x) + 'A' - 10 : (x) + '0')
626 
631 AString & CreateHexDump(AString & a_Out, const void * a_Data, size_t a_Size, size_t a_BytesPerLine)
632 {
633  fmt::MemoryWriter Output;
634  /* If formatting the data from the comment above:
635  Hex holds: "31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66 "
636  Chars holds: "1234567890abcdef" */
637  fmt::MemoryWriter Hex, Chars;
638 
639  if (a_Size > 0)
640  {
641  // Same as std::ceil(static_cast<float>(a_Size) / a_BytesPerLine);
642  const size_t NumLines = a_Size / a_BytesPerLine + (a_Size % a_BytesPerLine != 0);
643  const size_t CharsPerLine = 14 + 4 * a_BytesPerLine;
644  Output.buffer().reserve(NumLines * CharsPerLine);
645  }
646 
647  for (size_t i = 0; i < a_Size; i += a_BytesPerLine)
648  {
649  size_t k = std::min(a_Size - i, a_BytesPerLine);
650  for (size_t j = 0; j < k; j++)
651  {
652  Byte c = (static_cast<const Byte *>(a_Data))[i + j];
653  Hex << HEX(c >> 4) << HEX(c & 0xf) << ' ';
654  Chars << ((c >= ' ') ? static_cast<char>(c) : '.');
655  } // for j
656 
657  // Write Hex with a dynamic fixed width
658  Output.write("{0:08x}: {1:{2}} {3}\n", i, Hex.c_str(), a_BytesPerLine * 3, Chars.c_str());
659  Hex.clear();
660  Chars.clear();
661  } // for i
662  a_Out.append(Output.data(), Output.size());
663  return a_Out;
664 }
665 
666 
667 
668 
669 
/** Returns a copy of a_Message in which every single quote, double quote and backslash
is preceded by a backslash. */
AString EscapeString(const AString & a_Message)
{
	AString EscapedMsg;
	EscapedMsg.reserve(a_Message.size());
	for (const char ch : a_Message)
	{
		const bool NeedsEscape = (ch == '\'') || (ch == '\"') || (ch == '\\');
		if (NeedsEscape)
		{
			EscapedMsg.push_back('\\');
		}
		EscapedMsg.push_back(ch);
	}
	return EscapedMsg;
}
702 
703 
704 
705 
706 
/** Returns a copy of a_Message with every color / style code removed.
A code is the two-byte sequence C2 A7 (the UTF-8 encoded section sign) together with the one byte following it. */
AString StripColorCodes(const AString & a_Message)
{
	AString Stripped(a_Message);
	size_t Pos = 0;
	while ((Pos = Stripped.find("\xc2\xa7", Pos)) != AString::npos)
	{
		// Remove the two marker bytes and the code byte; erase() clamps the count at the end of the string
		Stripped.erase(Pos, 3);
	}
	return Stripped;
}
721 
722 
723 
724 
725 
726 std::pair<bool, AString> URLDecode(const AString & a_Text)
727 {
728  AString res;
729  auto len = a_Text.size();
730  res.reserve(len);
731  for (size_t i = 0; i < len; i++)
732  {
733  if (a_Text[i] == '+')
734  {
735  res.push_back(' ');
736  continue;
737  }
738  if (a_Text[i] != '%')
739  {
740  res.push_back(a_Text[i]);
741  continue;
742  }
743  if (i + 1 >= len)
744  {
745  // String too short for an encoded value
746  return std::make_pair(false, AString());
747  }
748  if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U'))
749  {
750  // Unicode char "%u0xxxx"
751  if (i + 6 >= len)
752  {
753  return std::make_pair(false, AString());
754  }
755  if (a_Text[i + 2] != '0')
756  {
757  return std::make_pair(false, AString());
758  }
759  unsigned v1 = HexToDec(a_Text[i + 3]);
760  unsigned v2 = HexToDec(a_Text[i + 4]);
761  unsigned v3 = HexToDec(a_Text[i + 5]);
762  unsigned v4 = HexToDec(a_Text[i + 6]);
763  if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff))
764  {
765  // Invalid hex numbers
766  return std::make_pair(false, AString());
767  }
768  res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4));
769  i = i + 6;
770  }
771  else
772  {
773  // Regular char "%xx":
774  if (i + 2 >= len)
775  {
776  return std::make_pair(false, AString());
777  }
778  auto v1 = HexToDec(a_Text[i + 1]);
779  auto v2 = HexToDec(a_Text[i + 2]);
780  if ((v1 == 0xff) || (v2 == 0xff))
781  {
782  // Invalid hex numbers
783  return std::make_pair(false, AString());
784  }
785  res.push_back(static_cast<char>((v1 << 4) | v2));
786  i = i + 2;
787  }
788  } // for i - a_Text[i]
789  return std::make_pair(true, res);
790 }
791 
792 
793 
794 
795 
/** URL-encodes a_Text: alphanumeric characters are copied verbatim, a space becomes "+"
and every other byte becomes "%xx" with two lowercase hex digits. */
AString URLEncode(const AString & a_Text)
{
	static const char HexDigits[] = "0123456789abcdef";

	AString res;
	res.reserve(a_Text.size());
	for (const char ch : a_Text)
	{
		// isalnum() must not be given a negative value, which a plain char holding a UTF-8 byte can be:
		const auto Value = static_cast<unsigned char>(ch);
		if (isalnum(Value))
		{
			res.push_back(ch);
		}
		else if (ch == ' ')
		{
			res.push_back('+');
		}
		else
		{
			res.push_back('%');
			res.push_back(HexDigits[Value >> 4]);
			res.push_back(HexDigits[Value & 0x0f]);
		}
	}
	return res;
}
821 
822 
823 
824 
825 
/** Returns a copy of a_String in which every occurrence of the character a_From is replaced with a_To. */
AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To)
{
	AString Replaced;
	Replaced.reserve(a_String.size());
	for (const char ch : a_String)
	{
		Replaced.push_back((ch == a_From) ? a_To : ch);
	}
	return Replaced;
}
832 
833 
834 
835 
836 
/** Converts one character of the Base64 alphabet into its 6-bit value (0 - 63).
Returns -1 for the padding character '=' and -2 for any character outside the alphabet. */
static inline int UnBase64(char c)
{
	switch (c)
	{
		case '+': return 62;
		case '/': return 63;
		case '=': return -1;
		default: break;
	}

	if ((c >= 'A') && (c <= 'Z'))
	{
		return c - 'A';
	}
	if ((c >= 'a') && (c <= 'z'))
	{
		return 26 + (c - 'a');
	}
	if ((c >= '0') && (c <= '9'))
	{
		return 52 + (c - '0');
	}
	return -2;
}
866 
867 
868 
869 
870 
/** Decodes a Base64-encoded string into the raw data.
Characters outside the Base64 alphabet are skipped. Decoding stops at the first padding character ('=').
Trailing bits that do not make up a whole byte are dropped. */
AString Base64Decode(const AString & a_Base64String)
{
	AString Decoded;
	Decoded.reserve((a_Base64String.size() * 3) / 4 + 1);

	unsigned int Accumulator = 0;  // Holds the bits that have not been output yet, in its lowest NumBits bits
	unsigned int NumBits = 0;
	for (const char ch : a_Base64String)
	{
		const int Value = UnBase64(ch);
		if (Value == -1)
		{
			// Padding reached, return as much as we've decoded:
			break;
		}
		if (Value < 0)
		{
			// Not a Base64 character, skip it
			continue;
		}

		Accumulator = (Accumulator << 6) | static_cast<unsigned int>(Value);
		NumBits += 6;
		if (NumBits >= 8)
		{
			// A whole byte is available, output its 8 topmost pending bits:
			NumBits -= 8;
			Decoded.push_back(static_cast<char>((Accumulator >> NumBits) & 0xff));
			Accumulator &= (1u << NumBits) - 1;
		}
	}
	return Decoded;
}
902 
903 
904 
905 
906 
/** Encodes a_Input into Base64, using the standard alphabet and '=' padding. */
AString Base64Encode(const AString & a_Input)
{
	static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	const size_t InputSize = a_Input.size();
	const size_t EncodedSize = ((InputSize + 2) / 3) * 4;
	AString output;
	output.reserve(EncodedSize);

	const auto ByteAt = [&a_Input](size_t a_Index) -> unsigned int
	{
		return static_cast<unsigned char>(a_Input[a_Index]);
	};

	// Each full group of three input bytes (24 bits) produces four output characters:
	size_t i = 0;
	for (; i + 3 <= InputSize; i += 3)
	{
		const unsigned int Triple = (ByteAt(i) << 16) | (ByteAt(i + 1) << 8) | ByteAt(i + 2);
		output.push_back(Alphabet[(Triple >> 18) & 63]);
		output.push_back(Alphabet[(Triple >> 12) & 63]);
		output.push_back(Alphabet[(Triple >> 6) & 63]);
		output.push_back(Alphabet[Triple & 63]);
	}

	// The one or two leftover bytes produce a padded group:
	const size_t Remaining = InputSize - i;
	if (Remaining == 1)
	{
		const unsigned int Single = ByteAt(i);
		output.push_back(Alphabet[Single >> 2]);
		output.push_back(Alphabet[(Single << 4) & 63]);
		output.push_back('=');
		output.push_back('=');
	}
	else if (Remaining == 2)
	{
		const unsigned int Pair = (ByteAt(i) << 8) | ByteAt(i + 1);
		output.push_back(Alphabet[Pair >> 10]);
		output.push_back(Alphabet[(Pair >> 4) & 63]);
		output.push_back(Alphabet[(Pair << 2) & 63]);
		output.push_back('=');
	}
	ASSERT(output.size() == EncodedSize);

	return output;
}
951 
952 
953 
954 
955 
/** Reads two bytes at a_Mem and returns them interpreted as a big-endian signed short. */
short GetBEShort(const char * a_Mem)
{
	const auto Hi = static_cast<Byte>(a_Mem[0]);
	const auto Lo = static_cast<Byte>(a_Mem[1]);
	return static_cast<short>((Hi << 8) | Lo);
}
961 
962 
963 
964 
965 
/** Reads two bytes at a_Mem and returns them interpreted as a big-endian unsigned short. */
unsigned short GetBEUShort(const char * a_Mem)
{
	const auto Hi = static_cast<Byte>(a_Mem[0]);
	const auto Lo = static_cast<Byte>(a_Mem[1]);
	return static_cast<unsigned short>((Hi << 8) | Lo);
}
971 
972 
973 
974 
975 
/** Reads four bytes at a_Mem and returns them interpreted as a big-endian signed int. */
int GetBEInt(const char * a_Mem)
{
	const Byte * Bytes = reinterpret_cast<const Byte *>(a_Mem);

	// Assemble the value as unsigned, so that the topmost byte is never shifted into the sign bit of an int:
	const unsigned int Value =
		(static_cast<unsigned int>(Bytes[0]) << 24) |
		(static_cast<unsigned int>(Bytes[1]) << 16) |
		(static_cast<unsigned int>(Bytes[2]) << 8) |
		static_cast<unsigned int>(Bytes[3]);
	return static_cast<int>(Value);
}
981 
982 
983 
984 
985 
/** Writes a_Value into the four bytes at a_Mem in the big-endian byte order. */
void SetBEInt(char * a_Mem, Int32 a_Value)
{
	// Work on the unsigned bit pattern, so that no negative value is ever shifted
	// and all four bytes are produced by the same explicit conversion:
	const auto Bits = static_cast<unsigned int>(a_Value);
	a_Mem[0] = static_cast<char>((Bits >> 24) & 0xff);
	a_Mem[1] = static_cast<char>((Bits >> 16) & 0xff);
	a_Mem[2] = static_cast<char>((Bits >> 8) & 0xff);
	a_Mem[3] = static_cast<char>(Bits & 0xff);
}
993 
994 
995 
996 
997 
/** Splits a_Strings at each embedded NUL character and stores the pieces in a_Output,
discarding the previous contents of a_Output.
Empty pieces are kept, except for an empty piece after the last NUL.
Returns true if at least one piece has been stored. */
bool SplitZeroTerminatedStrings(const AString & a_Strings, AStringVector & a_Output)
{
	a_Output.clear();

	size_t Begin = 0;
	for (
		size_t Nul = a_Strings.find('\0');
		Nul != AString::npos;
		Nul = a_Strings.find('\0', Begin)
	)
	{
		a_Output.push_back(a_Strings.substr(Begin, Nul - Begin));
		Begin = Nul + 1;
	}

	// The data after the last NUL, if there is any:
	if (Begin < a_Strings.size())
	{
		a_Output.push_back(a_Strings.substr(Begin));
	}

	return !a_Output.empty();
}
1021 
1022 
1023 
1024 
1025 
/** Returns all the items of a_Strings1 followed by those items of a_Strings2 that are not yet in the result.
Duplicates inside a_Strings1 are kept; duplicates inside a_Strings2 are added only once. */
AStringVector MergeStringVectors(const AStringVector & a_Strings1, const AStringVector & a_Strings2)
{
	// Initialize the resulting vector by the first vector:
	AStringVector res = a_Strings1;

	// Add each item from strings2 that is not already present (iterate by reference, no per-item copy):
	for (const auto & item : a_Strings2)
	{
		if (std::find(res.begin(), res.end(), item) == res.end())
		{
			res.push_back(item);
		}
	}  // for item - a_Strings2[]

	return res;
}
1042 
1043 
1044 
1045 
1046 
/** Concatenates all the strings into a single string, with a_Separator between each two neighbors.
Returns an empty string for an empty vector. */
AString StringsConcat(const AStringVector & a_Strings, char a_Separator)
{
	AString res;
	bool IsFirst = true;
	for (const auto & Item : a_Strings)
	{
		// The separator goes in front of every item but the first one:
		if (!IsFirst)
		{
			res.push_back(a_Separator);
		}
		res.append(Item);
		IsFirst = false;
	}
	return res;
}
1065 
1066 
1067 
1068 
1069 
/** Converts a_String into a float, storing the result in a_Num.
Returns true only if the entire string has been consumed by the conversion and it contains at least
one converted character; an empty string is therefore reported as a failure.
a_Num is overwritten with the result of strtof() even when false is returned. */
bool StringToFloat(const AString & a_String, float & a_Num)
{
	const char * const Begin = a_String.c_str();
	char * End = nullptr;
	a_Num = strtof(Begin, &End);

	// End == Begin means that strtof() has found nothing to convert at all;
	// a non-NUL character at End means that it has stopped in front of trailing garbage
	return (End != Begin) && (*End == '\0');
}
1080 
1081 
1082 
1083 
1084 
/** Returns true if a_String consists of nothing but whitespace characters, as classified by isspace().
An empty string yields true. */
bool IsOnlyWhitespace(const AString & a_String)
{
	return std::all_of(a_String.cbegin(), a_String.cend(),
		[](char a_Char)
		{
			// isspace() must not be given a negative value, which a plain char holding a UTF-8 byte can be:
			return (isspace(static_cast<unsigned char>(a_Char)) != 0);
		}
	);
}
static const unsigned int offsetsFromUTF8[6]
bool StringToFloat(const AString &a_String, float &a_Num)
Converts a string into a float.
AString & RawBEToUTF8(const char *a_RawData, size_t a_NumShorts, AString &a_UTF8)
Converts a stream of BE shorts into UTF-8 string; returns a_UTF8.
AString StripColorCodes(const AString &a_Message)
Removes all control codes used by MC for colors and styles.
#define UNI_SUR_LOW_END
AString StringsConcat(const AStringVector &a_Strings, char a_Separator)
Concatenates the specified strings into a single string, separated by the specified separator charact...
AString StringJoin(const AStringVector &a_Strings, const AString &a_Delimeter)
Join a list of strings with the given delimiter between entries.
#define UNI_SUR_LOW_START
AStringVector StringSplitWithQuotes(const AString &str, const AString &delim)
Split the string at any of the listed delimiters.
Definition: StringUtils.cpp:97
AString & InPlaceUppercase(AString &s)
In-place string conversion to uppercase.
unsigned short GetBEUShort(const char *a_Mem)
Reads two bytes from the specified memory location and interprets them as BigEndian unsigned short...
bool IsOnlyWhitespace(const AString &a_String)
Returns true if only whitespace characters are present in the string.
void ReplaceString(AString &iHayStack, const AString &iNeedle, const AString &iReplaceWith)
Replaces each occurrence of iNeedle in iHayStack with iReplaceWith.
AString EscapeString(const AString &a_Message)
Returns a copy of a_Message with all quotes and backslashes escaped by a backslash.
AString & InPlaceLowercase(AString &s)
In-place string conversion to lowercase.
static bool isLegalUTF8(const unsigned char *source, int length)
std::vector< AString > AStringVector
Definition: StringUtils.h:14
#define UNI_SUR_HIGH_START
void SetBEInt(char *a_Mem, Int32 a_Value)
Writes four bytes to the specified memory location so that they interpret as BigEndian int...
AString ReplaceAllCharOccurrences(const AString &a_String, char a_From, char a_To)
Replaces all occurrences of char a_From inside a_String with char a_To.
AString Base64Decode(const AString &a_Base64String)
Decodes a Base64-encoded string into the raw data.
std::pair< bool, AString > URLDecode(const AString &a_Text)
URL-Decodes the given string.
#define UNI_MAX_BMP
int NoCaseCompare(const AString &s1, const AString &s2)
Case-insensitive string comparison.
AStringVector StringSplitAndTrim(const AString &str, const AString &delim)
Split the string at any of the listed delimiters and trim each value.
short GetBEShort(const char *a_Mem)
Reads two bytes from the specified memory location and interprets them as BigEndian short...
AString & Printf(AString &str, const char *format, fmt::ArgList args)
Output the formatted text into the string.
Definition: StringUtils.cpp:55
#define ASSERT(x)
Definition: Globals.h:335
AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
Converts a unicode character to its UTF8 representation.
AString TrimString(const AString &str)
Trims whitespace at both ends of the string.
static unsigned char HexToDec(char a_HexChar)
Returns the value of the single hex digit.
Definition: StringUtils.cpp:21
AString URLEncode(const AString &a_Text)
URL-encodes the given string.
#define UNI_MAX_UTF16
std::string AString
Definition: StringUtils.h:13
static const Byte trailingBytesForUTF8[256]
std::u16string UTF8ToRawBEUTF16(const AString &a_UTF8)
Converts a UTF-8 string into a UTF-16 BE string.
AString StrToUpper(const AString &s)
Returns an upper-cased copy of the string.
AString & CreateHexDump(AString &a_Out, const void *a_Data, size_t a_Size, size_t a_BytesPerLine)
format binary data this way: 00001234: 31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66 1234567890abcd...
signed int Int32
Definition: Globals.h:108
AStringVector StringSplit(const AString &str, const AString &delim)
Split the string at any of the listed delimiters.
Definition: StringUtils.cpp:76
static int UnBase64(char c)
Converts one character of a Base64 encoding into its 6-bit data value.
unsigned char Byte
Definition: Globals.h:117
#define HEX(x)
size_t RateCompareString(const AString &s1, const AString &s2)
Case-insensitive string comparison that returns a rating of equal-ness between [0 - s1...
AStringVector MergeStringVectors(const AStringVector &a_Strings1, const AStringVector &a_Strings2)
Merges the two vectors of strings, removing duplicate entries from the second vector.
AString StrToLower(const AString &s)
Returns a lower-cased copy of the string.
int GetBEInt(const char *a_Mem)
Reads four bytes from the specified memory location and interprets them as BigEndian int...
AString Base64Encode(const AString &a_Input)
Encodes a string into Base64.
bool SplitZeroTerminatedStrings(const AString &a_Strings, AStringVector &a_Output)
Splits a string that has embedded \0 characters, on those characters.