Cuberite
A lightweight, fast and extensible game server for Minecraft
StringUtils.cpp
Go to the documentation of this file.
1 
2 // StringUtils.cpp
3 
4 // Implements the various string helper functions:
5 
6 #include "Globals.h"
7 
8 #include "fmt/printf.h"
9 
10 #ifdef _MSC_VER
11  // Under MSVC, link to WinSock2 (needed by RawBEToUTF8's byteswapping)
12  #pragma comment(lib, "ws2_32.lib")
13 #endif
14 
15 
16 
17 
18 
/** Translates a single hexadecimal digit (upper- or lowercase) into its numeric value.
Returns 0xff when a_HexChar is not a hexadecimal digit. */
static unsigned char HexToDec(char a_HexChar)
{
	if ((a_HexChar >= '0') && (a_HexChar <= '9'))
	{
		return static_cast<unsigned char>(a_HexChar - '0');
	}
	if ((a_HexChar >= 'a') && (a_HexChar <= 'f'))
	{
		return static_cast<unsigned char>(a_HexChar - 'a' + 10);
	}
	if ((a_HexChar >= 'A') && (a_HexChar <= 'F'))
	{
		return static_cast<unsigned char>(a_HexChar - 'A' + 10);
	}

	// Not a hex digit, signal the error through the sentinel value:
	return 0xff;
}
50 
51 
52 
53 
54 
55 AStringVector StringSplit(const AString & str, const AString & delim)
56 {
57  AStringVector results;
58  size_t cutAt = 0;
59  size_t Prev = 0;
60  while ((cutAt = str.find_first_of(delim, Prev)) != str.npos)
61  {
62  results.push_back(str.substr(Prev, cutAt - Prev));
63  Prev = cutAt + 1;
64  }
65  if (Prev < str.length())
66  {
67  results.push_back(str.substr(Prev));
68  }
69  return results;
70 }
71 
72 
73 
74 
75 
77 {
78  AStringVector results;
79 
80  size_t cutAt = 0;
81  size_t Prev = 0;
82  size_t cutAtQuote = 0;
83 
84  while ((cutAt = str.find_first_of(delim, Prev)) != str.npos)
85  {
86  if (cutAt == Prev)
87  {
88  // Empty string due to multiple whitespace / whitespace at the beginning of the input
89  // Just skip it
90  Prev = Prev + 1;
91  continue;
92  }
93  AString current = str.substr(Prev, cutAt - Prev);
94  if ((current.front() == '"') || (current.front() == '\''))
95  {
96  Prev += 1;
97  cutAtQuote = str.find_first_of(current.front(), Prev);
98  if (cutAtQuote != str.npos)
99  {
100  current = str.substr(Prev, cutAtQuote - Prev);
101  cutAt = cutAtQuote + 1;
102  }
103  }
104 
105  results.push_back(std::move(current));
106  Prev = cutAt + 1;
107  }
108 
109  if (Prev < str.length())
110  {
111  AString current = str.substr(Prev);
112 
113  // If the remant is wrapped in matching quotes, remove them:
114  if (
115  (current.length() >= 2) &&
116  ((current.front() == '"') || (current.front() == '\'')) &&
117  (current.front() == current.back())
118  )
119  {
120  current = current.substr(1, current.length() - 2);
121  }
122 
123  results.push_back(current);
124  }
125 
126  return results;
127 }
128 
129 
130 
131 
132 
133 AString StringJoin(const AStringVector & a_Strings, const AString & a_Delimeter)
134 {
135  if (a_Strings.empty())
136  {
137  return {};
138  }
139 
140  // Do a dry run to gather the size
141  const auto DelimSize = a_Delimeter.size();
142  size_t ResultSize = a_Strings[0].size();
143  std::for_each(a_Strings.begin() + 1, a_Strings.end(),
144  [&](const AString & a_String)
145  {
146  ResultSize += DelimSize;
147  ResultSize += a_String.size();
148  }
149  );
150 
151  // Now do the actual join
152  AString Result;
153  Result.reserve(ResultSize);
154  Result.append(a_Strings[0]);
155  std::for_each(a_Strings.begin() + 1, a_Strings.end(),
156  [&](const AString & a_String)
157  {
158  Result += a_Delimeter;
159  Result += a_String;
160  }
161  );
162  return Result;
163 }
164 
165 
166 
167 
168 
169 AStringVector StringSplitAndTrim(const AString & str, const AString & delim)
170 {
171  AStringVector results;
172  size_t cutAt = 0;
173  size_t Prev = 0;
174  while ((cutAt = str.find_first_of(delim, Prev)) != str.npos)
175  {
176  results.push_back(TrimString(str.substr(Prev, cutAt - Prev)));
177  Prev = cutAt + 1;
178  }
179  if (Prev < str.length())
180  {
181  results.push_back(TrimString(str.substr(Prev)));
182  }
183  return results;
184 }
185 
186 
187 
188 
189 
191 {
192  size_t len = str.length();
193  size_t start = 0;
194  while (start < len)
195  {
196  if (static_cast<unsigned char>(str[start]) > 32)
197  {
198  break;
199  }
200  ++start;
201  }
202  if (start == len)
203  {
204  return "";
205  }
206 
207  size_t end = len;
208  while (end >= start)
209  {
210  if (static_cast<unsigned char>(str[end]) > 32)
211  {
212  break;
213  }
214  --end;
215  }
216 
217  return str.substr(start, end - start + 1);
218 }
219 
220 
221 
222 
223 
225 {
226  std::transform(s.begin(), s.end(), s.begin(), ::tolower);
227  return s;
228 }
229 
230 
231 
232 
233 
235 {
236  std::transform(s.begin(), s.end(), s.begin(), ::toupper);
237  return s;
238 }
239 
240 
241 
242 
243 
245 {
246  AString res;
247  res.resize(s.size());
248  std::transform(s.begin(), s.end(), res.begin(), ::tolower);
249  return res;
250 }
251 
252 
253 
254 
255 
257 {
258  AString res;
259  res.resize(s.size());
260  std::transform(s.begin(), s.end(), res.begin(), ::toupper);
261  return res;
262 }
263 
264 
265 
266 
267 
268 int NoCaseCompare(const AString & s1, const AString & s2)
269 {
270  #ifdef _MSC_VER
271  return _stricmp(s1.c_str(), s2.c_str());
272  #else
273  return strcasecmp(s1.c_str(), s2.c_str());
274  #endif // else _MSC_VER
275 }
276 
277 
278 
279 
280 
281 size_t RateCompareString(const AString & s1, const AString & s2)
282 {
283  size_t MatchedLetters = 0;
284  size_t s1Length = s1.length();
285 
286  if (s1Length > s2.length())
287  {
288  // Definitely not a match
289  return 0;
290  }
291 
292  for (size_t i = 0; i < s1Length; i++)
293  {
294  char c1 = static_cast<char>(toupper(s1[i]));
295  char c2 = static_cast<char>(toupper(s2[i]));
296  if (c1 == c2)
297  {
298  ++MatchedLetters;
299  }
300  else
301  {
302  break;
303  }
304  }
305  return MatchedLetters;
306 }
307 
308 
309 
310 
311 
312 void ReplaceString(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
313 {
314  // find always returns the current position for an empty needle; prevent endless loop
315  if (iNeedle.empty())
316  {
317  return;
318  }
319 
320  size_t pos1 = iHayStack.find(iNeedle);
321  while (pos1 != AString::npos)
322  {
323  iHayStack.replace( pos1, iNeedle.size(), iReplaceWith);
324  pos1 = iHayStack.find(iNeedle, pos1 + iReplaceWith.size());
325  }
326 }
327 
328 
329 
330 
331 
332 void ReplaceURL(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
333 {
334  auto ReplaceWith = URLEncode(iReplaceWith);
335  ReplaceString(iHayStack, iNeedle, ReplaceWith);
336 }
337 
338 
339 
340 
341 
342 AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UTF8)
343 {
344  a_UTF8.clear();
345  a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size
346  for (size_t i = 0; i < a_NumShorts; i++)
347  {
348  a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2])));
349  }
350  return a_UTF8;
351 }
352 
353 
354 
355 
356 
357 AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
358 {
359  if (a_UnicodeChar < 0x80)
360  {
361  return AString{static_cast<char>(a_UnicodeChar)};
362  }
363  else if (a_UnicodeChar < 0x800)
364  {
365  return AString
366  {
367  static_cast<char>(192 + a_UnicodeChar / 64),
368  static_cast<char>(128 + a_UnicodeChar % 64),
369  };
370  }
371  else if (a_UnicodeChar - 0xd800 < 0x800)
372  {
373  // Error
374  return AString();
375  }
376  else if (a_UnicodeChar < 0x10000)
377  {
378  return AString
379  {
380  static_cast<char>(224 + a_UnicodeChar / 4096),
381  static_cast<char>(128 + (a_UnicodeChar / 64) % 64),
382  static_cast<char>(128 + a_UnicodeChar % 64)
383  };
384  }
385  else if (a_UnicodeChar < 0x110000)
386  {
387  return AString
388  {
389  static_cast<char>(240 + a_UnicodeChar / 262144),
390  static_cast<char>(128 + (a_UnicodeChar / 4096) % 64),
391  static_cast<char>(128 + (a_UnicodeChar / 64) % 64),
392  static_cast<char>(128 + a_UnicodeChar % 64),
393  };
394  }
395  else
396  {
397  // Error
398  return AString();
399  }
400 }
401 
402 
403 
404 
405 
406 #ifdef __GNUC__
407 #pragma GCC diagnostic push
408 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
409 #endif
410 // UTF-8 conversion code adapted from:
411 // https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c
412 
414 // Begin of Unicode, Inc.'s code / information
416 
417 /*
418 Notice from the original file:
419 * Copyright 2001-2004 Unicode, Inc.
420 *
421 * Disclaimer
422 *
423 * This source code is provided as is by Unicode, Inc. No claims are
424 * made as to fitness for any particular purpose. No warranties of any
425 * kind are expressed or implied. The recipient agrees to determine
426 * applicability of information provided. If this file has been
427 * purchased on magnetic or optical media from Unicode, Inc., the
428 * sole remedy for any claim will be exchange of defective media
429 * within 90 days of receipt.
430 *
431 * Limitations on Rights to Redistribute This Code
432 *
433 * Unicode, Inc. hereby grants the right to freely use the information
434 * supplied in this file in the creation of products supporting the
435 * Unicode Standard, and to make copies of this file in any form
436 * for internal or external distribution as long as this notice
437 * remains attached.
438 */
439 
// Codepoint limits used by the UTF-8 -> UTF-16 conversion below:
#define UNI_MAX_BMP        0xFFFF    // Highest codepoint that is stored as a single UTF-16 code unit
#define UNI_MAX_UTF16      0x10FFFF  // Highest codepoint that is stored as a UTF-16 surrogate pair
#define UNI_SUR_HIGH_START 0xD800    // First value of the surrogate range; base of the high (leading) surrogates
#define UNI_SUR_LOW_START  0xDC00    // Base of the low (trailing) surrogates
#define UNI_SUR_LOW_END    0xDFFF    // Last value of the surrogate range
445 
446 
447 
448 
449 
450 static const Byte trailingBytesForUTF8[256] =
451 {
452  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
459  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
460 };
461 
462 
463 
464 
465 
/** Indexed by the number of trailing bytes of a UTF-8 sequence.
Holds the value to subtract from the raw sum of the shifted sequence bytes in order to get the codepoint
(it removes the marker bits of the lead byte and of each continuation byte). */
static const unsigned int offsetsFromUTF8[6] =
{
	0x00000000U,  // 0 trailing bytes
	0x00003080U,  // 1 trailing byte
	0x000E2080U,  // 2 trailing bytes
	0x03C82080U,  // 3 trailing bytes
	0xFA082080U,  // 4 trailing bytes (illegal UTF-8)
	0x82082080U   // 5 trailing bytes (illegal UTF-8)
};
471 
472 
473 
474 
475 
/** Checks whether the `length` bytes starting at `source` form a single well-formed UTF-8 sequence.
Only lengths of 1 - 4 bytes can be legal; any other length yields false.
Rejects bad continuation bytes, overlong encodings, encoded surrogates and values above U+10FFFF.
The caller must guarantee that `length` bytes are readable at `source`. */
static bool isLegalUTF8(const unsigned char * source, int length)
{
	if ((length < 1) || (length > 4))
	{
		return false;
	}
	const unsigned char lead = source[0];

	// The third and fourth byte (if present) must be plain continuation bytes, 0x80 - 0xbf:
	for (int i = length - 1; i >= 2; --i)
	{
		if ((source[i] < 0x80) || (source[i] > 0xbf))
		{
			return false;
		}
	}

	if (length >= 2)
	{
		// The second byte must be a continuation byte as well.
		// The lower bound is checked for every lead byte, including 0xe0, 0xed, 0xf0 and 0xf4:
		const unsigned char second = source[1];
		if ((second < 0x80) || (second > 0xbf))
		{
			return false;
		}

		// Some lead bytes narrow the range of the second byte even further:
		switch (lead)
		{
			case 0xe0: if (second < 0xa0) { return false; } break;  // Overlong 3-byte encoding
			case 0xed: if (second > 0x9f) { return false; } break;  // Encoded UTF-16 surrogate
			case 0xf0: if (second < 0x90) { return false; } break;  // Overlong 4-byte encoding
			case 0xf4: if (second > 0x8f) { return false; } break;  // Above U+10FFFF
			default: break;
		}
	}

	// A lead byte of 0x80 - 0xbf is a stray continuation byte, 0xc0 and 0xc1 only start overlong encodings:
	if ((lead >= 0x80) && (lead < 0xc2))
	{
		return false;
	}

	// Lead bytes above 0xf4 would encode values above U+10FFFF:
	return (lead <= 0xf4);
}
506 
507 
508 
509 
510 
511 std::u16string UTF8ToRawBEUTF16(const AString & a_UTF8)
512 {
513  std::u16string UTF16;
514  UTF16.reserve(a_UTF8.size() * 2);
515 
516  const unsigned char * source = reinterpret_cast<const unsigned char *>(a_UTF8.data());
517  const unsigned char * sourceEnd = source + a_UTF8.size();
518  const int halfShift = 10; // used for shifting by 10 bits
519  const unsigned int halfBase = 0x0010000UL;
520  const unsigned int halfMask = 0x3ffUL;
521 
522  while (source < sourceEnd)
523  {
524  unsigned int ch = 0;
525  unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
526  if (source + extraBytesToRead >= sourceEnd)
527  {
528  return UTF16;
529  }
530  // Do this check whether lenient or strict
531  if (!isLegalUTF8(source, extraBytesToRead + 1))
532  {
533  return UTF16;
534  }
535 
536  // The cases all fall through. See "Note A" below.
537  switch (extraBytesToRead)
538  {
539  case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
540  case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
541  case 3: ch += *source++; ch <<= 6;
542  case 2: ch += *source++; ch <<= 6;
543  case 1: ch += *source++; ch <<= 6;
544  case 0: ch += *source++;
545  }
546  ch -= offsetsFromUTF8[extraBytesToRead];
547 
548  if (ch <= UNI_MAX_BMP)
549  {
550  // Target is a character <= 0xFFFF
551  if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_LOW_END))
552  {
553  // UTF-16 surrogate values are illegal in UTF-32
554  ch = ' ';
555  }
556  unsigned short v = htons(static_cast<unsigned short>(ch));
557  UTF16.push_back(static_cast<char16_t>(v));
558  }
559  else if (ch > UNI_MAX_UTF16)
560  {
561  // Invalid value, replace with a space
562  unsigned short v = htons(' ');
563  UTF16.push_back(static_cast<char16_t>(v));
564  }
565  else
566  {
567  // target is a character in range 0xFFFF - 0x10FFFF.
568  ch -= halfBase;
569  auto v1 = htons(static_cast<uint16_t>((ch >> halfShift) + UNI_SUR_HIGH_START));
570  auto v2 = htons(static_cast<uint16_t>((ch & halfMask) + UNI_SUR_LOW_START));
571  UTF16.push_back(static_cast<char16_t>(v1));
572  UTF16.push_back(static_cast<char16_t>(v2));
573  }
574  }
575  return UTF16;
576 }
577 
578 /*
579 ---------------------------------------------------------------------
580 Note A.
581 The fall-through switches in UTF-8 reading code save a
582 temp variable, some decrements & conditionals. The switches
583 are equivalent to the following loop:
584 {
585  int tmpBytesToRead = extraBytesToRead + 1;
586  do
587  {
588  ch += *source++;
589  --tmpBytesToRead;
590  if (tmpBytesToRead)
591  {
592  ch <<= 6;
593  }
594  } while (tmpBytesToRead > 0);
595 }
596 ---------------------------------------------------------------------
597 */
598 
600 // End of Unicode, Inc.'s code / information
602 #ifdef __GNUC__
603 #pragma GCC diagnostic pop
604 #endif
605 
606 
607 
608 
609 
// Converts a value in the range 0 - 15 into the matching uppercase hex digit character:
#define HEX(x) static_cast<char>(((x) > 9) ? ((x) + 'A' - 10) : ((x) + '0'))
611 
616 AString & CreateHexDump(AString & a_Out, const void * a_Data, size_t a_Size, size_t a_BytesPerLine)
617 {
618  fmt::memory_buffer Output;
619  /* If formatting the data from the comment above:
620  Hex holds: "31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66 "
621  Chars holds: "1234567890abcdef" */
622  fmt::memory_buffer Hex, Chars;
623 
624  if (a_Size > 0)
625  {
626  // Same as std::ceil(static_cast<float>(a_Size) / a_BytesPerLine);
627  const size_t NumLines = a_Size / a_BytesPerLine + (a_Size % a_BytesPerLine != 0);
628  const size_t CharsPerLine = 14 + 4 * a_BytesPerLine;
629  Output.reserve(NumLines * CharsPerLine);
630  }
631 
632  for (size_t i = 0; i < a_Size; i += a_BytesPerLine)
633  {
634  size_t k = std::min(a_Size - i, a_BytesPerLine);
635  for (size_t j = 0; j < k; j++)
636  {
637  Byte c = (static_cast<const Byte *>(a_Data))[i + j];
638  Hex.push_back(HEX(c >> 4));
639  Hex.push_back(HEX(c & 0xf));
640  Hex.push_back(' ');
641  Chars.push_back((c >= ' ') ? static_cast<char>(c) : '.');
642  } // for j
643 
644  // Write Hex with a dynamic fixed width
645  auto HexStr = fmt::string_view(Hex.data(), Hex.size());
646  auto CharsStr = fmt::string_view(Chars.data(), Chars.size());
647  fmt::format_to(
648  Output, "{0:08x}: {1:{2}} {3}\n",
649  i, HexStr, a_BytesPerLine * 3, CharsStr
650  );
651 
652  Hex.clear();
653  Chars.clear();
654  } // for i
655  a_Out.append(Output.data(), Output.size());
656  return a_Out;
657 }
658 
659 
660 
661 
662 
663 AString EscapeString(const AString & a_Message)
664 {
665  AString EscapedMsg;
666  size_t len = a_Message.size();
667  size_t last = 0;
668  EscapedMsg.reserve(len);
669  for (size_t i = 0; i < len; i++)
670  {
671  char ch = a_Message[i];
672  switch (ch)
673  {
674  case '\'':
675  case '\"':
676  case '\\':
677  {
678  if (i > last)
679  {
680  EscapedMsg.append(a_Message, last, i - last);
681  }
682  EscapedMsg.push_back('\\');
683  EscapedMsg.push_back(ch);
684  last = i + 1;
685  break;
686  }
687  } // switch (ch)
688  } // for i - a_Message[]
689  if (len > last)
690  {
691  EscapedMsg.append(a_Message, last, len - last);
692  }
693  return EscapedMsg;
694 }
695 
696 
697 
698 
699 
700 AString StripColorCodes(const AString & a_Message)
701 {
702  AString res(a_Message);
703  size_t idx = 0;
704  for (;;)
705  {
706  idx = res.find("\xc2\xa7", idx);
707  if (idx == AString::npos)
708  {
709  return res;
710  }
711  res.erase(idx, 3);
712  }
713 }
714 
715 
716 
717 
718 
719 std::pair<bool, AString> URLDecode(const AString & a_Text)
720 {
721  AString res;
722  auto len = a_Text.size();
723  res.reserve(len);
724  for (size_t i = 0; i < len; i++)
725  {
726  if (a_Text[i] == '+')
727  {
728  res.push_back(' ');
729  continue;
730  }
731  if (a_Text[i] != '%')
732  {
733  res.push_back(a_Text[i]);
734  continue;
735  }
736  if (i + 1 >= len)
737  {
738  // String too short for an encoded value
739  return std::make_pair(false, AString());
740  }
741  if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U'))
742  {
743  // Unicode char "%u0xxxx"
744  if (i + 6 >= len)
745  {
746  return std::make_pair(false, AString());
747  }
748  if (a_Text[i + 2] != '0')
749  {
750  return std::make_pair(false, AString());
751  }
752  unsigned v1 = HexToDec(a_Text[i + 3]);
753  unsigned v2 = HexToDec(a_Text[i + 4]);
754  unsigned v3 = HexToDec(a_Text[i + 5]);
755  unsigned v4 = HexToDec(a_Text[i + 6]);
756  if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff))
757  {
758  // Invalid hex numbers
759  return std::make_pair(false, AString());
760  }
761  res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4));
762  i = i + 6;
763  }
764  else
765  {
766  // Regular char "%xx":
767  if (i + 2 >= len)
768  {
769  return std::make_pair(false, AString());
770  }
771  auto v1 = HexToDec(a_Text[i + 1]);
772  auto v2 = HexToDec(a_Text[i + 2]);
773  if ((v1 == 0xff) || (v2 == 0xff))
774  {
775  // Invalid hex numbers
776  return std::make_pair(false, AString());
777  }
778  res.push_back(static_cast<char>((v1 << 4) | v2));
779  i = i + 2;
780  }
781  } // for i - a_Text[i]
782  return std::make_pair(true, res);
783 }
784 
785 
786 
787 
788 
789 AString URLEncode(const AString & a_Text)
790 {
791  AString res;
792  auto len = a_Text.size();
793  res.reserve(len);
794  static const char HEX[] = "0123456789ABCDEF";
795  for (size_t i = 0; i < len; ++i)
796  {
797  if (isalnum(a_Text[i]))
798  {
799  res.push_back(a_Text[i]);
800  }
801  else if (a_Text[i] == ' ')
802  {
803  res.push_back('+');
804  }
805  else
806  {
807  res.push_back('%');
808  res.push_back(HEX[static_cast<unsigned char>(a_Text[i]) >> 4]);
809  res.push_back(HEX[static_cast<unsigned char>(a_Text[i]) & 0x0f]);
810  }
811  }
812  return res;
813 }
814 
815 
816 
817 
818 
819 AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To)
820 {
821  AString res(a_String);
822  std::replace(res.begin(), res.end(), a_From, a_To);
823  return res;
824 }
825 
826 
827 
828 
829 
/** Translates a single character of Base64-encoded text into its 6-bit value (0 - 63).
Returns -1 for the padding character '=' and -2 for any character outside the Base64 alphabet. */
static inline int UnBase64(char c)
{
	switch (c)
	{
		case '+': return 62;
		case '/': return 63;
		case '=': return -1;
		default: break;
	}

	if ((c >= 'A') && (c <= 'Z'))
	{
		return c - 'A';
	}
	if ((c >= 'a') && (c <= 'z'))
	{
		return 26 + (c - 'a');
	}
	if ((c >= '0') && (c <= '9'))
	{
		return 52 + (c - '0');
	}
	return -2;
}
859 
860 
861 
862 
863 
864 AString Base64Decode(const AString & a_Base64String)
865 {
866  AString res;
867  size_t i, len = a_Base64String.size();
868  size_t o;
869  int c;
870  res.resize((len * 4) / 3 + 5, 0); // Approximate the upper bound on the result length
871  for (o = 0, i = 0; i < len; i++)
872  {
873  c = UnBase64(a_Base64String[i]);
874  if (c >= 0)
875  {
876  switch (o & 7)
877  {
878  case 0: res[o >> 3] |= (c << 2); break;
879  case 6: res[o >> 3] |= (c >> 4); res[(o >> 3) + 1] |= (c << 4); break;
880  case 4: res[o >> 3] |= (c >> 2); res[(o >> 3) + 1] |= (c << 6); break;
881  case 2: res[o >> 3] |= c; break;
882  }
883  o += 6;
884  }
885  if (c == -1)
886  {
887  // Error while decoding, invalid input. Return as much as we've decoded:
888  res.resize(o >> 3);
889  return res;
890  }
891  }
892  res.resize(o >> 3);
893  return res;
894 }
895 
896 
897 
898 
899 
900 AString Base64Encode(const AString & a_Input)
901 {
902  static const char BASE64[64] =
903  {
904  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
905  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
906  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
907  'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
908  };
909 
910  AString output;
911  output.resize(((a_Input.size() + 2) / 3) * 4);
912 
913  size_t output_index = 0;
914  size_t size_full24 = (a_Input.size() / 3) * 3;
915 
916  for (size_t i = 0; i < size_full24; i += 3)
917  {
918  output[output_index++] = BASE64[static_cast<unsigned char>(a_Input[i]) >> 2];
919  output[output_index++] = BASE64[(static_cast<unsigned char>(a_Input[i]) << 4 | static_cast<unsigned char>(a_Input[i + 1]) >> 4) & 63];
920  output[output_index++] = BASE64[(static_cast<unsigned char>(a_Input[i + 1]) << 2 | static_cast<unsigned char>(a_Input[i + 2]) >> 6) & 63];
921  output[output_index++] = BASE64[static_cast<unsigned char>(a_Input[i + 2]) & 63];
922  }
923 
924  if (size_full24 < a_Input.size())
925  {
926  output[output_index++] = BASE64[static_cast<unsigned char>(a_Input[size_full24]) >> 2];
927  if (size_full24 + 1 == a_Input.size())
928  {
929  output[output_index++] = BASE64[(static_cast<unsigned char>(a_Input[size_full24]) << 4) & 63];
930  output[output_index++] = '=';
931  }
932  else
933  {
934  output[output_index++] = BASE64[(static_cast<unsigned char>(a_Input[size_full24]) << 4 | static_cast<unsigned char>(a_Input[size_full24 + 1]) >> 4) & 63];
935  output[output_index++] = BASE64[(static_cast<unsigned char>(a_Input[size_full24 + 1]) << 2) & 63];
936  }
937 
938  output[output_index++] = '=';
939  }
940  ASSERT(output_index == output.size());
941 
942  return output;
943 }
944 
945 
946 
947 
948 
/** Reads the two bytes at a_Mem and returns them combined as a big-endian (most significant byte first) short. */
short GetBEShort(const std::byte * const a_Mem)
{
	const auto High = std::to_integer<unsigned>(a_Mem[0]);
	const auto Low = std::to_integer<unsigned>(a_Mem[1]);
	return static_cast<short>((High << 8) | Low);
}
956 
957 
958 
959 
960 
/** Reads the two bytes at a_Mem and returns them combined as a big-endian (most significant byte first) unsigned short. */
unsigned short GetBEUShort(const char * a_Mem)
{
	// Go through unsigned char so that bytes of 0x80 and above are not sign-extended:
	const unsigned High = static_cast<unsigned char>(a_Mem[0]);
	const unsigned Low = static_cast<unsigned char>(a_Mem[1]);
	return static_cast<unsigned short>((High << 8) | Low);
}
966 
967 
968 
969 
970 
/** Reads the four bytes at a_Mem and returns them combined as a big-endian (most significant byte first) int. */
int GetBEInt(const std::byte * const a_Mem)
{
	unsigned Value = 0;
	for (size_t i = 0; i < 4; ++i)
	{
		Value = (Value << 8) | std::to_integer<unsigned>(a_Mem[i]);
	}
	return static_cast<int>(Value);
}
980 
981 
982 
983 
984 
985 void SetBEInt(std::byte * a_Mem, Int32 a_Value)
986 {
987  a_Mem[0] = std::byte(a_Value >> 24);
988  a_Mem[1] = std::byte((a_Value >> 16) & 0xff);
989  a_Mem[2] = std::byte((a_Value >> 8) & 0xff);
990  a_Mem[3] = std::byte(a_Value & 0xff);
991 }
992 
993 
994 
995 
996 
997 bool SplitZeroTerminatedStrings(const AString & a_Strings, AStringVector & a_Output)
998 {
999  a_Output.clear();
1000  size_t size = a_Strings.size();
1001  size_t start = 0;
1002  bool res = false;
1003  for (size_t i = 0; i < size; i++)
1004  {
1005  if (a_Strings[i] == 0)
1006  {
1007  a_Output.push_back(a_Strings.substr(start, i - start));
1008  start = i + 1;
1009  res = true;
1010  }
1011  }
1012  if (start < size)
1013  {
1014  a_Output.push_back(a_Strings.substr(start, size - start));
1015  res = true;
1016  }
1017 
1018  return res;
1019 }
1020 
1021 
1022 
1023 
1024 
1025 AStringVector MergeStringVectors(const AStringVector & a_Strings1, const AStringVector & a_Strings2)
1026 {
1027  // Initialize the resulting vector by the first vector:
1028  AStringVector res = a_Strings1;
1029 
1030  // Add each item from strings2 that is not already present:
1031  for (const auto & item : a_Strings2)
1032  {
1033  if (std::find(res.begin(), res.end(), item) == res.end())
1034  {
1035  res.push_back(item);
1036  }
1037  } // for item - a_Strings2[]
1038 
1039  return res;
1040 }
1041 
1042 
1043 
1044 
1045 
1046 AString StringsConcat(const AStringVector & a_Strings, char a_Separator)
1047 {
1048  // If the vector is empty, return an empty string:
1049  if (a_Strings.empty())
1050  {
1051  return "";
1052  }
1053 
1054  // Concatenate the strings in the vector:
1055  AString res;
1056  res.append(a_Strings[0]);
1057  for (auto itr = a_Strings.cbegin() + 1, end = a_Strings.cend(); itr != end; ++itr)
1058  {
1059  res.push_back(a_Separator);
1060  res.append(*itr);
1061  }
1062  return res;
1063 }
1064 
1065 
1066 
1067 
1068 
1069 bool StringToFloat(const AString & a_String, float & a_Num)
1070 {
1071  char *err;
1072  a_Num = strtof(a_String.c_str(), &err);
1073  return (*err == 0);
1074 }
1075 
1076 
1077 
1078 
1079 
1080 bool IsOnlyWhitespace(const AString & a_String)
1081 {
1082  return std::all_of(a_String.cbegin(), a_String.cend(), isspace);
1083 }
signed int Int32
Definition: Globals.h:152
#define ASSERT(x)
Definition: Globals.h:276
unsigned char Byte
Definition: Globals.h:161
std::pair< bool, AString > URLDecode(const AString &a_Text)
URL-Decodes the given string.
short GetBEShort(const std::byte *const a_Mem)
Reads two bytes from the specified memory location and interprets them as BigEndian short.
AStringVector StringSplitAndTrim(const AString &str, const AString &delim)
Split the string at any of the listed delimiters and trim each value.
static bool isLegalUTF8(const unsigned char *source, int length)
AString & InPlaceLowercase(AString &s)
In-place string conversion to lowercase.
AString TrimString(const AString &str)
Trims whitespace at both ends of the string.
#define UNI_SUR_LOW_START
static const unsigned int offsetsFromUTF8[6]
AString & RawBEToUTF8(const char *a_RawData, size_t a_NumShorts, AString &a_UTF8)
Converts a stream of BE shorts into UTF-8 string; returns a_UTF8.
AString URLEncode(const AString &a_Text)
URL-encodes the given string.
AString StrToLower(const AString &s)
Returns a lower-cased copy of the string.
int GetBEInt(const std::byte *const a_Mem)
Reads four bytes from the specified memory location and interprets them as BigEndian int.
AString & InPlaceUppercase(AString &s)
In-place string conversion to uppercase.
AString EscapeString(const AString &a_Message)
Returns a copy of a_Message with all quotes and backslashes escaped by a backslash.
AString ReplaceAllCharOccurrences(const AString &a_String, char a_From, char a_To)
Replaces all occurrences of char a_From inside a_String with char a_To.
#define UNI_SUR_HIGH_START
AStringVector StringSplit(const AString &str, const AString &delim)
Split the string at any of the listed delimiters.
Definition: StringUtils.cpp:55
bool SplitZeroTerminatedStrings(const AString &a_Strings, AStringVector &a_Output)
Splits a string that has embedded \0 characters, on those characters.
AString Base64Decode(const AString &a_Base64String)
Decodes a Base64-encoded string into the raw data.
void SetBEInt(std::byte *a_Mem, Int32 a_Value)
Writes four bytes to the specified memory location so that they interpret as BigEndian int.
void ReplaceString(AString &iHayStack, const AString &iNeedle, const AString &iReplaceWith)
Replaces each occurrence of iNeedle in iHayStack with iReplaceWith.
#define UNI_MAX_UTF16
static int UnBase64(char c)
Converts one Hex character in a Base64 encoding into the data value.
AString StripColorCodes(const AString &a_Message)
Removes all control codes used by MC for colors and styles.
AString & CreateHexDump(AString &a_Out, const void *a_Data, size_t a_Size, size_t a_BytesPerLine)
format binary data this way: 00001234: 31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66 1234567890abcd...
AStringVector StringSplitWithQuotes(const AString &str, const AString &delim)
Split the string at any of the listed delimiters.
Definition: StringUtils.cpp:76
AString StringsConcat(const AStringVector &a_Strings, char a_Separator)
Concatenates the specified strings into a single string, separated by the specified separator charact...
int NoCaseCompare(const AString &s1, const AString &s2)
Case-insensitive string comparison.
void ReplaceURL(AString &iHayStack, const AString &iNeedle, const AString &iReplaceWith)
Replaces each occurrence of iNeedle in iHayStack with iReplaceWith, after URL-encoding iReplaceWith.
static unsigned char HexToDec(char a_HexChar)
Returns the value of the single hex digit.
Definition: StringUtils.cpp:21
AStringVector MergeStringVectors(const AStringVector &a_Strings1, const AStringVector &a_Strings2)
Merges the two vectors of strings, removing duplicate entries from the second vector.
AString StrToUpper(const AString &s)
Returns an upper-cased copy of the string.
AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
Converts a unicode character to its UTF8 representation.
#define UNI_SUR_LOW_END
AString StringJoin(const AStringVector &a_Strings, const AString &a_Delimeter)
Join a list of strings with the given delimiter between entries.
size_t RateCompareString(const AString &s1, const AString &s2)
Case-insensitive string comparison that returns a rating of equal-ness between [0 - s1....
bool StringToFloat(const AString &a_String, float &a_Num)
Converts a string into a float.
#define UNI_MAX_BMP
bool IsOnlyWhitespace(const AString &a_String)
Returns true if only whitespace characters are present in the string.
AString Base64Encode(const AString &a_Input)
Encodes a string into Base64.
std::u16string UTF8ToRawBEUTF16(const AString &a_UTF8)
Converts a UTF-8 string into a UTF-16 BE string.
static const Byte trailingBytesForUTF8[256]
unsigned short GetBEUShort(const char *a_Mem)
Reads two bytes from the specified memory location and interprets them as BigEndian unsigned short.
#define HEX(x)
std::vector< AString > AStringVector
Definition: StringUtils.h:12
std::string AString
Definition: StringUtils.h:11