Cuberite
A lightweight, fast and extensible game server for Minecraft
StringUtils.cpp
Go to the documentation of this file.
1 
2 // StringUtils.cpp
3 
4 // Implements the various string helper functions:
5 
6 #include "Globals.h"
7 
8 #include "fmt/printf.h"
9 
10 #ifdef _MSC_VER
11  // Under MSVC, link to WinSock2 (needed by RawBEToUTF8's byteswapping)
12  #pragma comment(lib, "ws2_32.lib")
13 #endif
14 
15 
16 
17 
18 
/** Returns the numeric value (0 - 15) of a single hexadecimal digit.
Both lowercase and uppercase letters are accepted.
Returns 0xff if a_HexChar is not a hexadecimal digit. */
static unsigned char HexToDec(char a_HexChar)
{
	if ((a_HexChar >= '0') && (a_HexChar <= '9'))
	{
		return static_cast<unsigned char>(a_HexChar - '0');
	}
	if ((a_HexChar >= 'a') && (a_HexChar <= 'f'))
	{
		return static_cast<unsigned char>(a_HexChar - 'a' + 10);
	}
	if ((a_HexChar >= 'A') && (a_HexChar <= 'F'))
	{
		return static_cast<unsigned char>(a_HexChar - 'A' + 10);
	}

	// Not a hex digit, report the error marker:
	return 0xff;
}
50 
51 
52 
53 
54 
55 AString & vPrintf(AString & a_String, const char * a_Format, fmt::printf_args a_ArgList)
56 {
57  ASSERT(a_Format != nullptr);
58  fmt::memory_buffer Buffer; // Save a string allocation compared to vsprintf
59  fmt::vprintf(Buffer, fmt::to_string_view(a_Format), a_ArgList);
60  a_String.assign(Buffer.data(), Buffer.size());
61  return a_String;
62 }
63 
64 
65 
66 
67 
68 AString vPrintf(const char * a_Format, fmt::printf_args a_ArgList)
69 {
70  ASSERT(a_Format != nullptr);
71  return fmt::vsprintf(a_Format, a_ArgList);
72 }
73 
74 
75 
76 
77 
78 AString & vAppendPrintf(AString & a_String, const char * a_Format, fmt::printf_args a_ArgList)
79 {
80  ASSERT(a_Format != nullptr);
81  fmt::memory_buffer Buffer;
82  fmt::vprintf(Buffer, fmt::to_string_view(a_Format), a_ArgList);
83  a_String.append(Buffer.data(), Buffer.size());
84  return a_String;
85 }
86 
87 
88 
89 
90 
/** Splits str at every occurrence of any character listed in delim.
Empty items between consecutive delimiters are kept; an empty item after a trailing delimiter is dropped. */
AStringVector StringSplit(const AString & str, const AString & delim)
{
	AStringVector Pieces;
	size_t Begin = 0;
	for (;;)
	{
		const size_t DelimPos = str.find_first_of(delim, Begin);
		if (DelimPos == AString::npos)
		{
			break;
		}
		Pieces.emplace_back(str, Begin, DelimPos - Begin);
		Begin = DelimPos + 1;
	}

	// Whatever is left after the last delimiter forms the final item:
	if (Begin < str.length())
	{
		Pieces.push_back(str.substr(Begin));
	}
	return Pieces;
}
107 
108 
109 
110 
111 
/** Splits str at any of the characters listed in delim, skipping empty items.
An item that starts with a single or a double quote extends up to the next matching quote, so it may
contain delimiters; the enclosing quotes are removed from the item.
If there is no closing quote, the item is split normally and keeps its opening quote. */
AStringVector StringSplitWithQuotes(const AString & str, const AString & delim)
{
	AStringVector results;

	size_t cutAt = 0;
	size_t Prev = 0;

	while ((cutAt = str.find_first_of(delim, Prev)) != str.npos)
	{
		if (cutAt == Prev)
		{
			// Empty string due to multiple delimiters / a delimiter at the beginning of the input
			// Just skip it
			Prev = Prev + 1;
			continue;
		}
		AString current = str.substr(Prev, cutAt - Prev);
		const char first = current.front();
		if ((first == '"') || (first == '\''))
		{
			// The item is quoted, look for the matching closing quote after the opening one:
			const size_t cutAtQuote = str.find_first_of(first, Prev + 1);
			if (cutAtQuote != str.npos)
			{
				current = str.substr(Prev + 1, cutAtQuote - Prev - 1);
				// The character right after the closing quote is consumed as the delimiter:
				cutAt = cutAtQuote + 1;
			}
		}

		results.push_back(std::move(current));
		Prev = cutAt + 1;
	}

	if (Prev < str.length())
	{
		AString current = str.substr(Prev);

		// If the remnant is wrapped in matching quotes, remove them:
		if (
			(current.length() >= 2) &&
			((current.front() == '"') || (current.front() == '\'')) &&
			(current.front() == current.back())
		)
		{
			current = current.substr(1, current.length() - 2);
		}

		results.push_back(std::move(current));
	}

	return results;
}
164 
165 
166 
167 
168 
/** Joins all items of a_Strings into a single string, placing a_Delimeter between neighboring items.
Returns an empty string for an empty vector. */
AString StringJoin(const AStringVector & a_Strings, const AString & a_Delimeter)
{
	if (a_Strings.empty())
	{
		return AString();
	}

	// Calculate the final size up front, so that the result is allocated only once:
	size_t TotalSize = (a_Strings.size() - 1) * a_Delimeter.size();
	for (const auto & Item : a_Strings)
	{
		TotalSize += Item.size();
	}

	// Build the result; the first item has no delimiter in front of it:
	AString Joined;
	Joined.reserve(TotalSize);
	auto itr = a_Strings.cbegin();
	Joined += *itr;
	for (++itr; itr != a_Strings.cend(); ++itr)
	{
		Joined += a_Delimeter;
		Joined += *itr;
	}
	return Joined;
}
200 
201 
202 
203 
204 
205 AStringVector StringSplitAndTrim(const AString & str, const AString & delim)
206 {
207  AStringVector results;
208  size_t cutAt = 0;
209  size_t Prev = 0;
210  while ((cutAt = str.find_first_of(delim, Prev)) != str.npos)
211  {
212  results.push_back(TrimString(str.substr(Prev, cutAt - Prev)));
213  Prev = cutAt + 1;
214  }
215  if (Prev < str.length())
216  {
217  results.push_back(TrimString(str.substr(Prev)));
218  }
219  return results;
220 }
221 
222 
223 
224 
225 
/** Returns a copy of str with the leading and trailing whitespace removed.
Any character with a code of 32 or less (space and the control characters) counts as whitespace.
Returns an empty string if str consists of whitespace only. */
AString TrimString(const AString & str)
{
	const auto IsKept = [](char a_Char)
	{
		return (static_cast<unsigned char>(a_Char) > 32);
	};

	// Skip the leading whitespace:
	const size_t len = str.length();
	size_t start = 0;
	while ((start < len) && !IsKept(str[start]))
	{
		++start;
	}
	if (start == len)
	{
		return AString();
	}

	// Skip the trailing whitespace; "end" is one past the last kept character.
	// The loop always terminates, because str[start] is known to be a kept character.
	size_t end = len;
	while (!IsKept(str[end - 1]))
	{
		--end;
	}

	return str.substr(start, end - start);
}
255 
256 
257 
258 
259 
/** Converts all characters of s to lowercase, in place.
Returns s. */
AString & InPlaceLowercase(AString & s)
{
	// tolower() requires its argument to be representable as unsigned char;
	// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
	std::transform(s.begin(), s.end(), s.begin(),
		[](char a_Char)
		{
			return static_cast<char>(::tolower(static_cast<unsigned char>(a_Char)));
		}
	);
	return s;
}
265 
266 
267 
268 
269 
/** Converts all characters of s to uppercase, in place.
Returns s. */
AString & InPlaceUppercase(AString & s)
{
	// toupper() requires its argument to be representable as unsigned char;
	// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
	std::transform(s.begin(), s.end(), s.begin(),
		[](char a_Char)
		{
			return static_cast<char>(::toupper(static_cast<unsigned char>(a_Char)));
		}
	);
	return s;
}
275 
276 
277 
278 
279 
/** Returns a lowercased copy of s; s itself is not modified. */
AString StrToLower(const AString & s)
{
	AString res;
	res.resize(s.size());

	// tolower() requires its argument to be representable as unsigned char;
	// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
	std::transform(s.begin(), s.end(), res.begin(),
		[](char a_Char)
		{
			return static_cast<char>(::tolower(static_cast<unsigned char>(a_Char)));
		}
	);
	return res;
}
287 
288 
289 
290 
291 
/** Returns an uppercased copy of s; s itself is not modified. */
AString StrToUpper(const AString & s)
{
	AString res;
	res.resize(s.size());

	// toupper() requires its argument to be representable as unsigned char;
	// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
	std::transform(s.begin(), s.end(), res.begin(),
		[](char a_Char)
		{
			return static_cast<char>(::toupper(static_cast<unsigned char>(a_Char)));
		}
	);
	return res;
}
299 
300 
301 
302 
303 
/** Compares s1 to s2 while ignoring the letter case.
Returns zero if the strings are equal, a negative number if s1 sorts before s2 and a positive number otherwise.
The comparison is done on the C-string representation, so it stops at the first NUL character. */
int NoCaseCompare(const AString & s1, const AString & s2)
{
	const char * Left = s1.c_str();
	const char * Right = s2.c_str();
	#ifdef _MSC_VER
		return _stricmp(Left, Right);
	#else
		return strcasecmp(Left, Right);
	#endif  // else _MSC_VER
}
312 
313 
314 
315 
316 
/** Returns the number of leading characters of s1 that match, ignoring the letter case,
the characters at the same positions in s2.
Returns 0 if s1 is longer than s2, because then s1 cannot be a prefix of s2. */
size_t RateCompareString(const AString & s1, const AString & s2)
{
	const size_t s1Length = s1.length();
	if (s1Length > s2.length())
	{
		// Definitely not a match
		return 0;
	}

	// toupper() requires its argument to be representable as unsigned char;
	// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
	const auto Upper = [](char a_Char)
	{
		return ::toupper(static_cast<unsigned char>(a_Char));
	};

	// Count the matching characters, stop at the first mismatch:
	size_t MatchedLetters = 0;
	while ((MatchedLetters < s1Length) && (Upper(s1[MatchedLetters]) == Upper(s2[MatchedLetters])))
	{
		++MatchedLetters;
	}
	return MatchedLetters;
}
343 
344 
345 
346 
347 
/** Replaces each occurrence of iNeedle in iHayStack with iReplaceWith, in place.
The search continues after the inserted text, so the replacement itself is never re-scanned.
Does nothing if iNeedle is empty. */
void ReplaceString(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
{
	if (iNeedle.empty())
	{
		// find() reports a match at the current position for an empty needle, which would loop forever
		return;
	}

	for (
		size_t Pos = iHayStack.find(iNeedle);
		Pos != AString::npos;
		Pos = iHayStack.find(iNeedle, Pos + iReplaceWith.size())
	)
	{
		iHayStack.replace(Pos, iNeedle.size(), iReplaceWith);
	}
}
363 
364 
365 
366 
367 
368 void ReplaceURL(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
369 {
370  auto ReplaceWith = URLEncode(iReplaceWith);
371  ReplaceString(iHayStack, iNeedle, ReplaceWith);
372 }
373 
374 
375 
376 
377 
378 AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UTF8)
379 {
380  a_UTF8.clear();
381  a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size
382  for (size_t i = 0; i < a_NumShorts; i++)
383  {
384  a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2])));
385  }
386  return a_UTF8;
387 }
388 
389 
390 
391 
392 
/** Returns the UTF-8 encoding of the code point a_UnicodeChar.
Returns an empty string for the values that cannot be encoded: the surrogate range (0xd800 - 0xdfff)
and anything at or above 0x110000. */
AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
{
	// Produces a continuation byte (10xxxxxx) out of the lowest six bits of the value:
	const auto Continuation = [](unsigned a_Bits)
	{
		return static_cast<char>(0x80 | (a_Bits & 0x3f));
	};

	AString Encoded;
	if (a_UnicodeChar < 0x80)
	{
		// Single byte, plain ASCII
		Encoded.push_back(static_cast<char>(a_UnicodeChar));
	}
	else if (a_UnicodeChar < 0x800)
	{
		// Two bytes: 110xxxxx 10xxxxxx
		Encoded.push_back(static_cast<char>(0xc0 | (a_UnicodeChar >> 6)));
		Encoded.push_back(Continuation(a_UnicodeChar));
	}
	else if ((a_UnicodeChar >= 0xd800) && (a_UnicodeChar <= 0xdfff))
	{
		// Error: surrogates are not valid code points, the result stays empty
	}
	else if (a_UnicodeChar < 0x10000)
	{
		// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
		Encoded.push_back(static_cast<char>(0xe0 | (a_UnicodeChar >> 12)));
		Encoded.push_back(Continuation(a_UnicodeChar >> 6));
		Encoded.push_back(Continuation(a_UnicodeChar));
	}
	else if (a_UnicodeChar < 0x110000)
	{
		// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		Encoded.push_back(static_cast<char>(0xf0 | (a_UnicodeChar >> 18)));
		Encoded.push_back(Continuation(a_UnicodeChar >> 12));
		Encoded.push_back(Continuation(a_UnicodeChar >> 6));
		Encoded.push_back(Continuation(a_UnicodeChar));
	}
	// else: Error, the value is out of the Unicode range, the result stays empty

	return Encoded;
}
437 
438 
439 
440 
441 
442 #ifdef __GNUC__
443 #pragma GCC diagnostic push
444 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
445 #endif
446 // UTF-8 conversion code adapted from:
447 // https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c
448 
450 // Begin of Unicode, Inc.'s code / information
452 
453 /*
454 Notice from the original file:
455 * Copyright 2001-2004 Unicode, Inc.
456 *
457 * Disclaimer
458 *
459 * This source code is provided as is by Unicode, Inc. No claims are
460 * made as to fitness for any particular purpose. No warranties of any
461 * kind are expressed or implied. The recipient agrees to determine
462 * applicability of information provided. If this file has been
463 * purchased on magnetic or optical media from Unicode, Inc., the
464 * sole remedy for any claim will be exchange of defective media
465 * within 90 days of receipt.
466 *
467 * Limitations on Rights to Redistribute This Code
468 *
469 * Unicode, Inc. hereby grants the right to freely use the information
470 * supplied in this file in the creation of products supporting the
471 * Unicode Standard, and to make copies of this file in any form
472 * for internal or external distribution as long as this notice
473 * remains attached.
474 */
475 
// Code point limits used by UTF8ToRawBEUTF16():
// Values up to UNI_MAX_BMP are emitted as a single 16-bit unit:
476 #define UNI_MAX_BMP 0x0000FFFF
// Values above UNI_MAX_UTF16 cannot be represented and are replaced with a space:
477 #define UNI_MAX_UTF16 0x0010FFFF
// Base added to the upper ten bits of a value above UNI_MAX_BMP to form the first unit of the pair;
// also the lower bound of the range that is rejected within the BMP:
478 #define UNI_SUR_HIGH_START 0xD800
// Base added to the lower ten bits of a value above UNI_MAX_BMP to form the second unit of the pair:
479 #define UNI_SUR_LOW_START 0xDC00
// Upper bound of the range that is rejected within the BMP:
480 #define UNI_SUR_LOW_END 0xDFFF
481 
482 
483 
484 
485 
// Lookup table indexed by the first byte of a UTF-8 sequence; UTF8ToRawBEUTF16() reads it to learn
// how many more bytes belong to the sequence that the byte starts.
// Layout (32 entries per row): 0x00 - 0xbf -> 0, 0xc0 - 0xdf -> 1, 0xe0 - 0xef -> 2,
// 0xf0 - 0xf7 -> 3, 0xf8 - 0xfb -> 4, 0xfc - 0xff -> 5.
486 static const Byte trailingBytesForUTF8[256] =
487 {
488  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
495  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
496 };
497 
498 
499 
500 
501 
// Correction values indexed by the number of trailing bytes of a UTF-8 sequence.
// UTF8ToRawBEUTF16() sums up the raw bytes of a sequence (shifting by 6 bits between them) and then
// subtracts the matching entry, which removes the contribution of the marker bits of the lead byte
// and of the continuation bytes, e. g. 0x3080 == (0xc0 << 6) + 0x80 for a two-byte sequence.
502 static const unsigned int offsetsFromUTF8[6] =
503 {
504  0x00000000UL, 0x00003080UL, 0x000E2080UL,
505  0x03C82080UL, 0xFA082080UL, 0x82082080UL
506 };
507 
508 
509 
510 
511 
// Returns true if the "length" bytes starting at "source" pass the UTF-8 well-formedness checks below.
// Only lengths 1 - 4 are accepted, any other length is reported as illegal.
// The bytes are examined from the last one towards the first one; the outer switch relies on
// case fall-through, so a longer sequence runs through all the checks of the shorter ones as well.
512 static bool isLegalUTF8(const unsigned char * source, int length)
513 {
514  unsigned char a;
// Start one past the last byte of the sequence, each check pre-decrements the pointer:
515  const unsigned char * srcptr = source + length;
516  switch (length)
517  {
518  default: return false;
519  // Everything else falls through when "true"...
// The 4th and the 3rd byte must be in the range 0x80 - 0xbf:
520  case 4: if (((a = (*--srcptr)) < 0x80) || (a > 0xbf)) return false;
521  case 3: if (((a = (*--srcptr)) < 0x80) || (a > 0xbf)) return false;
522  case 2:
523  {
// The 2nd byte must not exceed 0xbf; its lower bound depends on the lead byte, see the inner switch:
524  if ((a = (*--srcptr)) > 0xbf)
525  {
526  return false;
527  }
528  switch (*source)
529  {
530  // no fall-through in this inner switch
// Lead bytes 0xe0, 0xed, 0xf0 and 0xf4 accept only a narrower range of the 2nd byte:
531  case 0xe0: if (a < 0xa0) return false; break;
532  case 0xed: if (a > 0x9f) return false; break;
533  case 0xf0: if (a < 0x90) return false; break;
534  case 0xf4: if (a > 0x8f) return false; break;
535  default: if (a < 0x80) return false;
536  }
537  }
// Lead bytes in the range 0x80 - 0xc1 are never legal:
538  case 1: if ((*source >= 0x80) && (*source < 0xc2)) return false;
539  }
// Lead bytes above 0xf4 are never legal:
540  return (*source <= 0xf4);
541 }
542 
543 
544 
545 
546 
// Converts the UTF-8 text in a_UTF8 into a sequence of 16-bit units; each unit is passed through htons()
// before it is stored, so the units are laid out in memory in network (big-endian) byte order.
// When a truncated or an ill-formed UTF-8 sequence is encountered, the conversion stops and the units
// converted so far are returned.
// Values in the surrogate range and values above UNI_MAX_UTF16 are replaced with a space.
547 std::u16string UTF8ToRawBEUTF16(const AString & a_UTF8)
548 {
549  std::u16string UTF16;
550  UTF16.reserve(a_UTF8.size() * 2);
551 
552  const unsigned char * source = reinterpret_cast<const unsigned char *>(a_UTF8.data());
553  const unsigned char * sourceEnd = source + a_UTF8.size();
554  const int halfShift = 10; // used for shifting by 10 bits
555  const unsigned int halfBase = 0x0010000UL;
556  const unsigned int halfMask = 0x3ffUL;
557 
558  while (source < sourceEnd)
559  {
560  unsigned int ch = 0;
561  unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
// Bail out if the sequence announced by the lead byte would run past the end of the input:
562  if (source + extraBytesToRead >= sourceEnd)
563  {
564  return UTF16;
565  }
566  // Do this check whether lenient or strict
567  if (!isLegalUTF8(source, extraBytesToRead + 1))
568  {
569  return UTF16;
570  }
571 
572  // The cases all fall through. See "Note A" below.
// Sum up all the bytes of the sequence, shifting the sum by 6 bits between consecutive bytes:
573  switch (extraBytesToRead)
574  {
575  case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
576  case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
577  case 3: ch += *source++; ch <<= 6;
578  case 2: ch += *source++; ch <<= 6;
579  case 1: ch += *source++; ch <<= 6;
580  case 0: ch += *source++;
581  }
// Remove the marker bits that were summed up together with the payload bits:
582  ch -= offsetsFromUTF8[extraBytesToRead];
583 
584  if (ch <= UNI_MAX_BMP)
585  {
586  // Target is a character <= 0xFFFF
587  if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_LOW_END))
588  {
589  // UTF-16 surrogate values are illegal in UTF-32
590  ch = ' ';
591  }
592  unsigned short v = htons(static_cast<unsigned short>(ch));
593  UTF16.push_back(static_cast<char16_t>(v));
594  }
595  else if (ch > UNI_MAX_UTF16)
596  {
597  // Invalid value, replace with a space
598  unsigned short v = htons(' ');
599  UTF16.push_back(static_cast<char16_t>(v));
600  }
601  else
602  {
603  // target is a character in range 0xFFFF - 0x10FFFF.
// Split the value into two units: the upper ten bits go into the first one, the lower ten bits into the second one
604  ch -= halfBase;
605  auto v1 = htons(static_cast<uint16_t>((ch >> halfShift) + UNI_SUR_HIGH_START));
606  auto v2 = htons(static_cast<uint16_t>((ch & halfMask) + UNI_SUR_LOW_START));
607  UTF16.push_back(static_cast<char16_t>(v1));
608  UTF16.push_back(static_cast<char16_t>(v2));
609  }
610  }
611  return UTF16;
612 }
613 
614 /*
615 ---------------------------------------------------------------------
616 Note A.
617 The fall-through switches in UTF-8 reading code save a
618 temp variable, some decrements & conditionals. The switches
619 are equivalent to the following loop:
620 {
621  int tmpBytesToRead = extraBytesToRead + 1;
622  do
623  {
624  ch += *source++;
625  --tmpBytesToRead;
626  if (tmpBytesToRead)
627  {
628  ch <<= 6;
629  }
630  } while (tmpBytesToRead > 0);
631 }
632 ---------------------------------------------------------------------
633 */
634 
636 // End of Unicode, Inc.'s code / information
638 #ifdef __GNUC__
639 #pragma GCC diagnostic pop
640 #endif
641 
642 
643 
644 
645 
// Converts a value in the range 0 - 15 into the matching hexadecimal digit character ('0' - '9', 'A' - 'F')
646 #define HEX(x) static_cast<char>((x) > 9 ? (x) + 'A' - 10 : (x) + '0')
647 
652 AString & CreateHexDump(AString & a_Out, const void * a_Data, size_t a_Size, size_t a_BytesPerLine)
653 {
654  fmt::memory_buffer Output;
655  /* If formatting the data from the comment above:
656  Hex holds: "31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66 "
657  Chars holds: "1234567890abcdef" */
658  fmt::memory_buffer Hex, Chars;
659 
660  if (a_Size > 0)
661  {
662  // Same as std::ceil(static_cast<float>(a_Size) / a_BytesPerLine);
663  const size_t NumLines = a_Size / a_BytesPerLine + (a_Size % a_BytesPerLine != 0);
664  const size_t CharsPerLine = 14 + 4 * a_BytesPerLine;
665  Output.reserve(NumLines * CharsPerLine);
666  }
667 
668  for (size_t i = 0; i < a_Size; i += a_BytesPerLine)
669  {
670  size_t k = std::min(a_Size - i, a_BytesPerLine);
671  for (size_t j = 0; j < k; j++)
672  {
673  Byte c = (static_cast<const Byte *>(a_Data))[i + j];
674  Hex.push_back(HEX(c >> 4));
675  Hex.push_back(HEX(c & 0xf));
676  Hex.push_back(' ');
677  Chars.push_back((c >= ' ') ? static_cast<char>(c) : '.');
678  } // for j
679 
680  // Write Hex with a dynamic fixed width
681  auto HexStr = fmt::string_view(Hex.data(), Hex.size());
682  auto CharsStr = fmt::string_view(Chars.data(), Chars.size());
683  fmt::format_to(
684  Output, "{0:08x}: {1:{2}} {3}\n",
685  i, HexStr, a_BytesPerLine * 3, CharsStr
686  );
687 
688  Hex.clear();
689  Chars.clear();
690  } // for i
691  a_Out.append(Output.data(), Output.size());
692  return a_Out;
693 }
694 
695 
696 
697 
698 
/** Returns a copy of a_Message in which each single quote, double quote and backslash
is preceded by a backslash. */
AString EscapeString(const AString & a_Message)
{
	AString Escaped;
	Escaped.reserve(a_Message.size());
	for (const char ch : a_Message)
	{
		if ((ch == '\'') || (ch == '\"') || (ch == '\\'))
		{
			Escaped.push_back('\\');
		}
		Escaped.push_back(ch);
	}
	return Escaped;
}
731 
732 
733 
734 
735 
/** Returns a copy of a_Message with all the formatting codes removed.
A formatting code is the two-byte sequence 0xc2 0xa7 together with the one byte that follows it. */
AString StripColorCodes(const AString & a_Message)
{
	AString Stripped(a_Message);
	size_t Pos = Stripped.find("\xc2\xa7");
	while (Pos != AString::npos)
	{
		// Remove the two marker bytes and the code byte, then continue searching from the same position:
		Stripped.erase(Pos, 3);
		Pos = Stripped.find("\xc2\xa7", Pos);
	}
	return Stripped;
}
750 
751 
752 
753 
754 
755 std::pair<bool, AString> URLDecode(const AString & a_Text)
756 {
757  AString res;
758  auto len = a_Text.size();
759  res.reserve(len);
760  for (size_t i = 0; i < len; i++)
761  {
762  if (a_Text[i] == '+')
763  {
764  res.push_back(' ');
765  continue;
766  }
767  if (a_Text[i] != '%')
768  {
769  res.push_back(a_Text[i]);
770  continue;
771  }
772  if (i + 1 >= len)
773  {
774  // String too short for an encoded value
775  return std::make_pair(false, AString());
776  }
777  if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U'))
778  {
779  // Unicode char "%u0xxxx"
780  if (i + 6 >= len)
781  {
782  return std::make_pair(false, AString());
783  }
784  if (a_Text[i + 2] != '0')
785  {
786  return std::make_pair(false, AString());
787  }
788  unsigned v1 = HexToDec(a_Text[i + 3]);
789  unsigned v2 = HexToDec(a_Text[i + 4]);
790  unsigned v3 = HexToDec(a_Text[i + 5]);
791  unsigned v4 = HexToDec(a_Text[i + 6]);
792  if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff))
793  {
794  // Invalid hex numbers
795  return std::make_pair(false, AString());
796  }
797  res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4));
798  i = i + 6;
799  }
800  else
801  {
802  // Regular char "%xx":
803  if (i + 2 >= len)
804  {
805  return std::make_pair(false, AString());
806  }
807  auto v1 = HexToDec(a_Text[i + 1]);
808  auto v2 = HexToDec(a_Text[i + 2]);
809  if ((v1 == 0xff) || (v2 == 0xff))
810  {
811  // Invalid hex numbers
812  return std::make_pair(false, AString());
813  }
814  res.push_back(static_cast<char>((v1 << 4) | v2));
815  i = i + 2;
816  }
817  } // for i - a_Text[i]
818  return std::make_pair(true, res);
819 }
820 
821 
822 
823 
824 
/** URL-encodes a_Text.
Alphanumeric characters are copied verbatim, a space is encoded as a plus sign and every other byte
is encoded as "%XX", where XX is the uppercase hex value of the byte. */
AString URLEncode(const AString & a_Text)
{
	// Note: deliberately not called HEX, to avoid confusion with the HEX(x) macro defined earlier in this file
	static const char HexDigits[] = "0123456789ABCDEF";

	AString res;
	res.reserve(a_Text.size());
	for (const char ch : a_Text)
	{
		// isalnum() requires its argument to be representable as unsigned char;
		// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
		const auto Value = static_cast<unsigned char>(ch);
		if (isalnum(Value))
		{
			res.push_back(ch);
		}
		else if (ch == ' ')
		{
			res.push_back('+');
		}
		else
		{
			res.push_back('%');
			res.push_back(HexDigits[Value >> 4]);
			res.push_back(HexDigits[Value & 0x0f]);
		}
	}
	return res;
}
850 
851 
852 
853 
854 
/** Returns a copy of a_String in which every occurrence of the character a_From is replaced with a_To. */
AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To)
{
	AString Replaced(a_String);
	for (char & Current : Replaced)
	{
		if (Current == a_From)
		{
			Current = a_To;
		}
	}
	return Replaced;
}
861 
862 
863 
864 
865 
/** Converts a single character of the Base64 alphabet into the 6-bit value it represents (0 - 63).
Returns -1 for the padding character '=' and -2 for any character outside the alphabet. */
static inline int UnBase64(char c)
{
	// The special characters first:
	if (c == '=')
	{
		return -1;
	}
	if (c == '+')
	{
		return 62;
	}
	if (c == '/')
	{
		return 63;
	}

	// The three contiguous ranges: A - Z map to 0 - 25, a - z to 26 - 51, 0 - 9 to 52 - 61
	if ((c >= 'A') && (c <= 'Z'))
	{
		return c - 'A';
	}
	if ((c >= 'a') && (c <= 'z'))
	{
		return 26 + (c - 'a');
	}
	if ((c >= '0') && (c <= '9'))
	{
		return 52 + (c - '0');
	}

	// Not a Base64 character
	return -2;
}
895 
896 
897 
898 
899 
900 AString Base64Decode(const AString & a_Base64String)
901 {
902  AString res;
903  size_t i, len = a_Base64String.size();
904  size_t o;
905  int c;
906  res.resize((len * 4) / 3 + 5, 0); // Approximate the upper bound on the result length
907  for (o = 0, i = 0; i < len; i++)
908  {
909  c = UnBase64(a_Base64String[i]);
910  if (c >= 0)
911  {
912  switch (o & 7)
913  {
914  case 0: res[o >> 3] |= (c << 2); break;
915  case 6: res[o >> 3] |= (c >> 4); res[(o >> 3) + 1] |= (c << 4); break;
916  case 4: res[o >> 3] |= (c >> 2); res[(o >> 3) + 1] |= (c << 6); break;
917  case 2: res[o >> 3] |= c; break;
918  }
919  o += 6;
920  }
921  if (c == -1)
922  {
923  // Error while decoding, invalid input. Return as much as we've decoded:
924  res.resize(o >> 3);
925  return res;
926  }
927  }
928  res.resize(o >> 3);
929  return res;
930 }
931 
932 
933 
934 
935 
/** Encodes a_Input into Base64, using the standard alphabet and '=' padding. */
AString Base64Encode(const AString & a_Input)
{
	static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	const size_t InputSize = a_Input.size();
	const size_t EncodedSize = ((InputSize + 2) / 3) * 4;
	const auto ByteAt = [&a_Input](size_t a_Idx) -> unsigned
	{
		return static_cast<unsigned char>(a_Input[a_Idx]);
	};

	AString Encoded;
	Encoded.reserve(EncodedSize);

	// Each complete group of 3 input bytes produces 4 output characters:
	const size_t FullGroupsEnd = (InputSize / 3) * 3;
	for (size_t i = 0; i < FullGroupsEnd; i += 3)
	{
		const unsigned Triple = (ByteAt(i) << 16) | (ByteAt(i + 1) << 8) | ByteAt(i + 2);
		Encoded.push_back(Alphabet[(Triple >> 18) & 63]);
		Encoded.push_back(Alphabet[(Triple >> 12) & 63]);
		Encoded.push_back(Alphabet[(Triple >> 6) & 63]);
		Encoded.push_back(Alphabet[Triple & 63]);
	}

	// The remaining 1 or 2 bytes are encoded with the missing bits set to zero and padded with '=':
	switch (InputSize - FullGroupsEnd)
	{
		case 1:
		{
			const unsigned First = ByteAt(FullGroupsEnd);
			Encoded.push_back(Alphabet[First >> 2]);
			Encoded.push_back(Alphabet[(First << 4) & 63]);
			Encoded.push_back('=');
			Encoded.push_back('=');
			break;
		}
		case 2:
		{
			const unsigned First = ByteAt(FullGroupsEnd);
			const unsigned Second = ByteAt(FullGroupsEnd + 1);
			Encoded.push_back(Alphabet[First >> 2]);
			Encoded.push_back(Alphabet[((First << 4) | (Second >> 4)) & 63]);
			Encoded.push_back(Alphabet[(Second << 2) & 63]);
			Encoded.push_back('=');
			break;
		}
	}
	ASSERT(Encoded.size() == EncodedSize);

	return Encoded;
}
980 
981 
982 
983 
984 
/** Reads the two bytes at a_Mem and returns them interpreted as a big-endian signed 16-bit number. */
short GetBEShort(const std::byte * const a_Mem)
{
	const int High = std::to_integer<int>(a_Mem[0]);
	const int Low = std::to_integer<int>(a_Mem[1]);
	return static_cast<short>((High << 8) | Low);
}
992 
993 
994 
995 
996 
/** Reads the two bytes at a_Mem and returns them interpreted as a big-endian unsigned 16-bit number. */
unsigned short GetBEUShort(const char * a_Mem)
{
	// Go through unsigned char, so that bytes above 0x7f don't get sign-extended:
	const auto High = static_cast<unsigned char>(a_Mem[0]);
	const auto Low = static_cast<unsigned char>(a_Mem[1]);
	return static_cast<unsigned short>((High << 8) | Low);
}
1002 
1003 
1004 
1005 
1006 
/** Reads the four bytes at a_Mem and returns them interpreted as a big-endian signed 32-bit number. */
int GetBEInt(const std::byte * const a_Mem)
{
	// Assemble the value in an unsigned type: shifting a byte of 0x80 or above left by 24 bits
	// as a signed int would overflow, which is undefined behavior before C++20.
	const unsigned Value =
		(std::to_integer<unsigned>(a_Mem[0]) << 24) |
		(std::to_integer<unsigned>(a_Mem[1]) << 16) |
		(std::to_integer<unsigned>(a_Mem[2]) << 8) |
		std::to_integer<unsigned>(a_Mem[3])
	;
	return static_cast<int>(Value);
}
1016 
1017 
1018 
1019 
1020 
/** Writes a_Value into the four bytes at a_Mem in big-endian order (the most significant byte first). */
void SetBEInt(std::byte * a_Mem, Int32 a_Value)
{
	int Shift = 24;
	for (size_t i = 0; i < 4; ++i, Shift -= 8)
	{
		a_Mem[i] = std::byte((a_Value >> Shift) & 0xff);
	}
}
1028 
1029 
1030 
1031 
1032 
/** Splits a_Strings at each embedded NUL character and stores the items in a_Output.
The previous contents of a_Output are discarded.
Text after the last NUL character, if there is any, forms the last item.
Returns true if at least one item has been stored, false otherwise. */
bool SplitZeroTerminatedStrings(const AString & a_Strings, AStringVector & a_Output)
{
	a_Output.clear();

	const size_t Total = a_Strings.size();
	size_t Begin = 0;
	bool HasAny = false;
	while (Begin < Total)
	{
		const size_t End = a_Strings.find('\0', Begin);
		if (End == AString::npos)
		{
			// No more terminators, the rest of the input is the last item
			a_Output.push_back(a_Strings.substr(Begin));
			return true;
		}
		a_Output.push_back(a_Strings.substr(Begin, End - Begin));
		Begin = End + 1;
		HasAny = true;
	}

	return HasAny;
}
1056 
1057 
1058 
1059 
1060 
/** Returns a vector consisting of all the items of a_Strings1, followed by those items of a_Strings2
that are not in the result yet.
The relative order of the items is preserved. */
AStringVector MergeStringVectors(const AStringVector & a_Strings1, const AStringVector & a_Strings2)
{
	// Start with a copy of the first vector:
	AStringVector Merged(a_Strings1);

	// Append the items of the second vector, skipping those already present in the result:
	for (const AString & Candidate : a_Strings2)
	{
		const bool IsPresent = std::any_of(Merged.cbegin(), Merged.cend(),
			[&Candidate](const AString & a_Existing)
			{
				return (a_Existing == Candidate);
			}
		);
		if (!IsPresent)
		{
			Merged.push_back(Candidate);
		}
	}  // for Candidate - a_Strings2[]

	return Merged;
}
1077 
1078 
1079 
1080 
1081 
/** Concatenates all items of a_Strings into a single string, placing a_Separator between neighboring items.
Returns an empty string for an empty vector. */
AString StringsConcat(const AStringVector & a_Strings, char a_Separator)
{
	AString Joined;
	bool IsFirst = true;
	for (const auto & Item : a_Strings)
	{
		// The separator goes in front of each item except the first one:
		if (!IsFirst)
		{
			Joined.push_back(a_Separator);
		}
		Joined.append(Item);
		IsFirst = false;
	}
	return Joined;
}
1100 
1101 
1102 
1103 
1104 
/** Converts a_String into a float and stores it in a_Num.
Returns true if the entire string has been parsed as a number.
Returns false if the string is empty, doesn't start with a number or has extra characters after the number;
a_Num is overwritten with strtof()'s result in all cases. */
bool StringToFloat(const AString & a_String, float & a_Num)
{
	const char * Begin = a_String.c_str();
	char * End = nullptr;
	a_Num = strtof(Begin, &End);

	// strtof() leaves End at Begin when it performs no conversion at all; without that check,
	// an empty string would be reported as a successfully parsed zero.
	return (End != Begin) && (*End == 0);
}
1111 
1112 
1113 
1114 
1115 
/** Returns true if a_String contains nothing but whitespace characters, as classified by isspace().
An empty string yields true. */
bool IsOnlyWhitespace(const AString & a_String)
{
	// isspace() requires its argument to be representable as unsigned char;
	// passing a plain (possibly negative) char is undefined behavior for non-ASCII bytes.
	return std::all_of(a_String.cbegin(), a_String.cend(),
		[](char a_Char)
		{
			return (isspace(static_cast<unsigned char>(a_Char)) != 0);
		}
	);
}
StringSplitWithQuotes
AStringVector StringSplitWithQuotes(const AString &str, const AString &delim)
Split the string at any of the listed delimiters.
Definition: StringUtils.cpp:112
RawBEToUTF8
AString & RawBEToUTF8(const char *a_RawData, size_t a_NumShorts, AString &a_UTF8)
Converts a stream of BE shorts into UTF-8 string; returns a_UTF8.
Definition: StringUtils.cpp:378
TrimString
AString TrimString(const AString &str)
Trims whitespace at both ends of the string.
Definition: StringUtils.cpp:226
GetBEInt
int GetBEInt(const std::byte *const a_Mem)
Reads four bytes from the specified memory location and interprets them as BigEndian int.
Definition: StringUtils.cpp:1007
UNI_SUR_HIGH_START
#define UNI_SUR_HIGH_START
Definition: StringUtils.cpp:478
UnBase64
static int UnBase64(char c)
Converts one character of a Base64 encoding into the data value.
Definition: StringUtils.cpp:867
CreateHexDump
AString & CreateHexDump(AString &a_Out, const void *a_Data, size_t a_Size, size_t a_BytesPerLine)
format binary data this way: 00001234: 31 32 33 34 35 36 37 38 39 30 61 62 63 64 65 66 1234567890abcd...
Definition: StringUtils.cpp:652
URLDecode
std::pair< bool, AString > URLDecode(const AString &a_Text)
URL-Decodes the given string.
Definition: StringUtils.cpp:755
InPlaceUppercase
AString & InPlaceUppercase(AString &s)
In-place string conversion to uppercase.
Definition: StringUtils.cpp:270
StrToLower
AString StrToLower(const AString &s)
Returns a lower-cased copy of the string.
Definition: StringUtils.cpp:280
MergeStringVectors
AStringVector MergeStringVectors(const AStringVector &a_Strings1, const AStringVector &a_Strings2)
Merges the two vectors of strings, removing duplicate entries from the second vector.
Definition: StringUtils.cpp:1061
StrToUpper
AString StrToUpper(const AString &s)
Returns an upper-cased copy of the string.
Definition: StringUtils.cpp:292
StringsConcat
AString StringsConcat(const AStringVector &a_Strings, char a_Separator)
Concatenates the specified strings into a single string, separated by the specified separator charact...
Definition: StringUtils.cpp:1082
Globals.h
NoCaseCompare
int NoCaseCompare(const AString &s1, const AString &s2)
Case-insensitive string comparison.
Definition: StringUtils.cpp:304
StringToFloat
bool StringToFloat(const AString &a_String, float &a_Num)
Converts a string into a float.
Definition: StringUtils.cpp:1105
vPrintf
AString & vPrintf(AString &a_String, const char *a_Format, fmt::printf_args a_ArgList)
Output the formatted text into the string.
Definition: StringUtils.cpp:55
ASSERT
#define ASSERT(x)
Definition: Globals.h:273
UNI_MAX_BMP
#define UNI_MAX_BMP
Definition: StringUtils.cpp:476
StringSplit
AStringVector StringSplit(const AString &str, const AString &delim)
Split the string at any of the listed delimiters.
Definition: StringUtils.cpp:91
isLegalUTF8
static bool isLegalUTF8(const unsigned char *source, int length)
Definition: StringUtils.cpp:512
Base64Decode
AString Base64Decode(const AString &a_Base64String)
Decodes a Base64-encoded string into the raw data.
Definition: StringUtils.cpp:900
GetBEShort
short GetBEShort(const std::byte *const a_Mem)
Reads two bytes from the specified memory location and interprets them as BigEndian short.
Definition: StringUtils.cpp:985
UNI_SUR_LOW_END
#define UNI_SUR_LOW_END
Definition: StringUtils.cpp:480
ReplaceString
void ReplaceString(AString &iHayStack, const AString &iNeedle, const AString &iReplaceWith)
Replaces each occurrence of iNeedle in iHayStack with iReplaceWith.
Definition: StringUtils.cpp:348
HEX
#define HEX(x)
Definition: StringUtils.cpp:646
StringSplitAndTrim
AStringVector StringSplitAndTrim(const AString &str, const AString &delim)
Split the string at any of the listed delimiters and trim each value.
Definition: StringUtils.cpp:205
UnicodeCharToUtf8
AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
Converts a unicode character to its UTF8 representation.
Definition: StringUtils.cpp:393
vAppendPrintf
AString & vAppendPrintf(AString &a_String, const char *a_Format, fmt::printf_args a_ArgList)
Add the formatted string to the existing data in the string.
Definition: StringUtils.cpp:78
HexToDec
static unsigned char HexToDec(char a_HexChar)
Returns the value of the single hex digit.
Definition: StringUtils.cpp:21
SetBEInt
void SetBEInt(std::byte *a_Mem, Int32 a_Value)
Writes four bytes to the specified memory location so that they interpret as BigEndian int.
Definition: StringUtils.cpp:1021
URLEncode
AString URLEncode(const AString &a_Text)
URL-encodes the given string.
Definition: StringUtils.cpp:825
Int32
signed int Int32
Definition: Globals.h:149
StringJoin
AString StringJoin(const AStringVector &a_Strings, const AString &a_Delimeter)
Join a list of strings with the given delimiter between entries.
Definition: StringUtils.cpp:169
RateCompareString
size_t RateCompareString(const AString &s1, const AString &s2)
Case-insensitive string comparison that returns a rating of equal-ness between [0 - s1....
Definition: StringUtils.cpp:317
EscapeString
AString EscapeString(const AString &a_Message)
Returns a copy of a_Message with all quotes and backslashes escaped by a backslash.
Definition: StringUtils.cpp:699
UTF8ToRawBEUTF16
std::u16string UTF8ToRawBEUTF16(const AString &a_UTF8)
Converts a UTF-8 string into a UTF-16 BE string.
Definition: StringUtils.cpp:547
Base64Encode
AString Base64Encode(const AString &a_Input)
Encodes a string into Base64.
Definition: StringUtils.cpp:936
Byte
unsigned char Byte
Definition: Globals.h:158
ReplaceAllCharOccurrences
AString ReplaceAllCharOccurrences(const AString &a_String, char a_From, char a_To)
Replaces all occurrences of char a_From inside a_String with char a_To.
Definition: StringUtils.cpp:855
trailingBytesForUTF8
static const Byte trailingBytesForUTF8[256]
Definition: StringUtils.cpp:486
InPlaceLowercase
AString & InPlaceLowercase(AString &s)
In-place string conversion to lowercase.
Definition: StringUtils.cpp:260
SplitZeroTerminatedStrings
bool SplitZeroTerminatedStrings(const AString &a_Strings, AStringVector &a_Output)
Splits a string that has embedded \0 characters, on those characters.
Definition: StringUtils.cpp:1033
offsetsFromUTF8
static const unsigned int offsetsFromUTF8[6]
Definition: StringUtils.cpp:502
StripColorCodes
AString StripColorCodes(const AString &a_Message)
Removes all control codes used by MC for colors and styles.
Definition: StringUtils.cpp:736
UNI_MAX_UTF16
#define UNI_MAX_UTF16
Definition: StringUtils.cpp:477
GetBEUShort
unsigned short GetBEUShort(const char *a_Mem)
Reads two bytes from the specified memory location and interprets them as BigEndian unsigned short.
Definition: StringUtils.cpp:997
ReplaceURL
void ReplaceURL(AString &iHayStack, const AString &iNeedle, const AString &iReplaceWith)
Replaces each occurrence of iNeedle in iHayStack with iReplaceWith, after URL-encoding iReplaceWith.
Definition: StringUtils.cpp:368
IsOnlyWhitespace
bool IsOnlyWhitespace(const AString &a_String)
Returns true if only whitespace characters are present in the string.
Definition: StringUtils.cpp:1116
AString
std::string AString
Definition: StringUtils.h:11
UNI_SUR_LOW_START
#define UNI_SUR_LOW_START
Definition: StringUtils.cpp:479
AStringVector
std::vector< AString > AStringVector
Definition: StringUtils.h:12