OpenTTD Source  20240917-master-g9ab0a47812
string_func.h
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
12 #ifndef STRING_FUNC_H
13 #define STRING_FUNC_H
14 
15 #include <iosfwd>
16 
17 #include "core/bitmath_func.hpp"
18 #include "string_type.h"
19 
/** Copies characters from one buffer to another. */
20 void strecpy(std::span<char> dst, std::string_view src);
21 
/** Format a byte array into a continuous hex string. */
22 std::string FormatArrayAsHex(std::span<const uint8_t> data);
23 
/** Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored). */
24 void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK) NOACCESS(2);
/** Copies the valid (UTF-8) characters from str to the returned string. */
25 [[nodiscard]] std::string StrMakeValid(std::string_view str, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK);
27 
/* NOTE(review): presumably lower-cases str starting at offset offs; the meaning of the returned bool is not visible in this header - confirm against string.cpp. */
28 bool strtolower(std::string &str, std::string::size_type offs = 0);
29 
/** Checks whether the given string is valid. */
30 [[nodiscard]] bool StrValid(std::span<const char> str);
/** Trim the spaces from the given string in place. */
31 void StrTrimInPlace(std::string &str);
/* NOTE(review): presumably the view-returning counterpart of StrTrimInPlace - confirm against string.cpp. */
32 std::string_view StrTrimView(std::string_view str);
33 
/** Check whether the given string starts with the given prefix, ignoring case. */
34 [[nodiscard]] bool StrStartsWithIgnoreCase(std::string_view str, const std::string_view prefix);
/** Check whether the given string ends with the given suffix, ignoring case. */
35 [[nodiscard]] bool StrEndsWithIgnoreCase(std::string_view str, const std::string_view suffix);
36 
/** Compares two string (view)s, while ignoring the case of the characters. */
37 [[nodiscard]] int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2);
/** Compares two string (view)s for equality, while ignoring the case of the characters. */
38 [[nodiscard]] bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2);
/** Compares two strings using case insensitive natural sort. */
39 [[nodiscard]] int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front = false);
/** Checks if a string is contained in another string with a locale-aware comparison that is case sensitive. */
40 [[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value);
/** Checks if a string is contained in another string with a locale-aware comparison that is case insensitive. */
41 [[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value);
42 
/** Convert a hex-string to a byte-array, while validating it was actually hex. */
43 bool ConvertHexToBytes(std::string_view hex, std::span<uint8_t> bytes);
44 
47  bool operator()(const std::string_view s1, const std::string_view s2) const { return StrCompareIgnoreCase(s1, s2) < 0; }
48 };
49 
/**
 * Check if a string buffer is empty.
 *
 * @param s Pointer to the first character of the buffer; may be nullptr.
 * @return true when \a s is a null pointer or its first character is the terminating '\0'.
 */
inline bool StrEmpty(const char *s)
{
	/* A missing buffer counts as empty, and must not be dereferenced. */
	if (s == nullptr) return true;

	return *s == '\0';
}
61 
/**
 * Get the length of a string, within a limited buffer.
 *
 * @param str The buffer to measure.
 * @param maxlen The maximum number of characters to inspect.
 * @return The number of characters before the first '\0', or \a maxlen when
 *         no terminator occurs within the first \a maxlen characters.
 */
inline size_t ttd_strnlen(const char *str, size_t maxlen)
{
	size_t len = 0;
	/* The bound is tested first, so str[maxlen] is never read. */
	while (len < maxlen && str[len] != '\0') len++;
	return len;
}
75 
/** Only allow certain keys. */
76 bool IsValidChar(char32_t key, CharSetFilter afilter);
77 
/** Decode and consume the next UTF-8 encoded character. */
78 size_t Utf8Decode(char32_t *c, const char *s);
/** Encode a unicode character and place it in the buffer. */
79 size_t Utf8Encode(char *buf, char32_t c);
/** Encode a unicode character and place it in the buffer (stream buffer iterator variant). */
80 size_t Utf8Encode(std::ostreambuf_iterator<char> &buf, char32_t c);
/** Encode a unicode character and place it in the buffer (std::string back-inserter variant). */
81 size_t Utf8Encode(std::back_insert_iterator<std::string> &buf, char32_t c);
/** Properly terminate an UTF8 string to some maximum length. */
82 size_t Utf8TrimString(char *s, size_t maxlen);
83 
84 
85 inline char32_t Utf8Consume(const char **s)
86 {
87  char32_t c;
88  *s += Utf8Decode(&c, *s);
89  return c;
90 }
91 
/**
 * Decode the character the given iterator refers to and step the iterator
 * past it.
 *
 * @tparam Titr Iterator type; it must dereference to a char and support
 *              operator+= with a size_t.
 * @param s The iterator; it is advanced by the amount that Utf8Decode reports
 *          for the decoded character.
 * @return The character written by Utf8Decode.
 */
template <class Titr>
inline char32_t Utf8Consume(Titr &s)
{
	char32_t decoded;
	/* &*s turns the iterator into the raw character pointer Utf8Decode expects. */
	const size_t consumed = Utf8Decode(&decoded, &*s);
	s += consumed;
	return decoded;
}
99 
/**
 * Return the length of a UTF-8 encoded character.
 *
 * @param c The Unicode character.
 * @return The number of bytes needed to encode \a c: 1 to 4 for values below
 *         0x110000, and 1 for anything larger.
 */
inline int8_t Utf8CharLen(char32_t c)
{
	if (c >= 0x110000) {
		/* Invalid value, which is encoded as a single '?'. */
		return 1;
	}

	if (c >= 0x10000) return 4;
	if (c >= 0x800) return 3;
	if (c >= 0x80) return 2;
	return 1;
}
115 
116 
124 inline int8_t Utf8EncodedCharLen(char c)
125 {
126  if (GB(c, 3, 5) == 0x1E) return 4;
127  if (GB(c, 4, 4) == 0x0E) return 3;
128  if (GB(c, 5, 3) == 0x06) return 2;
129  if (GB(c, 7, 1) == 0x00) return 1;
130 
131  /* Invalid UTF8 start encoding */
132  return 0;
133 }
134 
135 
136 /* Check if the given character is part of a UTF8 sequence */
137 inline bool IsUtf8Part(char c)
138 {
139  return GB(c, 6, 2) == 2;
140 }
141 
149 inline char *Utf8PrevChar(char *s)
150 {
151  char *ret = s;
152  while (IsUtf8Part(*--ret)) {}
153  return ret;
154 }
155 
156 inline const char *Utf8PrevChar(const char *s)
157 {
158  const char *ret = s;
159  while (IsUtf8Part(*--ret)) {}
160  return ret;
161 }
162 
/** Get the length of an UTF-8 encoded string in number of characters and thus not the number of bytes. */
163 size_t Utf8StringLength(const char *s);
/** Get the length of an UTF-8 encoded string in number of characters and thus not the number of bytes (std::string variant). */
164 size_t Utf8StringLength(const std::string &str);
165 
/**
 * Is the given character a lead surrogate code point?
 *
 * @param c The character to test.
 * @return true when \a c lies in the inclusive range 0xD800..0xDBFF.
 */
inline bool Utf16IsLeadSurrogate(uint c)
{
	constexpr uint first_lead = 0xD800;
	constexpr uint last_lead = 0xDBFF;
	return first_lead <= c && c <= last_lead;
}
175 
/**
 * Is the given character a trail surrogate code point?
 *
 * @param c The character to test.
 * @return true when \a c lies in the inclusive range 0xDC00..0xDFFF.
 */
inline bool Utf16IsTrailSurrogate(uint c)
{
	constexpr uint first_trail = 0xDC00;
	constexpr uint last_trail = 0xDFFF;
	return first_trail <= c && c <= last_trail;
}
185 
/**
 * Convert an UTF-16 surrogate pair to the corresponding Unicode character.
 *
 * The inputs are not validated; the result is only meaningful when \a lead
 * and \a trail really are a lead and a trail surrogate.
 *
 * @param lead The lead surrogate code point.
 * @param trail The trail surrogate code point.
 * @return 0x10000 plus the value formed by the lead offset (upper bits) and
 *         the trail offset (lower ten bits).
 */
inline char32_t Utf16DecodeSurrogate(uint lead, uint trail)
{
	const uint upper_bits = (lead - 0xD800) << 10;
	const uint lower_bits = trail - 0xDC00;
	return 0x10000 + (upper_bits | lower_bits);
}
196 
202 inline char32_t Utf16DecodeChar(const uint16_t *c)
203 {
204  if (Utf16IsLeadSurrogate(c[0])) {
205  return Utf16DecodeSurrogate(c[0], c[1]);
206  } else {
207  return *c;
208  }
209 }
210 
217 inline bool IsTextDirectionChar(char32_t c)
218 {
219  switch (c) {
220  case CHAR_TD_LRM:
221  case CHAR_TD_RLM:
222  case CHAR_TD_LRE:
223  case CHAR_TD_RLE:
224  case CHAR_TD_LRO:
225  case CHAR_TD_RLO:
226  case CHAR_TD_PDF:
227  return true;
228 
229  default:
230  return false;
231  }
232 }
233 
/**
 * Check whether a character counts as printable.
 *
 * Two ranges are rejected: everything below 0x20, and 0xE000 up to and
 * including 0xE1FF.
 * NOTE(review): the 0xE000..0xE1FF range is presumably reserved for internal
 * use by the game; this header does not say so - confirm.
 *
 * @param c The character to test.
 * @return true when \a c lies outside both rejected ranges.
 */
inline bool IsPrintable(char32_t c)
{
	const bool below_space = c < 0x20;
	const bool in_excluded_range = c >= 0xE000 && c < 0xE200;
	return !below_space && !in_excluded_range;
}
241 
/**
 * Check whether UNICODE character is whitespace or not.
 *
 * Only two characters qualify: SPACE and IDEOGRAPHIC SPACE.
 *
 * @param c The character to test.
 * @return true when \a c is one of those two characters.
 */
inline bool IsWhitespace(char32_t c)
{
	switch (c) {
		case 0x0020: /* SPACE */
		case 0x3000: /* IDEOGRAPHIC SPACE */
			return true;

		default:
			return false;
	}
}
253 
254 /* Needed for NetBSD version (and hence feature) testing; pulled in on FreeBSD as well. */
255 #if defined(__NetBSD__) || defined(__FreeBSD__)
256 #include <sys/param.h>
257 #endif
258 
259 /* strcasestr is available for _GNU_SOURCE, BSD and some Apple configurations. */
260 #if defined(_GNU_SOURCE) || (defined(__BSD_VISIBLE) && __BSD_VISIBLE) || (defined(__APPLE__) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))) || defined(_NETBSD_SOURCE)
261 # undef DEFINE_STRCASESTR
262 #else
/* None of the feature macros above matched: flag that and declare strcasestr here.
 * NOTE(review): presumably DEFINE_STRCASESTR makes an implementation file provide the function - confirm. */
263 # define DEFINE_STRCASESTR
264 char *strcasestr(const char *haystack, const char *needle);
265 #endif /* strcasestr is available */
266 
267 #endif /* STRING_FUNC_H */
IsValidChar
bool IsValidChar(char32_t key, CharSetFilter afilter)
Only allow certain keys.
Definition: string.cpp:396
StrMakeValid
std::string StrMakeValid(std::string_view str, StringValidationSettings settings=SVS_REPLACE_WITH_QUESTION_MARK)
Copies the valid (UTF-8) characters from str to the returned string.
Definition: string.cpp:205
Utf16DecodeSurrogate
char32_t Utf16DecodeSurrogate(uint lead, uint trail)
Convert an UTF-16 surrogate pair to the corresponding Unicode character.
Definition: string_func.h:192
StrTrimInPlace
void StrTrimInPlace(std::string &str)
Trim the spaces from the given string in place.
Definition: string.cpp:260
Utf8CharLen
int8_t Utf8CharLen(char32_t c)
Return the length of a UTF-8 encoded character.
Definition: string_func.h:105
GB
constexpr static debug_inline uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
Definition: bitmath_func.hpp:32
Utf8PrevChar
char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in an UTF-8 encoded string.
Definition: string_func.h:149
StrMakeValidInPlace
void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings=SVS_REPLACE_WITH_QUESTION_MARK) NOACCESS(2)
Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored).
Definition: string.cpp:178
CHAR_TD_RLO
static const char32_t CHAR_TD_RLO
Force the following characters to be treated as right-to-left characters.
Definition: string_type.h:40
Utf16DecodeChar
char32_t Utf16DecodeChar(const uint16_t *c)
Decode an UTF-16 character.
Definition: string_func.h:202
Utf16IsTrailSurrogate
bool Utf16IsTrailSurrogate(uint c)
Is the given character a trail surrogate code point?
Definition: string_func.h:181
StrNaturalCompare
int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front=false)
Compares two strings using case insensitive natural sort.
Definition: string.cpp:566
ttd_strnlen
size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
Definition: string_func.h:69
Utf8TrimString
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate an UTF8 string to some maximum length.
Definition: string.cpp:508
StrEmpty
bool StrEmpty(const char *s)
Check if a string buffer is empty.
Definition: string_func.h:57
StrValid
bool StrValid(std::span< const char > str)
Checks whether the given string is valid.
Definition: string.cpp:227
CHAR_TD_PDF
static const char32_t CHAR_TD_PDF
Restore the text-direction state to before the last LRE, RLE, LRO or RLO.
Definition: string_type.h:41
bitmath_func.hpp
StrEndsWithIgnoreCase
bool StrEndsWithIgnoreCase(std::string_view str, const std::string_view suffix)
Check whether the given string ends with the given suffix, ignoring case.
Definition: string.cpp:321
StrStartsWithIgnoreCase
bool StrStartsWithIgnoreCase(std::string_view str, const std::string_view prefix)
Check whether the given string starts with the given prefix, ignoring case.
Definition: string.cpp:281
Utf8StringLength
size_t Utf8StringLength(const char *s)
Get the length of a UTF-8 encoded string in number of characters, and thus not the number of bytes that the string occupies.
Definition: string.cpp:359
IsTextDirectionChar
bool IsTextDirectionChar(char32_t c)
Is the given character a text direction character.
Definition: string_func.h:217
CHAR_TD_RLE
static const char32_t CHAR_TD_RLE
The following text is embedded right-to-left.
Definition: string_type.h:38
CHAR_TD_LRO
static const char32_t CHAR_TD_LRO
Force the following characters to be treated as left-to-right characters.
Definition: string_type.h:39
settings
fluid_settings_t * settings
FluidSynth settings handle.
Definition: fluidsynth.cpp:21
CHAR_TD_LRE
static const char32_t CHAR_TD_LRE
The following text is embedded left-to-right.
Definition: string_type.h:37
string_type.h
CaseInsensitiveComparator
Case insensitive comparator for strings, for example for use in std::map.
Definition: string_func.h:46
StringValidationSettings
StringValidationSettings
Settings for the string validation.
Definition: string_type.h:44
StrNaturalContainsIgnoreCase
bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value)
Checks if a string is contained in another string with a locale-aware comparison that is case insensitive.
Definition: string.cpp:664
Utf16IsLeadSurrogate
bool Utf16IsLeadSurrogate(uint c)
Is the given character a lead surrogate code point?
Definition: string_func.h:171
Utf8EncodedCharLen
int8_t Utf8EncodedCharLen(char c)
Return the length of an UTF-8 encoded value based on a single char.
Definition: string_func.h:124
Utf8Decode
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition: string.cpp:419
IsWhitespace
bool IsWhitespace(char32_t c)
Check whether a Unicode character is whitespace or not, i.e. whether it is a SPACE (U+0020) or an IDEOGRAPHIC SPACE (U+3000).
Definition: string_func.h:249
CHAR_TD_RLM
static const char32_t CHAR_TD_RLM
The next character acts like a right-to-left character.
Definition: string_type.h:36
StrNaturalContains
bool StrNaturalContains(const std::string_view str, const std::string_view value)
Checks if a string is contained in another string with a locale-aware comparison that is case sensiti...
Definition: string.cpp:637
Utf8Encode
size_t Utf8Encode(T buf, char32_t c)
Encode a unicode character and place it in the buffer.
Definition: string.cpp:460
strecpy
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition: string.cpp:60
FormatArrayAsHex
std::string FormatArrayAsHex(std::span< const uint8_t > data)
Format a byte array into a continuous hex string.
Definition: string.cpp:81
SVS_REPLACE_WITH_QUESTION_MARK
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
Definition: string_type.h:46
StrEqualsIgnoreCase
bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2)
Compares two string( view)s for equality, while ignoring the case of the characters.
Definition: string.cpp:347
StrCompareIgnoreCase
int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2)
Compares two string( view)s, while ignoring the case of the characters.
Definition: string.cpp:334
CharSetFilter
CharSetFilter
Valid filter types for IsValidChar.
Definition: string_type.h:24
ConvertHexToBytes
bool ConvertHexToBytes(std::string_view hex, std::span< uint8_t > bytes)
Convert a hex-string to a byte-array, while validating it was actually hex.
Definition: string.cpp:711
CHAR_TD_LRM
static const char32_t CHAR_TD_LRM
The next character acts like a left-to-right character.
Definition: string_type.h:35