OpenTTD Source 20250312-master-gcdcc6b491d
string_func.h
Go to the documentation of this file.
/*
 * This file is part of OpenTTD.
 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
 */
7
12#ifndef STRING_FUNC_H
13#define STRING_FUNC_H
14
15#include <iosfwd>
16
17#include "core/bitmath_func.hpp"
18#include "string_type.h"
19
20void strecpy(std::span<char> dst, std::string_view src);
21
22std::string FormatArrayAsHex(std::span<const uint8_t> data);
23
24void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK) NOACCESS(2);
25[[nodiscard]] std::string StrMakeValid(std::string_view str, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK);
27
28bool strtolower(std::string &str, std::string::size_type offs = 0);
29
30[[nodiscard]] bool StrValid(std::span<const char> str);
31void StrTrimInPlace(std::string &str);
32std::string_view StrTrimView(std::string_view str);
33
34[[nodiscard]] bool StrStartsWithIgnoreCase(std::string_view str, const std::string_view prefix);
35[[nodiscard]] bool StrEndsWithIgnoreCase(std::string_view str, const std::string_view suffix);
36
37[[nodiscard]] int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2);
38[[nodiscard]] bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2);
39[[nodiscard]] int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front = false);
40[[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value);
41[[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value);
42
43bool ConvertHexToBytes(std::string_view hex, std::span<uint8_t> bytes);
44
47 bool operator()(const std::string_view s1, const std::string_view s2) const { return StrCompareIgnoreCase(s1, s2) < 0; }
48};
49
/**
 * Check if a string buffer is empty.
 *
 * @param s The pointer to the first element of the buffer; may be nullptr.
 * @return true if s is nullptr or points directly at a terminating '\0'.
 */
inline bool StrEmpty(const char *s)
{
	if (s == nullptr) return true;
	return *s == '\0';
}
61
/**
 * Get the length of a string, within a limited buffer.
 *
 * @param str The pointer to the first element of the buffer.
 * @param maxlen The maximum number of chars to inspect.
 * @return The number of chars before the first '\0', or maxlen when no
 *         '\0' occurs within the first maxlen chars.
 */
inline size_t ttd_strnlen(const char *str, size_t maxlen)
{
	size_t len = 0;
	/* Check the bound first, so str[maxlen] is never read. */
	while (len < maxlen && str[len] != '\0') len++;
	return len;
}
75
76bool IsValidChar(char32_t key, CharSetFilter afilter);
77
78size_t Utf8Decode(char32_t *c, const char *s);
79/* std::string_view::iterator might be char *, in which case we do not want this templated variant to be taken. */
80template <typename T> requires (!std::is_same_v<T, char *> && (std::is_same_v<std::string_view::iterator, T> || std::is_same_v<std::string::iterator, T>))
81inline size_t Utf8Decode(char32_t *c, T &s) { return Utf8Decode(c, &*s); }
82size_t Utf8Encode(char *buf, char32_t c);
83size_t Utf8Encode(std::ostreambuf_iterator<char> &buf, char32_t c);
84size_t Utf8Encode(std::back_insert_iterator<std::string> &buf, char32_t c);
85inline size_t Utf8Encode(std::string::iterator &s, char32_t c) { return Utf8Encode(&*s, c); }
86size_t Utf8TrimString(char *s, size_t maxlen);
87
88
89inline char32_t Utf8Consume(const char **s)
90{
91 char32_t c;
92 *s += Utf8Decode(&c, *s);
93 return c;
94}
95
/**
 * Decode the UTF-8 character an iterator refers to and advance the iterator past it.
 *
 * @tparam Titr Iterator type; the address of the element it refers to is
 *              passed to Utf8Decode as a char pointer.
 * @param s Iterator at the read position; it is moved forward by the value
 *          Utf8Decode returns.
 * @return The decoded character.
 */
template <class Titr>
inline char32_t Utf8Consume(Titr &s)
{
	char32_t decoded;
	s += Utf8Decode(&decoded, &*s);
	return decoded;
}
103
/**
 * Return the length of a UTF-8 encoded character.
 *
 * @param c Unicode character.
 * @return 1 to 4 for characters below 0x110000; 1 for anything else, as
 *         such values are encoded as a single '?'.
 */
inline int8_t Utf8CharLen(char32_t c)
{
	if (c < 0x80) return 1;
	if (c < 0x800) return 2;
	if (c < 0x10000) return 3;
	if (c < 0x110000) return 4;

	/* Invalid value, we encode as a '?' */
	return 1;
}
119
120
128inline int8_t Utf8EncodedCharLen(char c)
129{
130 if (GB(c, 3, 5) == 0x1E) return 4;
131 if (GB(c, 4, 4) == 0x0E) return 3;
132 if (GB(c, 5, 3) == 0x06) return 2;
133 if (GB(c, 7, 1) == 0x00) return 1;
134
135 /* Invalid UTF8 start encoding */
136 return 0;
137}
138
139
140/* Check if the given character is part of a UTF8 sequence */
141inline bool IsUtf8Part(char c)
142{
143 return GB(c, 6, 2) == 2;
144}
145
153inline char *Utf8PrevChar(char *s)
154{
155 char *ret = s;
156 while (IsUtf8Part(*--ret)) {}
157 return ret;
158}
159
160inline const char *Utf8PrevChar(const char *s)
161{
162 const char *ret = s;
163 while (IsUtf8Part(*--ret)) {}
164 return ret;
165}
166
167inline std::string::iterator Utf8PrevChar(std::string::iterator &s)
168{
169 auto cur = s;
170 do {
171 cur = std::prev(cur);
172 } while (IsUtf8Part(*cur));
173 return cur;
174}
175
/** Get the length of an UTF-8 encoded string in number of characters, and thus not the number of bytes. */
size_t Utf8StringLength(const char *s);
size_t Utf8StringLength(const std::string &str);
178
/**
 * Is the given character a lead surrogate code point?
 *
 * @param c The character to test.
 * @return true if c lies in the range 0xD800..0xDBFF (inclusive).
 */
inline bool Utf16IsLeadSurrogate(uint c)
{
	return 0xD800 <= c && c <= 0xDBFF;
}
188
/**
 * Is the given character a trail surrogate code point?
 *
 * @param c The character to test.
 * @return true if c lies in the range 0xDC00..0xDFFF (inclusive).
 */
inline bool Utf16IsTrailSurrogate(uint c)
{
	return 0xDC00 <= c && c <= 0xDFFF;
}
198
/**
 * Convert an UTF-16 surrogate pair to the corresponding Unicode character.
 *
 * @param lead Lead surrogate code point.
 * @param trail Trail surrogate code point.
 * @return 0x10000 plus the value formed by the lead offset (from 0xD800)
 *         shifted left by 10 bits, combined with the trail offset (from 0xDC00).
 * @note The arguments are not validated here.
 */
inline char32_t Utf16DecodeSurrogate(uint lead, uint trail)
{
	const uint high_bits = (lead - 0xD800) << 10;
	const uint low_bits = trail - 0xDC00;
	return 0x10000 + (high_bits | low_bits);
}
209
215inline char32_t Utf16DecodeChar(const uint16_t *c)
216{
217 if (Utf16IsLeadSurrogate(c[0])) {
218 return Utf16DecodeSurrogate(c[0], c[1]);
219 } else {
220 return *c;
221 }
222}
223
230inline bool IsTextDirectionChar(char32_t c)
231{
232 switch (c) {
233 case CHAR_TD_LRM:
234 case CHAR_TD_RLM:
235 case CHAR_TD_LRE:
236 case CHAR_TD_RLE:
237 case CHAR_TD_LRO:
238 case CHAR_TD_RLO:
239 case CHAR_TD_PDF:
240 return true;
241
242 default:
243 return false;
244 }
245}
246
/**
 * Check whether a character is considered printable.
 *
 * @param c The character to test.
 * @return false for characters below 0x20 and for the range
 *         0xE000..0xE1FF (inclusive); true for everything else.
 * @note NOTE(review): the excluded 0xE000..0xE1FF range is presumably
 *       reserved for internal control codes -- confirm against its users.
 */
inline bool IsPrintable(char32_t c)
{
	if (c < 0x20) return false;

	const bool in_excluded_range = c >= 0xE000 && c < 0xE200;
	return !in_excluded_range;
}
254
/**
 * Check whether UNICODE character is whitespace or not.
 * Only two characters are treated as whitespace here: SPACE and
 * IDEOGRAPHIC SPACE.
 *
 * @param c UNICODE character to check.
 * @return true if c is 0x0020 or 0x3000.
 */
inline bool IsWhitespace(char32_t c)
{
	switch (c) {
		case 0x0020: /* SPACE */
		case 0x3000: /* IDEOGRAPHIC SPACE */
			return true;

		default:
			return false;
	}
}
266
/* <sys/param.h> is needed on NetBSD and FreeBSD for version (and thus feature) testing. */
#if defined(__NetBSD__) || defined(__FreeBSD__)
#include <sys/param.h>
#endif

/*
 * strcasestr is available for _GNU_SOURCE, BSD and some Apple.
 * Where it is not, DEFINE_STRCASESTR is defined and the function is
 * declared here instead.
 */
#if defined(_GNU_SOURCE) || (defined(__BSD_VISIBLE) && __BSD_VISIBLE) || (defined(__APPLE__) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))) || defined(_NETBSD_SOURCE)
#	undef DEFINE_STRCASESTR
#else
#	define DEFINE_STRCASESTR
char *strcasestr(const char *haystack, const char *needle);
#endif /* strcasestr is available */
279
280#endif /* STRING_FUNC_H */
Functions related to bit mathematics.
debug_inline static constexpr uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
fluid_settings_t * settings
FluidSynth settings handle.
size_t Utf8Encode(T buf, char32_t c)
Encode a unicode character and place it in the buffer.
Definition string.cpp:478
bool ConvertHexToBytes(std::string_view hex, std::span< uint8_t > bytes)
Convert a hex-string to a byte-array, while validating it was actually hex.
Definition string.cpp:752
char32_t Utf16DecodeSurrogate(uint lead, uint trail)
Convert an UTF-16 surrogate pair to the corresponding Unicode character.
char32_t Utf16DecodeChar(const uint16_t *c)
Decode an UTF-16 character.
bool StrEmpty(const char *s)
Check if a string buffer is empty.
Definition string_func.h:57
bool Utf16IsLeadSurrogate(uint c)
Is the given character a lead surrogate code point?
bool IsValidChar(char32_t key, CharSetFilter afilter)
Only allow certain keys.
Definition string.cpp:414
bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2)
Compares two string( view)s for equality, while ignoring the case of the characters.
Definition string.cpp:365
bool IsWhitespace(char32_t c)
Check whether UNICODE character is whitespace or not, i.e. whether it is SPACE (U+0020) or IDEOGRAPHIC SPACE (U+3000).
bool StrNaturalContains(const std::string_view str, const std::string_view value)
Checks if a string is contained in another string with a locale-aware comparison that is case sensiti...
Definition string.cpp:678
int8_t Utf8EncodedCharLen(char c)
Return the length of an UTF-8 encoded value based on a single char.
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition string.cpp:59
std::string FormatArrayAsHex(std::span< const uint8_t > data)
Format a byte array into a continuous hex string.
Definition string.cpp:80
size_t Utf8StringLength(const char *s)
Get the length of an UTF-8 encoded string in number of characters and thus not the number of bytes th...
Definition string.cpp:377
bool StrStartsWithIgnoreCase(std::string_view str, const std::string_view prefix)
Check whether the given string starts with the given prefix, ignoring case.
Definition string.cpp:299
bool StrValid(std::span< const char > str)
Checks whether the given string is valid, i.e.
Definition string.cpp:245
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition string.cpp:437
std::string StrMakeValid(std::string_view str, StringValidationSettings settings=SVS_REPLACE_WITH_QUESTION_MARK)
Copies the valid (UTF-8) characters from str to the returned string.
Definition string.cpp:223
int8_t Utf8CharLen(char32_t c)
Return the length of a UTF-8 encoded character.
void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings=SVS_REPLACE_WITH_QUESTION_MARK) NOACCESS(2)
Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored).
Definition string.cpp:196
bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value)
Checks if a string is contained in another string with a locale-aware comparison that is case insensi...
Definition string.cpp:705
bool Utf16IsTrailSurrogate(uint c)
Is the given character a trail surrogate code point?
int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2)
Compares two string( view)s, while ignoring the case of the characters.
Definition string.cpp:352
char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in an UTF-8 encoded string.
bool IsTextDirectionChar(char32_t c)
Is the given character a text direction character.
bool StrEndsWithIgnoreCase(std::string_view str, const std::string_view suffix)
Check whether the given string ends with the given suffix, ignoring case.
Definition string.cpp:339
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate an UTF8 string to some maximum length.
Definition string.cpp:526
void StrTrimInPlace(std::string &str)
Trim the spaces from given string in place, i.e.
Definition string.cpp:278
size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
Definition string_func.h:69
int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front=false)
Compares two strings using case insensitive natural sort.
Definition string.cpp:607
Types for strings.
static const char32_t CHAR_TD_RLE
The following text is embedded right-to-left.
Definition string_type.h:38
static const char32_t CHAR_TD_LRO
Force the following characters to be treated as left-to-right characters.
Definition string_type.h:39
StringValidationSettings
Settings for the string validation.
Definition string_type.h:44
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
Definition string_type.h:46
static const char32_t CHAR_TD_LRM
The next character acts like a left-to-right character.
Definition string_type.h:35
CharSetFilter
Valid filter types for IsValidChar.
Definition string_type.h:24
static const char32_t CHAR_TD_RLO
Force the following characters to be treated as right-to-left characters.
Definition string_type.h:40
static const char32_t CHAR_TD_LRE
The following text is embedded left-to-right.
Definition string_type.h:37
static const char32_t CHAR_TD_RLM
The next character acts like a right-to-left character.
Definition string_type.h:36
static const char32_t CHAR_TD_PDF
Restore the text-direction state to before the last LRE, RLE, LRO or RLO.
Definition string_type.h:41
Case insensitive comparator for strings, for example for use in std::map.
Definition string_func.h:46