OpenTTD Source 20241224-master-gf74b0cf984
string_func.h
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
7
12#ifndef STRING_FUNC_H
13#define STRING_FUNC_H
14
15#include <iosfwd>
16
17#include "core/bitmath_func.hpp"
18#include "string_type.h"
19
/** Copies characters from one buffer to another. */
20void strecpy(std::span<char> dst, std::string_view src);
21
/** Format a byte array into a continuous hex string. */
22std::string FormatArrayAsHex(std::span<const uint8_t> data);
23
/** Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored). */
24void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK) NOACCESS(2);
/** Copies the valid (UTF-8) characters from str to the returned string. */
25[[nodiscard]] std::string StrMakeValid(std::string_view str, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK);
27
/* NOTE(review): presumably lowercases str in place starting at offs; the meaning of the bool result is not visible here. */
28bool strtolower(std::string &str, std::string::size_type offs = 0);
29
/** Checks whether the given string is valid. */
30[[nodiscard]] bool StrValid(std::span<const char> str);
/** Trim the spaces from the given string in place. */
31void StrTrimInPlace(std::string &str);
/* NOTE(review): presumably the non-mutating counterpart of StrTrimInPlace, returning a trimmed view; confirm in string.cpp. */
32std::string_view StrTrimView(std::string_view str);
33
/** Check whether the given string starts with the given prefix, ignoring case. */
34[[nodiscard]] bool StrStartsWithIgnoreCase(std::string_view str, const std::string_view prefix);
/** Check whether the given string ends with the given suffix, ignoring case. */
35[[nodiscard]] bool StrEndsWithIgnoreCase(std::string_view str, const std::string_view suffix);
36
/** Compares two string (view)s, while ignoring the case of the characters. */
37[[nodiscard]] int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2);
/** Compares two string (view)s for equality, while ignoring the case of the characters. */
38[[nodiscard]] bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2);
/** Compares two strings using case insensitive natural sort. */
39[[nodiscard]] int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front = false);
/** Checks if a string is contained in another string with a locale-aware, case sensitive comparison. */
40[[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value);
/** Checks if a string is contained in another string with a locale-aware, case insensitive comparison. */
41[[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value);
42
/** Convert a hex-string to a byte-array, while validating it was actually hex. */
43bool ConvertHexToBytes(std::string_view hex, std::span<uint8_t> bytes);
44
47 bool operator()(const std::string_view s1, const std::string_view s2) const { return StrCompareIgnoreCase(s1, s2) < 0; }
48};
49
/**
 * Check if a string buffer is empty.
 *
 * @param s Pointer to the first character of the buffer; may be \c nullptr.
 * @return \c true when \a s is null or its first character is the terminator.
 */
inline bool StrEmpty(const char *s)
{
	/* A missing buffer counts as empty. */
	if (s == nullptr) return true;

	return *s == '\0';
}
61
/**
 * Get the length of a string, within a limited buffer.
 *
 * @param str    Start of the buffer to measure.
 * @param maxlen Maximum number of characters to inspect.
 * @return Number of characters before the first terminator, capped at \a maxlen.
 */
inline size_t ttd_strnlen(const char *str, size_t maxlen)
{
	size_t len = 0;
	/* Check the bound before dereferencing, so nothing past maxlen is ever read. */
	while (len < maxlen && str[len] != '\0') len++;
	return len;
}
75
/** Only allow certain keys: test a character against the given filter. */
76bool IsValidChar(char32_t key, CharSetFilter afilter);
77
/** Decode and consume the next UTF-8 encoded character. */
78size_t Utf8Decode(char32_t *c, const char *s);
/** Encode a unicode character and place it in the buffer. */
79size_t Utf8Encode(char *buf, char32_t c);
/** Encode a unicode character and place it in the buffer (output stream iterator variant). */
80size_t Utf8Encode(std::ostreambuf_iterator<char> &buf, char32_t c);
/** Encode a unicode character and place it in the buffer (string back-inserter variant). */
81size_t Utf8Encode(std::back_insert_iterator<std::string> &buf, char32_t c);
/** Properly terminate an UTF8 string to some maximum length. */
82size_t Utf8TrimString(char *s, size_t maxlen);
83
84
85inline char32_t Utf8Consume(const char **s)
86{
87 char32_t c;
88 *s += Utf8Decode(&c, *s);
89 return c;
90}
91
/**
 * Decode the UTF-8 character at an iterator position and advance the iterator past it.
 *
 * @tparam Titr Iterator type; it must dereference to a \c char and support \c += with a byte count.
 * @param s Iterator at the read position; moved forward by the amount Utf8Decode reports.
 * @return The decoded character.
 */
template <class Titr>
inline char32_t Utf8Consume(Titr &s)
{
	char32_t decoded;
	/* &*s turns the iterator into the raw character pointer Utf8Decode expects. */
	const size_t consumed = Utf8Decode(&decoded, &*s);
	s += consumed;
	return decoded;
}
99
/**
 * Return the length of a UTF-8 encoded character.
 *
 * @param c Unicode character.
 * @return Number of bytes (1 to 4) needed to encode \a c.
 */
inline int8_t Utf8CharLen(char32_t c)
{
	/* Invalid value (beyond the Unicode range); it gets encoded as a single '?'. */
	if (c >= 0x110000) return 1;

	if (c >= 0x10000) return 4;
	if (c >= 0x800) return 3;
	if (c >= 0x80) return 2;
	return 1;
}
115
116
124inline int8_t Utf8EncodedCharLen(char c)
125{
126 if (GB(c, 3, 5) == 0x1E) return 4;
127 if (GB(c, 4, 4) == 0x0E) return 3;
128 if (GB(c, 5, 3) == 0x06) return 2;
129 if (GB(c, 7, 1) == 0x00) return 1;
130
131 /* Invalid UTF8 start encoding */
132 return 0;
133}
134
135
136/* Check if the given character is part of a UTF8 sequence */
137inline bool IsUtf8Part(char c)
138{
139 return GB(c, 6, 2) == 2;
140}
141
149inline char *Utf8PrevChar(char *s)
150{
151 char *ret = s;
152 while (IsUtf8Part(*--ret)) {}
153 return ret;
154}
155
156inline const char *Utf8PrevChar(const char *s)
157{
158 const char *ret = s;
159 while (IsUtf8Part(*--ret)) {}
160 return ret;
161}
162
/** Get the length of an UTF-8 encoded string in number of characters, not in number of bytes. */
163size_t Utf8StringLength(const char *s);
/** Get the length of an UTF-8 encoded string in number of characters, not in number of bytes (std::string variant). */
164size_t Utf8StringLength(const std::string &str);
165
/**
 * Is the given character a lead surrogate code point?
 *
 * @param c The character to test.
 * @return \c true when \a c lies in the range 0xD800..0xDBFF (inclusive).
 */
inline bool Utf16IsLeadSurrogate(uint c)
{
	if (c < 0xD800) return false;
	return c <= 0xDBFF;
}
175
/**
 * Is the given character a trail surrogate code point?
 *
 * @param c The character to test.
 * @return \c true when \a c lies in the range 0xDC00..0xDFFF (inclusive).
 */
inline bool Utf16IsTrailSurrogate(uint c)
{
	if (c < 0xDC00) return false;
	return c <= 0xDFFF;
}
185
/**
 * Convert an UTF-16 surrogate pair to the corresponding Unicode character.
 *
 * @param lead  Lead surrogate code point.
 * @param trail Trail surrogate code point.
 * @return The Unicode character the pair encodes.
 * @note The inputs are not validated here.
 */
inline char32_t Utf16DecodeSurrogate(uint lead, uint trail)
{
	/* Each surrogate carries 10 payload bits; the lead holds the upper half. */
	const uint high_bits = (lead - 0xD800) << 10;
	const uint low_bits = trail - 0xDC00;
	return 0x10000 + (high_bits | low_bits);
}
196
202inline char32_t Utf16DecodeChar(const uint16_t *c)
203{
204 if (Utf16IsLeadSurrogate(c[0])) {
205 return Utf16DecodeSurrogate(c[0], c[1]);
206 } else {
207 return *c;
208 }
209}
210
217inline bool IsTextDirectionChar(char32_t c)
218{
219 switch (c) {
220 case CHAR_TD_LRM:
221 case CHAR_TD_RLM:
222 case CHAR_TD_LRE:
223 case CHAR_TD_RLE:
224 case CHAR_TD_LRO:
225 case CHAR_TD_RLO:
226 case CHAR_TD_PDF:
227 return true;
228
229 default:
230 return false;
231 }
232}
233
/**
 * Check whether a character is considered printable.
 *
 * @param c The character to test.
 * @return \c false for characters below 0x20 and for the range 0xE000..0xE1FF, \c true otherwise.
 */
inline bool IsPrintable(char32_t c)
{
	const bool is_control = c < 0x20;
	/* NOTE(review): presumably a private-use range reserved for internal control codes; confirm. */
	const bool is_reserved = c >= 0xE000 && c < 0xE200;
	return !is_control && !is_reserved;
}
241
/**
 * Check whether UNICODE character is whitespace or not.
 *
 * @param c UNICODE character to check.
 * @return \c true for SPACE (U+0020) and IDEOGRAPHIC SPACE (U+3000), \c false for anything else.
 */
inline bool IsWhitespace(char32_t c)
{
	switch (c) {
		case 0x0020: // SPACE
		case 0x3000: // IDEOGRAPHIC SPACE
			return true;

		default:
			return false;
	}
}
253
254/* NetBSD and FreeBSD need <sys/param.h> for the version/feature testing done below. */
255#if defined(__NetBSD__) || defined(__FreeBSD__)
256#include <sys/param.h>
257#endif
258
259/*
 * strcasestr is available with _GNU_SOURCE, on the BSDs and on some Apple configurations.
 * When none of those feature macros apply, DEFINE_STRCASESTR is defined and the function is
 * declared here. NOTE(review): presumably the fallback implementation lives in string.cpp
 * guarded by DEFINE_STRCASESTR; confirm.
 */
260#if defined(_GNU_SOURCE) || (defined(__BSD_VISIBLE) && __BSD_VISIBLE) || (defined(__APPLE__) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))) || defined(_NETBSD_SOURCE)
261# undef DEFINE_STRCASESTR
262#else
263# define DEFINE_STRCASESTR
264char *strcasestr(const char *haystack, const char *needle);
265#endif /* strcasestr is available */
266
267#endif /* STRING_FUNC_H */
Functions related to bit mathematics.
debug_inline static constexpr uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
fluid_settings_t * settings
FluidSynth settings handle.
size_t Utf8Encode(T buf, char32_t c)
Encode a unicode character and place it in the buffer.
Definition string.cpp:460
bool ConvertHexToBytes(std::string_view hex, std::span< uint8_t > bytes)
Convert a hex-string to a byte-array, while validating it was actually hex.
Definition string.cpp:734
char32_t Utf16DecodeSurrogate(uint lead, uint trail)
Convert an UTF-16 surrogate pair to the corresponding Unicode character.
char32_t Utf16DecodeChar(const uint16_t *c)
Decode an UTF-16 character.
bool StrEmpty(const char *s)
Check if a string buffer is empty.
Definition string_func.h:57
bool Utf16IsLeadSurrogate(uint c)
Is the given character a lead surrogate code point?
bool IsValidChar(char32_t key, CharSetFilter afilter)
Only allow certain keys.
Definition string.cpp:396
bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2)
Compares two string( view)s for equality, while ignoring the case of the characters.
Definition string.cpp:347
bool IsWhitespace(char32_t c)
Check whether UNICODE character is whitespace or not, i.e.
bool StrNaturalContains(const std::string_view str, const std::string_view value)
Checks if a string is contained in another string with a locale-aware comparison that is case sensiti...
Definition string.cpp:660
int8_t Utf8EncodedCharLen(char c)
Return the length of an UTF-8 encoded value based on a single char.
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition string.cpp:60
std::string FormatArrayAsHex(std::span< const uint8_t > data)
Format a byte array into a continuous hex string.
Definition string.cpp:81
size_t Utf8StringLength(const char *s)
Get the length of an UTF-8 encoded string in number of characters and thus not the number of bytes th...
Definition string.cpp:359
bool StrStartsWithIgnoreCase(std::string_view str, const std::string_view prefix)
Check whether the given string starts with the given prefix, ignoring case.
Definition string.cpp:281
bool StrValid(std::span< const char > str)
Checks whether the given string is valid, i.e.
Definition string.cpp:227
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition string.cpp:419
std::string StrMakeValid(std::string_view str, StringValidationSettings settings=SVS_REPLACE_WITH_QUESTION_MARK)
Copies the valid (UTF-8) characters from str to the returned string.
Definition string.cpp:205
int8_t Utf8CharLen(char32_t c)
Return the length of a UTF-8 encoded character.
void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings=SVS_REPLACE_WITH_QUESTION_MARK) NOACCESS(2)
Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored).
Definition string.cpp:178
bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value)
Checks if a string is contained in another string with a locale-aware comparison that is case insensi...
Definition string.cpp:687
bool Utf16IsTrailSurrogate(uint c)
Is the given character a trail surrogate code point?
int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2)
Compares two string( view)s, while ignoring the case of the characters.
Definition string.cpp:334
char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in an UTF-8 encoded string.
bool IsTextDirectionChar(char32_t c)
Is the given character a text direction character.
bool StrEndsWithIgnoreCase(std::string_view str, const std::string_view suffix)
Check whether the given string ends with the given suffix, ignoring case.
Definition string.cpp:321
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate an UTF8 string to some maximum length.
Definition string.cpp:508
void StrTrimInPlace(std::string &str)
Trim the spaces from given string in place, i.e.
Definition string.cpp:260
size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
Definition string_func.h:69
int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front=false)
Compares two strings using case insensitive natural sort.
Definition string.cpp:589
Types for strings.
CharSetFilter
Valid filter types for IsValidChar.
Definition string_type.h:24
static const char32_t CHAR_TD_RLE
The following text is embedded right-to-left.
Definition string_type.h:38
static const char32_t CHAR_TD_LRO
Force the following characters to be treated as left-to-right characters.
Definition string_type.h:39
StringValidationSettings
Settings for the string validation.
Definition string_type.h:44
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
Definition string_type.h:46
static const char32_t CHAR_TD_LRM
The next character acts like a left-to-right character.
Definition string_type.h:35
static const char32_t CHAR_TD_RLO
Force the following characters to be treated as right-to-left characters.
Definition string_type.h:40
static const char32_t CHAR_TD_LRE
The following text is embedded left-to-right.
Definition string_type.h:37
static const char32_t CHAR_TD_RLM
The next character acts like a right-to-left character.
Definition string_type.h:36
static const char32_t CHAR_TD_PDF
Restore the text-direction state to before the last LRE, RLE, LRO or RLO.
Definition string_type.h:41
Case insensitive comparator for strings, for example for use in std::map.
Definition string_func.h:46