OpenTTD Source 20260218-master-g2123fca5ea
string_consumer.cpp
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <https://www.gnu.org/licenses/old-licenses/gpl-2.0>.
6 */
7
9
10#include "../stdafx.h"
11#include "string_consumer.hpp"
12
13#include "utf8.hpp"
14#include "string_builder.hpp"
15
16#include "../string_func.h"
17
18#if defined(STRGEN) || defined(SETTINGSGEN)
19#include "../error_func.h"
20#else
21#include "../debug.h"
22#endif
23
24#include "../safeguards.h"
25
26/* static */ const std::string_view StringConsumer::WHITESPACE_NO_NEWLINE = "\t\v\f\r ";
27/* static */ const std::string_view StringConsumer::WHITESPACE_OR_NEWLINE = "\t\n\v\f\r ";
28
33/* static */ void StringConsumer::LogError(std::string &&msg)
34{
35#if defined(STRGEN) || defined(SETTINGSGEN)
36 FatalErrorI(std::move(msg));
37#else
38 DebugPrint("misc", 0, std::move(msg));
39#endif
40}
41
42std::optional<uint8_t> StringConsumer::PeekUint8() const
43{
44 if (this->GetBytesLeft() < 1) return std::nullopt;
45 return static_cast<uint8_t>(this->src[this->position]);
46}
47
48std::optional<uint16_t> StringConsumer::PeekUint16LE() const
49{
50 if (this->GetBytesLeft() < 2) return std::nullopt;
51 return static_cast<uint8_t>(this->src[this->position]) |
52 static_cast<uint8_t>(this->src[this->position + 1]) << 8;
53}
54
55std::optional<uint32_t> StringConsumer::PeekUint32LE() const
56{
57 if (this->GetBytesLeft() < 4) return std::nullopt;
58 return static_cast<uint8_t>(this->src[this->position]) |
59 static_cast<uint8_t>(this->src[this->position + 1]) << 8 |
60 static_cast<uint8_t>(this->src[this->position + 2]) << 16 |
61 static_cast<uint8_t>(this->src[this->position + 3]) << 24;
62}
63
64std::optional<uint64_t> StringConsumer::PeekUint64LE() const
65{
66 if (this->GetBytesLeft() < 8) return std::nullopt;
67 return static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position])) |
68 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 1])) << 8 |
69 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 2])) << 16 |
70 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 3])) << 24 |
71 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 4])) << 32 |
72 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 5])) << 40 |
73 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 6])) << 48 |
74 static_cast<uint64_t>(static_cast<uint8_t>(this->src[this->position + 7])) << 56;
75}
76
77std::optional<char> StringConsumer::PeekChar() const
78{
79 auto result = this->PeekUint8();
80 if (!result.has_value()) return {};
81 return static_cast<char>(*result);
82}
83
84std::pair<StringConsumer::size_type, char32_t> StringConsumer::PeekUtf8() const
85{
86 auto buf = this->src.substr(this->position);
87 return DecodeUtf8(buf);
88}
89
90std::string_view StringConsumer::Peek(size_type len) const
91{
92 auto buf = this->src.substr(this->position);
93 if (len == std::string_view::npos) {
94 len = buf.size();
95 } else if (len > buf.size()) {
96 len = buf.size();
97 }
98 return buf.substr(0, len);
99}
100
102{
103 if (len == std::string_view::npos) {
104 this->position = this->src.size();
105 } else if (size_type max_len = GetBytesLeft(); len > max_len) {
106 LogError(fmt::format("Source buffer too short: {} > {}", len, max_len));
107 this->position = this->src.size();
108 } else {
109 this->position += len;
110 }
111}
112
114{
115 assert(!str.empty());
116 auto buf = this->src.substr(this->position);
117 return buf.find(str);
118}
119
121{
122 auto [data, len] = EncodeUtf8(c);
123 return this->Find({data, len});
124}
125
127{
128 assert(!chars.empty());
129 auto buf = this->src.substr(this->position);
130 return buf.find_first_of(chars);
131}
132
134{
135 assert(!chars.empty());
136 auto buf = this->src.substr(this->position);
137 return buf.find_first_not_of(chars);
138}
139
140std::string_view StringConsumer::PeekUntil(std::string_view str, SeparatorUsage sep) const
141{
142 assert(!str.empty());
143 auto buf = this->src.substr(this->position);
144 auto len = buf.find(str);
145 if (len != std::string_view::npos) {
146 switch (sep) {
148 if (buf.compare(len, str.size(), str) == 0) len += str.size();
149 break;
151 while (buf.compare(len, str.size(), str) == 0) len += str.size();
152 break;
153 default:
154 break;
155 }
156 }
157 return buf.substr(0, len);
158}
159
160std::string_view StringConsumer::PeekUntilUtf8(char32_t c, SeparatorUsage sep) const
161{
162 auto [data, len] = EncodeUtf8(c);
163 return PeekUntil({data, len}, sep);
164}
165
166std::string_view StringConsumer::ReadUntilUtf8(char32_t c, SeparatorUsage sep)
167{
168 auto [data, len] = EncodeUtf8(c);
169 return ReadUntil({data, len}, sep);
170}
171
173{
174 auto [data, len] = EncodeUtf8(c);
175 return SkipUntil({data, len}, sep);
176}
177
179{
180 this->SkipIf("-");
181 if (base == 0) {
182 if (this->ReadIf("0x") || this->ReadIf("0X")) { // boolean short-circuit ensures only one prefix is read
183 base = 16;
184 } else {
185 base = 10;
186 }
187 }
188 switch (base) {
189 default:
190 assert(false);
191 break;
192 case 8:
193 this->SkipUntilCharNotIn("01234567");
194 break;
195 case 10:
196 this->SkipUntilCharNotIn("0123456789");
197 break;
198 case 16:
199 this->SkipUntilCharNotIn("0123456789abcdefABCDEF");
200 break;
201 }
202}
std::string_view Peek(size_type len) const
Peek the next 'len' bytes.
SeparatorUsage
Treatment of separator characters.
@ READ_ALL_SEPARATORS
Read all consecutive separators, and include them all in the result.
@ READ_ONE_SEPARATOR
Read one separator, and include it in the result.
std::optional< uint32_t > PeekUint32LE() const
Peek binary uint32 using little endian.
size_type GetBytesLeft() const noexcept
Get number of bytes left to read.
std::string_view PeekUntil(std::string_view str, SeparatorUsage sep) const
Peek data until the first occurrence of 'str'.
std::string_view::size_type size_type
The type of the size of our strings.
static const std::string_view WHITESPACE_OR_NEWLINE
ASCII whitespace characters, including new-line.
std::string_view ReadUntil(std::string_view str, SeparatorUsage sep)
Read data until the first occurrence of 'str', and advance reader.
void SkipUntil(std::string_view str, SeparatorUsage sep)
Skip data until the first occurrence of 'str'.
static const std::string_view WHITESPACE_NO_NEWLINE
ASCII whitespace characters, excluding new-line.
std::optional< char > PeekChar() const
Peek 8-bit character.
static void LogError(std::string &&msg)
Log an error in the processing (too small buffer, integer out of range, etc.).
bool ReadIf(std::string_view str)
Check whether the next data matches 'str', and skip it.
std::string_view PeekUntilUtf8(char32_t c, SeparatorUsage sep) const
Peek data until the first occurrence of UTF-8 char 'c'.
std::optional< uint64_t > PeekUint64LE() const
Peek binary uint64 using little endian.
void SkipUntilCharNotIn(std::string_view chars)
Skip 8-bit chars, while they are in 'chars', until they are not.
size_type FindCharNotIn(std::string_view chars) const
Find first occurrence of any 8-bit char not in 'chars'.
void SkipIf(std::string_view str)
If the next data matches 'str', then skip it.
std::optional< uint16_t > PeekUint16LE() const
Peek binary uint16 using little endian.
size_type position
The current parsing position in the string.
std::string_view src
The string to parse.
void SkipIntegerBase(int base)
Skip an integer in number 'base'.
void Skip(size_type len)
Discard some bytes.
size_type Find(std::string_view str) const
Find first occurrence of 'str'.
std::optional< uint8_t > PeekUint8() const
Peek binary uint8.
void SkipUntilUtf8(char32_t c, SeparatorUsage sep)
Skip data until the first occurrence of UTF-8 char 'c'.
std::string_view ReadUntilUtf8(char32_t c, SeparatorUsage sep)
Read data until the first occurrence of UTF-8 char 'c', and advance reader.
std::pair< size_type, char32_t > PeekUtf8() const
Peek UTF-8 character.
size_type FindCharIn(std::string_view chars) const
Find first occurrence of any 8-bit char in 'chars'.
size_type FindUtf8(char32_t c) const
Find first occurrence of UTF-8 char 'c'.
void DebugPrint(std::string_view category, int level, std::string &&message)
Internal function for outputting the debug line.
Definition debug.cpp:111
Functions related to debugging.
Error reporting related functions.
void FatalErrorI(const std::string &str)
Error handling for fatal non-user errors.
Definition openttd.cpp:136
A number of safeguards to prevent using unsafe methods.
Definition of base types and functions in a cross-platform compatible way.
Compose strings from textual and binary data.
Parse strings.
Functions related to low-level strings.
std::pair< size_t, char32_t > DecodeUtf8(std::string_view buf)
Decode a character from UTF-8.
Definition utf8.cpp:46
std::pair< char[4], size_t > EncodeUtf8(char32_t c)
Encode a character to UTF-8.
Definition utf8.cpp:19
Handling of UTF-8 encoded data.