OpenTTD Source 20250524-master-gc366e6a48e
strgen_base.cpp
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
7
10#include "../stdafx.h"
11#include "../core/endian_func.hpp"
12#include "../core/math_func.hpp"
13#include "../error_func.h"
14#include "../string_func.h"
15#include "../core/string_builder.hpp"
16#include "../table/control_codes.h"
17
18#include "strgen.h"
19
20#include "../table/strgen_tables.h"
21
22#include "../safeguards.h"
23
StrgenState _strgen; ///< Global state shared between strgen.cpp, game_text.cpp and strgen_base.cpp.
static bool _translated; ///< Set per string while writing a language: true when the string has translated text or translated cases.
static std::string_view _cur_ident; ///< Name of the string currently being written; used in diagnostics.
static ParsedCommandStruct _cur_pcs; ///< Commands extracted from the English text of the string currently being written.
static size_t _cur_argidx; ///< Index of the next argument to consume while a command string is being emitted.
29
31 const CmdStruct *cmd = nullptr;
32 std::string param;
33 std::optional<size_t> argno;
34 std::optional<uint8_t> casei;
35};
36static ParsedCommandString ParseCommandString(StringConsumer &consumer);
37static size_t TranslateArgumentIdx(size_t arg, size_t offset = 0);
38
44Case::Case(uint8_t caseidx, std::string_view string) :
45 caseidx(caseidx), string(string)
46{
47}
48
56LangString::LangString(std::string_view name, std::string_view english, size_t index, size_t line) :
57 name(name), english(english), index(index), line(line)
58{
59}
60
63{
64 this->translated.clear();
65 this->translated_cases.clear();
66}
67
72StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
73{
74 this->strings.resize(max_strings);
75 this->next_string_id = 0;
76}
77
80{
81 for (size_t i = 0; i < this->max_strings; i++) {
82 LangString *ls = this->strings[i].get();
83 if (ls != nullptr) ls->FreeTranslation();
84 }
85}
86
92void StringData::Add(std::shared_ptr<LangString> ls)
93{
94 this->name_to_string[ls->name] = ls;
95 this->strings[ls->index] = std::move(ls);
96}
97
103LangString *StringData::Find(std::string_view s)
104{
105 auto it = this->name_to_string.find(s);
106 if (it == this->name_to_string.end()) return nullptr;
107
108 return it->second.get();
109}
110
117static uint32_t VersionHashStr(uint32_t hash, std::string_view s)
118{
119 for (auto c : s) {
120 hash = std::rotl(hash, 3) ^ c;
121 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
122 }
123 return hash;
124}
125
130uint32_t StringData::Version() const
131{
132 uint32_t hash = 0;
133
134 for (size_t i = 0; i < this->max_strings; i++) {
135 const LangString *ls = this->strings[i].get();
136
137 if (ls != nullptr) {
138 hash ^= i * 0x717239;
139 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
140 hash = VersionHashStr(hash, ls->name);
141
142 StringConsumer consumer(ls->english);
144 while ((cs = ParseCommandString(consumer)).cmd != nullptr) {
145 if (cs.cmd->flags.Test(CmdFlag::DontCount)) continue;
146
147 hash ^= (cs.cmd - _cmd_structs) * 0x1234567;
148 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
149 }
150 }
151 }
152
153 return hash;
154}
155
160size_t StringData::CountInUse(size_t tab) const
161{
162 size_t count = TAB_SIZE;
163 while (count > 0 && this->strings[(tab * TAB_SIZE) + count - 1] == nullptr) --count;
164 return count;
165}
166
167void EmitSingleChar(StringBuilder &builder, std::string_view param, char32_t value)
168{
169 if (!param.empty()) StrgenWarning("Ignoring trailing letters in command");
170 builder.PutUtf8(value);
171}
172
173/* The plural specifier looks like
174 * {NUM} {PLURAL <ARG#> passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
175static std::pair<std::optional<size_t>, std::optional<size_t>> ParseRelNum(StringConsumer &consumer)
176{
178 std::optional<size_t> v = consumer.TryReadIntegerBase<size_t>(10);
179 std::optional<size_t> offset;
180 if (v.has_value() && consumer.ReadCharIf(':')) {
181 /* Take the Nth within */
182 offset = consumer.TryReadIntegerBase<size_t>(10);
183 if (!offset.has_value()) StrgenFatal("Expected number for substring parameter");
184 }
185 return {v, offset};
186}
187
/* Parse out the next word, or return an empty optional when nothing is left to read */
189std::optional<std::string_view> ParseWord(StringConsumer &consumer)
190{
192 if (!consumer.AnyBytesLeft()) return {};
193
194 if (consumer.ReadCharIf('"')) {
195 /* parse until next " or NUL */
196 auto result = consumer.ReadUntilChar('"', StringConsumer::KEEP_SEPARATOR);
197 if (!consumer.ReadCharIf('"')) StrgenFatal("Unterminated quotes");
198 return result;
199 } else {
200 /* proceed until whitespace or NUL */
202 }
203}
204
205/* This is encoded like
206 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
207static void EmitWordList(StringBuilder &builder, const std::vector<std::string> &words)
208{
209 builder.PutUint8(static_cast<uint8_t>(words.size()));
210 for (size_t i = 0; i < words.size(); i++) {
211 size_t len = words[i].size();
212 if (len > UINT8_MAX) StrgenFatal("WordList {}/{} string '{}' too long, max bytes {}", i + 1, words.size(), words[i], UINT8_MAX);
213 builder.PutUint8(static_cast<uint8_t>(len));
214 }
215 for (size_t i = 0; i < words.size(); i++) {
216 builder.Put(words[i]);
217 }
218}
219
220void EmitPlural(StringBuilder &builder, std::string_view param, char32_t)
221{
222 StringConsumer consumer(param);
223
224 /* Parse out the number, if one exists. Otherwise default to prev arg. */
225 auto [argidx, offset] = ParseRelNum(consumer);
226 if (!argidx.has_value()) {
227 if (_cur_argidx == 0) StrgenFatal("Plural choice needs positional reference");
228 argidx = _cur_argidx - 1;
229 }
230
231 const CmdStruct *cmd = _cur_pcs.consuming_commands[*argidx];
232 if (!offset.has_value()) {
233 /* Use default offset */
234 if (cmd == nullptr || !cmd->default_plural_offset.has_value()) {
235 StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
236 }
237 offset = cmd->default_plural_offset;
238 }
239
240 /* Parse each string */
241 std::vector<std::string> words;
242 for (;;) {
243 auto word = ParseWord(consumer);
244 if (!word.has_value()) break;
245 words.emplace_back(*word);
246 }
247
248 if (words.empty()) {
249 StrgenFatal("{}: No plural words", _cur_ident);
250 }
251
252 size_t expected = _plural_forms[_strgen.lang.plural_form].plural_count;
253 if (expected != words.size()) {
254 if (_translated) {
255 StrgenFatal("{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
256 expected, words.size());
257 } else {
258 if (_strgen.show_warnings) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
259 if (words.size() > expected) {
260 words.resize(expected);
261 } else {
262 while (words.size() < expected) {
263 words.push_back(words.back());
264 }
265 }
266 }
267 }
268
269 builder.PutUtf8(SCC_PLURAL_LIST);
270 builder.PutUint8(_strgen.lang.plural_form);
271 builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(*argidx, *offset)));
272 EmitWordList(builder, words);
273}
274
275void EmitGender(StringBuilder &builder, std::string_view param, char32_t)
276{
277 StringConsumer consumer(param);
278 if (consumer.ReadCharIf('=')) {
279 /* This is a {G=DER} command */
280 auto gender = consumer.Read(StringConsumer::npos);
281 auto nw = _strgen.lang.GetGenderIndex(gender);
282 if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", gender);
283
284 /* now nw contains the gender index */
285 builder.PutUtf8(SCC_GENDER_INDEX);
286 builder.PutUint8(nw);
287 } else {
288 /* This is a {G 0 foo bar two} command.
289 * If no relative number exists, default to +0 */
290 auto [argidx, offset] = ParseRelNum(consumer);
291 if (!argidx.has_value()) argidx = _cur_argidx;
292 if (!offset.has_value()) offset = 0;
293
294 const CmdStruct *cmd = _cur_pcs.consuming_commands[*argidx];
295 if (cmd == nullptr || !cmd->flags.Test(CmdFlag::Gender)) {
296 StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
297 }
298
299 std::vector<std::string> words;
300 for (;;) {
301 auto word = ParseWord(consumer);
302 if (!word.has_value()) break;
303 words.emplace_back(*word);
304 }
305 if (words.size() != _strgen.lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
306
307 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
308 builder.PutUtf8(SCC_GENDER_LIST);
309 builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(*argidx, *offset)));
310 EmitWordList(builder, words);
311 }
312}
313
314static const CmdStruct *FindCmd(std::string_view s)
315{
316 for (const auto &cs : _cmd_structs) {
317 if (cs.cmd == s) return &cs;
318 }
319 return nullptr;
320}
321
322static uint8_t ResolveCaseName(std::string_view str)
323{
324 uint8_t case_idx = _strgen.lang.GetCaseIndex(str);
325 if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", str);
326 return case_idx + 1;
327}
328
329/* returns cmd == nullptr on eof */
330static ParsedCommandString ParseCommandString(StringConsumer &consumer)
331{
332 ParsedCommandString result;
333
334 /* Scan to the next command, exit if there's no next command. */
336 if (!consumer.ReadCharIf('{')) return {};
337
338 if (auto argno = consumer.TryReadIntegerBase<uint32_t>(10); argno.has_value()) {
339 result.argno = argno;
340 if (!consumer.ReadCharIf(':')) StrgenFatal("missing arg #");
341 }
342
343 /* parse command name */
344 auto command = consumer.ReadUntilCharIn("} =.");
345 result.cmd = FindCmd(command);
346 if (result.cmd == nullptr) {
347 StrgenError("Undefined command '{}'", command);
348 return {};
349 }
350
351 /* parse case */
352 if (consumer.ReadCharIf('.')) {
353 if (!result.cmd->flags.Test(CmdFlag::Case)) {
354 StrgenFatal("Command '{}' can't have a case", result.cmd->cmd);
355 }
356
357 auto casep = consumer.ReadUntilCharIn("} ");
358 result.casei = ResolveCaseName(casep);
359 }
360
361 /* parse params */
362 result.param = consumer.ReadUntilChar('}', StringConsumer::KEEP_SEPARATOR);
363
364 if (!consumer.ReadCharIf('}')) {
365 StrgenError("Missing }} from command '{}'", result.cmd->cmd);
366 return {};
367 }
368
369 return result;
370}
371
379StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) :
380 data(data), file(file), master(master), translation(translation)
381{
382}
383
384ParsedCommandStruct ExtractCommandString(std::string_view s, bool)
385{
387 StringConsumer consumer(s);
388
389 size_t argidx = 0;
390 for (;;) {
391 /* read until next command from a. */
392 auto cs = ParseCommandString(consumer);
393
394 if (cs.cmd == nullptr) break;
395
396 /* Sanity checking */
397 if (cs.argno.has_value() && cs.cmd->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex");
398
399 if (cs.cmd->consumes > 0) {
400 if (cs.argno.has_value()) argidx = *cs.argno;
401 if (argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx);
402 if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != cs.cmd) StrgenFatal("duplicate param idx {}", argidx);
403
404 p.consuming_commands[argidx++] = cs.cmd;
405 } else if (!cs.cmd->flags.Test(CmdFlag::DontCount)) { // Ignore some of them
406 p.non_consuming_commands.emplace_back(cs.cmd, std::move(cs.param));
407 }
408 }
409
410 return p;
411}
412
413const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
414{
415 if (a == nullptr) return nullptr;
416
417 if (a->cmd == "STRING1" ||
418 a->cmd == "STRING2" ||
419 a->cmd == "STRING3" ||
420 a->cmd == "STRING4" ||
421 a->cmd == "STRING5" ||
422 a->cmd == "STRING6" ||
423 a->cmd == "STRING7" ||
424 a->cmd == "RAW_STRING") {
425 return FindCmd("STRING");
426 }
427
428 return a;
429}
430
431static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::string_view name)
432{
433 /* If we're not translating, i.e. we're compiling the base language,
434 * it is pointless to do all these checks as it'll always be correct.
435 * After all, all checks are based on the base language.
436 */
437 if (!_strgen.translation) return true;
438
439 bool result = true;
440
441 ParsedCommandStruct templ = ExtractCommandString(b, true);
442 ParsedCommandStruct lang = ExtractCommandString(a, true);
443
444 /* For each string in templ, see if we find it in lang */
445 if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
446 StrgenWarning("{}: template string and language string have a different # of commands", name);
447 result = false;
448 }
449
450 for (auto &templ_nc : templ.non_consuming_commands) {
451 /* see if we find it in lang, and zero it out */
452 bool found = false;
453 for (auto &lang_nc : lang.non_consuming_commands) {
454 if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) {
455 /* it was found in both. zero it out from lang so we don't find it again */
456 lang_nc.cmd = nullptr;
457 found = true;
458 break;
459 }
460 }
461
462 if (!found) {
463 StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
464 result = false;
465 }
466 }
467
468 /* if we reach here, all non consumer commands match up.
469 * Check if the non consumer commands match up also. */
470 for (size_t i = 0; i < templ.consuming_commands.max_size(); i++) {
471 if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
472 StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
473 lang.consuming_commands[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
474 templ.consuming_commands[i] == nullptr ? "<empty>" : templ.consuming_commands[i]->cmd);
475 result = false;
476 }
477 }
478
479 return result;
480}
481
482void StringReader::HandleString(std::string_view src)
483{
484 /* Ignore blank lines */
485 if (src.empty()) return;
486
487 StringConsumer consumer(src);
488 if (consumer.ReadCharIf('#')) {
489 if (consumer.ReadCharIf('#') && !consumer.ReadCharIf('#')) this->HandlePragma(consumer.Read(StringConsumer::npos), _strgen.lang);
490 return; // ignore comments
491 }
492
493 /* Read string name */
494 std::string_view str_name = StrTrimView(consumer.ReadUntilChar(':', StringConsumer::KEEP_SEPARATOR), StringConsumer::WHITESPACE_NO_NEWLINE);
495 if (!consumer.ReadCharIf(':')) {
496 StrgenError("Line has no ':' delimiter");
497 return;
498 }
499
500 /* Read string case */
501 std::optional<std::string_view> casep;
502 if (auto index = str_name.find("."); index != std::string_view::npos) {
503 casep = str_name.substr(index + 1);
504 str_name = str_name.substr(0, index);
505 }
506
507 /* Read string data */
508 std::string_view value = consumer.Read(StringConsumer::npos);
509
510 /* Check string is valid UTF-8 */
511 for (StringConsumer validation_consumer(value); validation_consumer.AnyBytesLeft(); ) {
512 auto c = validation_consumer.TryReadUtf8();
513 if (!c.has_value()) StrgenFatal("Invalid UTF-8 sequence in '{}'", value);
514 if (*c <= 0x001F || // ASCII control character range
515 *c == 0x200B || // Zero width space
516 (*c >= 0xE000 && *c <= 0xF8FF) || // Private range
517 (*c >= 0xFFF0 && *c <= 0xFFFF)) { // Specials range
518 StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", static_cast<uint32_t>(*c), value);
519 }
520 }
521
522 /* Check if this string already exists.. */
523 LangString *ent = this->data.Find(str_name);
524
525 if (this->master) {
526 if (casep.has_value()) {
527 StrgenError("Cases in the base translation are not supported.");
528 return;
529 }
530
531 if (ent != nullptr) {
532 StrgenError("String name '{}' is used multiple times", str_name);
533 return;
534 }
535
536 if (this->data.strings[this->data.next_string_id] != nullptr) {
537 StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str_name, this->data.strings[this->data.next_string_id]->name);
538 return;
539 }
540
541 /* Allocate a new LangString */
542 this->data.Add(std::make_unique<LangString>(str_name, value, this->data.next_string_id++, _strgen.cur_line));
543 } else {
544 if (ent == nullptr) {
545 StrgenWarning("String name '{}' does not exist in master file", str_name);
546 return;
547 }
548
549 if (!ent->translated.empty() && !casep.has_value()) {
550 StrgenError("String name '{}' is used multiple times", str_name);
551 return;
552 }
553
554 /* make sure that the commands match */
555 if (!CheckCommandsMatch(value, ent->english, str_name)) return;
556
557 if (casep.has_value()) {
558 ent->translated_cases.emplace_back(ResolveCaseName(*casep), value);
559 } else {
560 ent->translated = value;
561 /* If the string was translated, use the line from the
562 * translated language so errors in the translated file
563 * are properly referenced to. */
564 ent->line = _strgen.cur_line;
565 }
566 }
567}
568
569void StringReader::HandlePragma(std::string_view str, LanguagePackHeader &lang)
570{
571 StringConsumer consumer(str);
572 auto name = consumer.ReadUntilChar(' ', StringConsumer::SKIP_ALL_SEPARATORS);
573 if (name == "plural") {
574 lang.plural_form = consumer.ReadIntegerBase<uint32_t>(10);
575 if (lang.plural_form >= lengthof(_plural_forms)) {
576 StrgenFatal("Invalid pluralform {}", lang.plural_form);
577 }
578 } else {
579 StrgenFatal("unknown pragma '{}'", name);
580 }
581}
582
584{
585 _strgen.warnings = _strgen.errors = 0;
586
587 _strgen.translation = this->translation;
588 _strgen.file = this->file;
589
590 /* For each new file we parse, reset the genders, and language codes. */
591 _strgen.lang = {};
592
593 _strgen.cur_line = 1;
594 while (this->data.next_string_id < this->data.max_strings) {
595 std::optional<std::string> line = this->ReadLine();
596 if (!line.has_value()) return;
597
598 this->HandleString(StrTrimView(line.value(), StringConsumer::WHITESPACE_OR_NEWLINE));
599 _strgen.cur_line++;
600 }
601
602 if (this->data.next_string_id == this->data.max_strings) {
603 StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings);
604 }
605}
606
612{
613 size_t last = 0;
614 for (size_t i = 0; i < data.max_strings; i++) {
615 if (data.strings[i] != nullptr) {
616 this->WriteStringID(data.strings[i]->name, i);
617 last = i;
618 }
619 }
620
621 this->WriteStringID("STR_LAST_STRINGID", last);
622}
623
624static size_t TranslateArgumentIdx(size_t argidx, size_t offset)
625{
626 if (argidx >= _cur_pcs.consuming_commands.max_size()) {
627 StrgenFatal("invalid argidx {}", argidx);
628 }
629 const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
630 if (cs != nullptr && cs->consumes <= offset) {
631 StrgenFatal("invalid argidx offset {}:{}", argidx, offset);
632 }
633
634 if (_cur_pcs.consuming_commands[argidx] == nullptr) {
635 StrgenFatal("no command for this argidx {}", argidx);
636 }
637
638 size_t sum = 0;
639 for (size_t i = 0; i < argidx; i++) {
640 cs = _cur_pcs.consuming_commands[i];
641
642 sum += (cs != nullptr) ? cs->consumes : 1;
643 }
644
645 return sum + offset;
646}
647
648static void PutArgidxCommand(StringBuilder &builder)
649{
650 builder.PutUtf8(SCC_ARG_INDEX);
651 builder.PutUint8(static_cast<uint8_t>(TranslateArgumentIdx(_cur_argidx)));
652}
653
654static std::string PutCommandString(std::string_view str)
655{
656 std::string result;
657 StringBuilder builder(result);
658 StringConsumer consumer(str);
659 _cur_argidx = 0;
660
661 for (;;) {
662 /* Process characters as they are until we encounter a { */
663 builder.Put(consumer.ReadUntilChar('{', StringConsumer::KEEP_SEPARATOR));
664 if (!consumer.AnyBytesLeft()) break;
665
666 auto cs = ParseCommandString(consumer);
667 auto *cmd = cs.cmd;
668 if (cmd == nullptr) break;
669
670 if (cs.casei.has_value()) {
671 builder.PutUtf8(SCC_SET_CASE); // {SET_CASE}
672 builder.PutUint8(*cs.casei);
673 }
674
675 /* For params that consume values, we need to handle the argindex properly */
676 if (cmd->consumes > 0) {
677 /* Check if we need to output a move-param command */
678 if (cs.argno.has_value() && *cs.argno != _cur_argidx) {
679 _cur_argidx = *cs.argno;
680 PutArgidxCommand(builder);
681 }
682
683 /* Output the one from the master string... it's always accurate. */
684 cmd = _cur_pcs.consuming_commands[_cur_argidx++];
685 if (cmd == nullptr) {
686 StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
687 }
688 }
689
690 cmd->proc(builder, cs.param, cmd->value);
691 }
692 return result;
693}
694
700{
701 char buffer[2];
702 size_t offs = 0;
703 if (length >= 0x4000) {
704 StrgenFatal("string too long");
705 }
706
707 if (length >= 0xC0) {
708 buffer[offs++] = static_cast<char>(static_cast<uint8_t>((length >> 8) | 0xC0));
709 }
710 buffer[offs++] = static_cast<char>(static_cast<uint8_t>(length & 0xFF));
711 this->Write({buffer, offs});
712}
713
719{
720 std::vector<size_t> in_use;
721 for (size_t tab = 0; tab < data.tabs; tab++) {
722 size_t n = data.CountInUse(tab);
723
724 in_use.push_back(n);
725 _strgen.lang.offsets[tab] = TO_LE16(static_cast<uint16_t>(n));
726
727 for (size_t j = 0; j != in_use[tab]; j++) {
728 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
729 if (ls != nullptr && ls->translated.empty()) _strgen.lang.missing++;
730 }
731 }
732
733 _strgen.lang.ident = TO_LE32(LanguagePackHeader::IDENT);
734 _strgen.lang.version = TO_LE32(data.Version());
735 _strgen.lang.missing = TO_LE16(_strgen.lang.missing);
736 _strgen.lang.winlangid = TO_LE16(_strgen.lang.winlangid);
737
738 this->WriteHeader(&_strgen.lang);
739
740 for (size_t tab = 0; tab < data.tabs; tab++) {
741 for (size_t j = 0; j != in_use[tab]; j++) {
742 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
743
744 /* For undefined strings, just set that it's an empty string */
745 if (ls == nullptr) {
746 this->WriteLength(0);
747 continue;
748 }
749
750 std::string output;
751 StringBuilder builder(output);
752 _cur_ident = ls->name;
753 _strgen.cur_line = ls->line;
754
755 /* Produce a message if a string doesn't have a translation. */
756 if (ls->translated.empty()) {
757 if (_strgen.show_warnings) {
758 StrgenWarning("'{}' is untranslated", ls->name);
759 }
760 if (_strgen.annotate_todos) {
761 builder.Put("<TODO> ");
762 }
763 }
764
765 /* Extract the strings and stuff from the english command string */
766 _cur_pcs = ExtractCommandString(ls->english, false);
767
768 _translated = !ls->translated_cases.empty() || !ls->translated.empty();
769 const std::string &cmdp = _translated ? ls->translated : ls->english;
770
771 if (!ls->translated_cases.empty()) {
772 /* Need to output a case-switch.
773 * It has this format
774 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <LENDEFAULT> <STRINGDEFAULT>
775 * Each LEN is printed using 2 bytes in little endian order. */
776 builder.PutUtf8(SCC_SWITCH_CASE);
777 builder.PutUint8(static_cast<uint8_t>(ls->translated_cases.size()));
778
779 /* Write each case */
780 for (const Case &c : ls->translated_cases) {
781 auto case_str = PutCommandString(c.string);
782 builder.PutUint8(c.caseidx);
783 builder.PutUint16LE(static_cast<uint16_t>(case_str.size()));
784 builder.Put(case_str);
785 }
786 }
787
788 std::string def_str;
789 if (!cmdp.empty()) def_str = PutCommandString(cmdp);
790 if (!ls->translated_cases.empty()) {
791 builder.PutUint16LE(static_cast<uint16_t>(def_str.size()));
792 }
793 builder.Put(def_str);
794
795 this->WriteLength(output.size());
796 this->Write(output);
797 }
798 }
799}
constexpr bool Test(Tvalue_type value) const
Test if the value-th bit is set.
void PutUtf8(char32_t c)
Append UTF-8 char.
void PutUint16LE(uint16_t value)
Append binary uint16 using little endian.
void Put(std::string_view str)
Append string.
void PutUint8(uint8_t value)
Append binary uint8.
Compose data into a growing std::string.
Parse data from a string / buffer.
bool ReadCharIf(char c)
Check whether the next 8-bit char matches 'c', and skip it.
std::optional< T > TryReadIntegerBase(int base, bool clamp=false)
Try to read and parse an integer in number 'base', and then advance the reader.
std::string_view ReadUntilChar(char c, SeparatorUsage sep)
Read data until the first occurrence of 8-bit char 'c', and advance reader.
void SkipUntilChar(char c, SeparatorUsage sep)
Skip data until the first occurrence of 8-bit char 'c'.
@ SKIP_ALL_SEPARATORS
Read and discard all consecutive separators, do not include any in the result.
@ KEEP_SEPARATOR
Keep the separator in the data as next value to be read.
bool AnyBytesLeft() const noexcept
Check whether any bytes left to read.
static const std::string_view WHITESPACE_OR_NEWLINE
ASCII whitespace characters, including new-line.
static const std::string_view WHITESPACE_NO_NEWLINE
ASCII whitespace characters, excluding new-line.
void SkipUntilCharNotIn(std::string_view chars)
Skip 8-bit chars, while they are in 'chars', until they are not.
std::string_view ReadUntilCharIn(std::string_view chars)
Read 8-bit chars, while they are not in 'chars', until they are; and advance reader.
T ReadIntegerBase(int base, T def=0, bool clamp=false)
Read and parse an integer in number 'base', and advance the reader.
std::string_view Read(size_type len)
Read the next 'len' bytes, and advance reader.
static constexpr size_type npos
Special value for "end of data".
static const uint8_t MAX_NUM_GENDERS
Maximum number of supported genders.
Definition language.h:20
static const uint8_t MAX_NUM_CASES
Maximum number of supported cases.
Definition language.h:21
constexpr bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is between a window started at some base point.
#define lengthof(array)
Return the length of a fixed size array.
Definition stdafx.h:271
Structures related to strgen.
static bool _translated
Whether the current language is not the master language.
static uint32_t VersionHashStr(uint32_t hash, std::string_view s)
Create a compound hash.
static const PluralForm _plural_forms[]
All plural forms used.
@ Gender
These commands support genders.
@ Case
These commands support cases.
@ DontCount
These commands aren't counted for comparison.
static const uint TAB_SIZE
Number of strings per StringTab.
Container for the different cases of a string.
Definition strgen.h:22
Case(uint8_t caseidx, std::string_view string)
Create a new case.
virtual void WriteStringID(const std::string &name, size_t stringid)=0
Write the string ID.
void WriteHeader(const StringData &data)
Write the header information.
Information about a single string.
Definition strgen.h:30
size_t line
Line of string in source-file.
Definition strgen.h:35
std::string english
English text.
Definition strgen.h:32
std::vector< Case > translated_cases
Cases of the translation.
Definition strgen.h:36
std::string translated
Translated text.
Definition strgen.h:33
void FreeTranslation()
Free all data related to the translation.
std::string name
Name of the string.
Definition strgen.h:31
LangString(std::string_view name, std::string_view english, size_t index, size_t line)
Create a new string.
Header of a language file.
Definition language.h:24
uint8_t GetCaseIndex(std::string_view case_str) const
Get the index for the given case.
Definition language.h:81
uint8_t plural_form
plural form index
Definition language.h:41
uint32_t version
32-bits of auto generated version info which is basically a hash of strings.h
Definition language.h:28
uint16_t offsets[TEXT_TAB_END]
the offsets
Definition language.h:32
uint16_t winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determin...
Definition language.h:51
uint8_t num_genders
the number of genders of this language
Definition language.h:53
uint16_t missing
number of missing strings.
Definition language.h:40
uint32_t ident
32-bits identifier
Definition language.h:27
static const uint32_t IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition language.h:25
uint8_t GetGenderIndex(std::string_view gender_str) const
Get the index for the given gender.
Definition language.h:68
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
virtual void WriteLength(size_t length)
Write the length as a simple gamma.
virtual void Write(std::string_view buffer)=0
Write a number of bytes.
virtual void WriteLang(const StringData &data)
Actually write the language.
size_t plural_count
The number of plural forms.
Global state shared between strgen.cpp, game_text.cpp and strgen_base.cpp.
Definition strgen.h:159
std::string file
The filename of the input, so we can refer to it in errors/warnings.
Definition strgen.h:160
bool translation
Is the current file actually a translation or not.
Definition strgen.h:166
LanguagePackHeader lang
Header information about a language.
Definition strgen.h:167
size_t cur_line
The current line we're parsing in the input file.
Definition strgen.h:161
Information about the currently known strings.
Definition strgen.h:43
size_t tabs
The number of 'tabs' of strings.
Definition strgen.h:46
void Add(std::shared_ptr< LangString > ls)
Add a newly created LangString.
size_t max_strings
The maximum number of strings.
Definition strgen.h:47
size_t next_string_id
The next string ID to allocate.
Definition strgen.h:48
void FreeTranslation()
Free all data related to the translation.
LangString * Find(std::string_view s)
Find a LangString based on the string name.
StringData(size_t tabs)
Create a new string data container.
std::unordered_map< std::string, std::shared_ptr< LangString >, StringHash, std::equal_to<> > name_to_string
Lookup table for the strings.
Definition strgen.h:45
std::vector< std::shared_ptr< LangString > > strings
List of all known strings.
Definition strgen.h:44
uint32_t Version() const
Make a hash of the file to get a unique "version number".
size_t CountInUse(size_t tab) const
Count the number of tab elements that are in use.
const std::string file
The file we are reading.
Definition strgen.h:61
StringReader(StringData &data, const std::string &file, bool master, bool translation)
Prepare reading.
StringData & data
The data to fill during reading.
Definition strgen.h:60
virtual void ParseFile()
Start parsing the file.
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
Definition strgen.h:63
virtual void HandlePragma(std::string_view str, LanguagePackHeader &lang)
Handle the pragma of the file.
virtual std::optional< std::string > ReadLine()=0
Read a single line from the source of strings.
bool master
Are we reading the master file?
Definition strgen.h:62