OpenTTD Source 20250312-master-gcdcc6b491d
strgen_base.cpp
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
7
10#include "../stdafx.h"
11#include "../core/endian_func.hpp"
12#include "../core/mem_func.hpp"
13#include "../error_func.h"
14#include "../string_func.h"
15#include "../table/control_codes.h"
16
17#include "strgen.h"
18
19
20#include "../table/strgen_tables.h"
21
22#include "../safeguards.h"
23
24/* Compiles a list of strings into a compiled string list */
25
/* Whether the current language is not the master language; set per string while a language is written out. */
26static bool _translated;
/* Is the current file actually a translation or not; CheckCommandsMatch skips all of its checks while this is false. */
27static bool _translation;
/* The filename of the input, so we can refer to it in errors/warnings. */
28const char *_file = "(unknown file)";
/* Error and warning counters (both are reset to 0 when parsing of a file starts) and the to-do flags:
 * bit 0 prefixes untranslated strings with "<TODO> ", bit 1 warns about untranslated strings,
 * bit 2 is set while the file being parsed is not a translation. */
30int _errors, _warnings, _show_todo;
32
33static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei);
34
40Case::Case(int caseidx, const std::string &string) :
41 caseidx(caseidx), string(string)
42{
43}
44
52LangString::LangString(const std::string &name, const std::string &english, size_t index, int line) :
53 name(name), english(english), index(index), line(line)
54{
55}
56
59{
60 this->translated.clear();
61 this->translated_cases.clear();
62}
63
68StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
69{
70 this->strings.resize(max_strings);
71 this->next_string_id = 0;
72}
73
76{
77 for (size_t i = 0; i < this->max_strings; i++) {
78 LangString *ls = this->strings[i].get();
79 if (ls != nullptr) ls->FreeTranslation();
80 }
81}
82
88void StringData::Add(std::unique_ptr<LangString> ls)
89{
90 this->name_to_string[ls->name] = ls.get();
91 this->strings[ls->index].swap(ls);
92}
93
99LangString *StringData::Find(const std::string_view s)
100{
101 auto it = this->name_to_string.find(s);
102 if (it == this->name_to_string.end()) return nullptr;
103
104 return it->second;
105}
106
113uint StringData::VersionHashStr(uint hash, const char *s) const
114{
115 for (; *s != '\0'; s++) {
116 hash = std::rotl(hash, 3) ^ *s;
117 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
118 }
119 return hash;
120}
121
127{
128 uint hash = 0;
129
130 for (size_t i = 0; i < this->max_strings; i++) {
131 const LangString *ls = this->strings[i].get();
132
133 if (ls != nullptr) {
134 const CmdStruct *cs;
135 const char *s;
136 std::string buf;
137 int argno;
138 int casei;
139
140 s = ls->name.c_str();
141 hash ^= i * 0x717239;
142 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
143 hash = this->VersionHashStr(hash, s + 1);
144
145 s = ls->english.c_str();
146 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
147 if (cs->flags.Test(CmdFlag::DontCount)) continue;
148
149 hash ^= (cs - _cmd_structs) * 0x1234567;
150 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
151 }
152 }
153 }
154
155 return hash;
156}
157
162uint StringData::CountInUse(uint tab) const
163{
164 int i;
165 for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
166 return i + 1;
167}
168
/* Name of the string that is currently being written out; used to prefix messages. */
169static const char *_cur_ident;
170
171/* Used when generating some advanced commands. */
/* The commands extracted from the english text of the string that is currently being written out. */
172static ParsedCommandStruct _cur_pcs;
/* Index of the argument the next consuming command of the current string will use. */
173static int _cur_argidx;
174
176struct Buffer : std::vector<uint8_t> {
181 void AppendByte(uint8_t value)
182 {
183 this->push_back(value);
184 }
185
190 void AppendUtf8(uint32_t value)
191 {
192 if (value < 0x80) {
193 this->push_back(value);
194 } else if (value < 0x800) {
195 this->push_back(0xC0 + GB(value, 6, 5));
196 this->push_back(0x80 + GB(value, 0, 6));
197 } else if (value < 0x10000) {
198 this->push_back(0xE0 + GB(value, 12, 4));
199 this->push_back(0x80 + GB(value, 6, 6));
200 this->push_back(0x80 + GB(value, 0, 6));
201 } else if (value < 0x110000) {
202 this->push_back(0xF0 + GB(value, 18, 3));
203 this->push_back(0x80 + GB(value, 12, 6));
204 this->push_back(0x80 + GB(value, 6, 6));
205 this->push_back(0x80 + GB(value, 0, 6));
206 } else {
207 StrgenWarning("Invalid unicode value U+0x{:X}", value);
208 }
209 }
210};
211
212size_t Utf8Validate(const char *s)
213{
214 uint32_t c;
215
216 if (!HasBit(s[0], 7)) {
217 /* 1 byte */
218 return 1;
219 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
220 /* 2 bytes */
221 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
222 if (c >= 0x80) return 2;
223 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
224 /* 3 bytes */
225 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
226 if (c >= 0x800) return 3;
227 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
228 /* 4 bytes */
229 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
230 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
231 }
232
233 return 0;
234}
235
236
237void EmitSingleChar(Buffer *buffer, char *buf, int value)
238{
239 if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command");
240 buffer->AppendUtf8(value);
241}
242
243
244/* The plural specifier looks like
245 * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
246
247/* This is encoded like
248 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
249
/**
 * Parse an (optionally relative) number, optionally followed by ":offset".
 *
 * A number with a leading '+' or a negative number is added to the incoming
 * \a value; any other number replaces it. The outputs are only written when
 * the whole expression parsed successfully, so callers can rely on their
 * defaults being intact after a failure.
 *
 * @param buf    The text to parse; advanced past the parsed part on success.
 * @param value  In: the value relative numbers are based on. Out: the resulting value.
 * @param offset Receives the number after ':' when present; may be nullptr, in which case ':' is not consumed.
 * @return true when a number was parsed, false otherwise (nothing is modified then).
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *s = *buf;
	char *end;

	while (*s == ' ' || *s == '\t') s++;
	const bool rel = (*s == '+');
	if (rel) s++;

	const int v = static_cast<int>(std::strtol(s, &end, 0));
	if (end == s) return false;
	const int new_value = (rel || v < 0) ? *value + v : v;

	bool has_offset = false;
	int new_offset = 0;
	if (offset != nullptr && *end == ':') {
		/* Take the Nth within */
		s = end + 1;
		new_offset = static_cast<int>(std::strtol(s, &end, 0));
		if (end == s) return false;
		has_offset = true;
	}

	/* Everything parsed; commit the results. */
	*value = new_value;
	if (has_offset) *offset = new_offset;
	*buf = end;
	return true;
}
277
/**
 * Parse out the next word.
 * A word is either a run of characters up to the next space/tab, or, when it
 * starts with a double quote, everything up to the closing quote. The word is
 * NUL-terminated in place.
 * @param buf The text to parse; advanced past the word (and its terminator) when one is found.
 * @return The word, or nullptr when only whitespace is left (buf is untouched then).
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return nullptr;

	const bool quoted = (*cursor == '"');
	if (quoted) cursor++;
	char *word = cursor;

	/* Quoted words end at the next quote, plain words at the next whitespace. */
	auto ends_word = [quoted](char c) {
		return quoted ? c == '"' : (c == ' ' || c == '\t');
	};
	while (*cursor != '\0' && !ends_word(*cursor)) cursor++;

	/* Cut the word off and step over the terminator, unless the text simply ended. */
	if (*cursor != '\0') *cursor++ = '\0';

	*buf = cursor;
	return word;
}
312
313/* Forward declaration */
314static int TranslateArgumentIdx(int arg, int offset = 0);
315
316static void EmitWordList(Buffer *buffer, const std::vector<const char *> &words, uint nw)
317{
318 /* Maximum word length in bytes, excluding trailing NULL. */
319 constexpr uint MAX_WORD_LENGTH = UINT8_MAX - 2;
320
321 buffer->AppendByte(nw);
322 for (uint i = 0; i < nw; i++) {
323 size_t len = strlen(words[i]) + 1;
324 if (len >= UINT8_MAX) StrgenFatal("WordList {}/{} string '{}' too long, max bytes {}", i + 1, nw, words[i], MAX_WORD_LENGTH);
325 buffer->AppendByte(static_cast<uint8_t>(len));
326 }
327 for (uint i = 0; i < nw; i++) {
328 for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
329 buffer->AppendByte(0);
330 }
331}
332
/*
 * Emit a plural list: the SCC_PLURAL_LIST code, the translated index of the
 * argument the plural depends on, and the list of plural words.
 *
 * buffer: the buffer to append the encoded command to.
 * buf:    the parameter text of the command: an optional (relative) argument
 *         number with optional ":offset", followed by the plural words.
 * The third (unnamed) parameter is not used.
 */
333void EmitPlural(Buffer *buffer, char *buf, int)
334{
335 int argidx = _cur_argidx;
336 int offset = -1;
 /* NOTE(review): 'expected' is used below but its declaration is not visible in this
  * listing (the numbering jumps from 336 to 338) -- confirm against the full file. */
338 std::vector<const char *> words(std::max(expected, MAX_PLURALS), nullptr);
339 int nw = 0;
340
341 /* Parse out the number, if one exists. Otherwise default to prev arg. */
342 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
343
 /* NOTE(review): argidx is used as an index here before any range check; TranslateArgumentIdx
  * only validates it further down. Confirm it cannot be negative or too large at this point,
  * e.g. after the argidx-- above while _cur_argidx is 0. */
344 const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
345 if (offset == -1) {
346 /* Use default offset */
347 if (cmd == nullptr || cmd->default_plural_offset < 0) {
348 StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
349 }
350 offset = cmd->default_plural_offset;
351 }
352
353 /* Parse each string */
354 for (nw = 0; nw < MAX_PLURALS; nw++) {
355 words[nw] = ParseWord(&buf);
356 if (words[nw] == nullptr) break;
357 }
358
359 if (nw == 0) {
360 StrgenFatal("{}: No plural words", _cur_ident);
361 }
362
 /* A wrong number of forms is fatal for translated text; for untranslated (english) text
  * the list is cut off or padded with copies of the last word instead. */
363 if (expected != nw) {
364 if (_translated) {
365 StrgenFatal("{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
366 expected, nw);
367 } else {
368 if ((_show_todo & 2) != 0) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
369 if (nw > expected) {
370 nw = expected;
371 } else {
372 for (; nw < expected; nw++) {
373 words[nw] = words[nw - 1];
374 }
375 }
376 }
377 }
378
379 buffer->AppendUtf8(SCC_PLURAL_LIST);
 /* NOTE(review): the numbering jumps from 379 to 381, so a statement may be missing here -- confirm. */
381 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
382 EmitWordList(buffer, words, nw);
383}
384
385void EmitGender(Buffer *buffer, char *buf, int)
386{
387 int argidx = _cur_argidx;
388 int offset = 0;
389 uint nw;
390
391 if (buf[0] == '=') {
392 buf++;
393
394 /* This is a {G=DER} command */
395 nw = _lang.GetGenderIndex(buf);
396 if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf);
397
398 /* now nw contains the gender index */
399 buffer->AppendUtf8(SCC_GENDER_INDEX);
400 buffer->AppendByte(nw);
401 } else {
402 std::vector<const char *> words(MAX_NUM_GENDERS, nullptr);
403
404 /* This is a {G 0 foo bar two} command.
405 * If no relative number exists, default to +0 */
406 ParseRelNum(&buf, &argidx, &offset);
407
408 const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
409 if (cmd == nullptr || !cmd->flags.Test(CmdFlag::Gender)) {
410 StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
411 }
412
413 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
414 words[nw] = ParseWord(&buf);
415 if (words[nw] == nullptr) break;
416 }
417 if (nw != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
418
419 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
420 buffer->AppendUtf8(SCC_GENDER_LIST);
421 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
422 EmitWordList(buffer, words, nw);
423 }
424}
425
426static const CmdStruct *FindCmd(const char *s, int len)
427{
428 for (const auto &cs : _cmd_structs) {
429 if (strncmp(cs.cmd, s, len) == 0 && cs.cmd[len] == '\0') return &cs;
430 }
431 return nullptr;
432}
433
434static uint ResolveCaseName(const char *str, size_t len)
435{
436 /* First get a clean copy of only the case name, then resolve it. */
437 char case_str[CASE_GENDER_LEN];
438 len = std::min(lengthof(case_str) - 1, len);
439 memcpy(case_str, str, len);
440 case_str[len] = '\0';
441
442 uint8_t case_idx = _lang.GetCaseIndex(case_str);
443 if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", case_str);
444 return case_idx + 1;
445}
446
447
/*
 * Find and parse the next "{...}" command in a string.
 *
 * str:   in: where to start scanning; out: just past the closing '}' of the
 *        parsed command. Only updated when a command is returned.
 * param: the parameter text of the command (everything between the command
 *        name/case and the closing '}') is appended to this string.
 * argno: receives the explicit "N:" argument number, or -1 when there is none.
 * casei: receives the resolved ".case" value, or -1 when there is none.
 *
 * Returns the command, or nullptr when there is no further command or when an
 * error was reported for it.
 */
448/* returns nullptr on eof
449 * else returns command struct */
450static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei)
451{
452 const char *s = *str, *start;
453 char c;
454
455 *argno = -1;
456 *casei = -1;
457
458 /* Scan to the next command, exit if there's no next command. */
459 for (; *s != '{'; s++) {
460 if (*s == '\0') return nullptr;
461 }
462 s++; // Skip past the {
463
 /* A leading digit starts an explicit argument number, which must be followed by ':'. */
464 if (*s >= '0' && *s <= '9') {
465 char *end;
466
467 *argno = std::strtoul(s, &end, 0);
468 if (*end != ':') StrgenFatal("missing arg #");
469 s = end + 1;
470 }
471
472 /* parse command name */
473 start = s;
 /* The loop leaves s one past the terminating character c, hence the "- 1" in the length below. */
474 do {
475 c = *s++;
476 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
477
478 const CmdStruct *cmd = FindCmd(start, s - start - 1);
479 if (cmd == nullptr) {
480 std::string command(start, s - start - 1);
481 StrgenError("Undefined command '{}'", command);
482 return nullptr;
483 }
484
 /* A '.' after the name introduces a case, which only some commands support. */
485 if (c == '.') {
486 const char *casep = s;
487
488 if (!cmd->flags.Test(CmdFlag::Case)) {
489 StrgenFatal("Command '{}' can't have a case", cmd->cmd);
490 }
491
492 do {
493 c = *s++;
494 } while (c != '}' && c != ' ' && c != '\0');
495 *casei = ResolveCaseName(casep, s - casep - 1);
496 }
497
498 if (c == '\0') {
499 StrgenError("Missing }} from command '{}'", start);
500 return nullptr;
501 }
502
503
504 if (c != '}') {
 /* Step back so that the '=' itself becomes the first character of the parameter text. */
505 if (c == '=') s--;
506 /* copy params */
507 start = s;
508 for (;;) {
509 c = *s++;
510 if (c == '}') break;
511 if (c == '\0') {
512 StrgenError("Missing }} from command '{}'", start);
513 return nullptr;
514 }
515 param += c;
516 }
517 }
518
519 *str = s;
520
521 return cmd;
522}
523
531StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) :
532 data(data), file(file), master(master), translation(translation)
533{
534}
535
536ParsedCommandStruct ExtractCommandString(const char *s, bool)
537{
538 int argno;
539 int argidx = 0;
540 int casei;
541
543
544 for (;;) {
545 /* read until next command from a. */
546 std::string param;
547 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
548
549 if (ar == nullptr) break;
550
551 /* Sanity checking */
552 if (argno != -1 && ar->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex");
553
554 if (ar->consumes) {
555 if (argno != -1) argidx = argno;
556 if (argidx < 0 || (uint)argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx);
557 if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != ar) StrgenFatal("duplicate param idx {}", argidx);
558
559 p.consuming_commands[argidx++] = ar;
560 } else if (!ar->flags.Test(CmdFlag::DontCount)) { // Ignore some of them
561 p.non_consuming_commands.emplace_back(CmdPair{ar, std::move(param)});
562 }
563 }
564
565 return p;
566}
567
568
569const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
570{
571 if (a == nullptr) return nullptr;
572
573 if (strcmp(a->cmd, "STRING1") == 0 ||
574 strcmp(a->cmd, "STRING2") == 0 ||
575 strcmp(a->cmd, "STRING3") == 0 ||
576 strcmp(a->cmd, "STRING4") == 0 ||
577 strcmp(a->cmd, "STRING5") == 0 ||
578 strcmp(a->cmd, "STRING6") == 0 ||
579 strcmp(a->cmd, "STRING7") == 0 ||
580 strcmp(a->cmd, "RAW_STRING") == 0) {
581 return FindCmd("STRING", 6);
582 }
583
584 return a;
585}
586
587
588static bool CheckCommandsMatch(const char *a, const char *b, const char *name)
589{
590 /* If we're not translating, i.e. we're compiling the base language,
591 * it is pointless to do all these checks as it'll always be correct.
592 * After all, all checks are based on the base language.
593 */
594 if (!_translation) return true;
595
596 bool result = true;
597
598 ParsedCommandStruct templ = ExtractCommandString(b, true);
599 ParsedCommandStruct lang = ExtractCommandString(a, true);
600
601 /* For each string in templ, see if we find it in lang */
602 if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
603 StrgenWarning("{}: template string and language string have a different # of commands", name);
604 result = false;
605 }
606
607 for (auto &templ_nc : templ.non_consuming_commands) {
608 /* see if we find it in lang, and zero it out */
609 bool found = false;
610 for (auto &lang_nc : lang.non_consuming_commands) {
611 if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) {
612 /* it was found in both. zero it out from lang so we don't find it again */
613 lang_nc.cmd = nullptr;
614 found = true;
615 break;
616 }
617 }
618
619 if (!found) {
620 StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
621 result = false;
622 }
623 }
624
625 /* if we reach here, all non consumer commands match up.
626 * Check if the non consumer commands match up also. */
627 for (uint i = 0; i < templ.consuming_commands.max_size(); i++) {
628 if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
629 StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
630 lang.consuming_commands[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
631 templ.consuming_commands[i] == nullptr ? "<empty>" : templ.consuming_commands[i]->cmd);
632 result = false;
633 }
634 }
635
636 return result;
637}
638
/*
 * Handle a single line of the file being read.
 *
 * Lines starting with '#' are either pragmas ("##" not followed by a third
 * '#', passed on to HandlePragma) or ignored. Lines starting with ';' or a
 * space and empty lines are ignored as well. Every other line must have the
 * form "NAME[.case] :text". When reading the master file the string is
 * added as a new LangString, otherwise the text is stored as the translation
 * (or as a case of the translation) of the already known string.
 *
 * str: the line to handle; it is modified in place.
 */
639void StringReader::HandleString(char *str)
640{
641 if (*str == '#') {
642 if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
643 return;
644 }
645
646 /* Ignore comments & blank lines */
647 if (*str == ';' || *str == ' ' || *str == '\0') return;
648
649 char *s = strchr(str, ':');
650 if (s == nullptr) {
651 StrgenError("Line has no ':' delimiter");
652 return;
653 }
654
655 char *t;
656 /* Trim spaces.
657 * After this str points to the command name, and s points to the command contents */
658 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
659 *t = 0;
660 s++;
661
662 /* Check string is valid UTF-8 */
663 const char *tmp;
664 for (tmp = s; *tmp != '\0';) {
665 size_t len = Utf8Validate(tmp);
666 if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
667
668 char32_t c;
669 Utf8Decode(&c, tmp);
670 if (c <= 0x001F || // ASCII control character range
671 c == 0x200B || // Zero width space
672 (c >= 0xE000 && c <= 0xF8FF) || // Private range
673 (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
674 StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", (int)c, s);
675 }
676
677 tmp += len;
678 }
679
680 /* Check if the string has a case..
681 * The syntax for cases is IDENTNAME.case */
 /* The name was cut off before the ':' above, so only a '.' inside the name is found here. */
682 char *casep = strchr(str, '.');
683 if (casep != nullptr) *casep++ = '\0';
684
685 /* Check if this string already exists.. */
686 LangString *ent = this->data.Find(str);
687
688 if (this->master) {
689 if (casep != nullptr) {
690 StrgenError("Cases in the base translation are not supported.");
691 return;
692 }
693
694 if (ent != nullptr) {
695 StrgenError("String name '{}' is used multiple times", str);
696 return;
697 }
698
699 if (this->data.strings[this->data.next_string_id] != nullptr) {
700 StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
701 return;
702 }
703
704 /* Allocate a new LangString */
705 this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _cur_line));
706 } else {
707 if (ent == nullptr) {
708 StrgenWarning("String name '{}' does not exist in master file", str);
709 return;
710 }
711
 /* A second plain translation is an error; cases may be added to an already translated string. */
712 if (!ent->translated.empty() && casep == nullptr) {
713 StrgenError("String name '{}' is used multiple times", str);
714 return;
715 }
716
717 /* make sure that the commands match */
718 if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return;
719
720 if (casep != nullptr) {
721 ent->translated_cases.emplace_back(ResolveCaseName(casep, strlen(casep)), s);
722 } else {
723 ent->translated = s;
724 /* If the string was translated, use the line from the
725 * translated language so errors in the translated file
726 * are properly referenced to. */
727 ent->line = _cur_line;
728 }
729 }
730}
731
/* NOTE(review): the signature line of this function is not visible in this listing; the body
 * reads 'str' and is presumably the pragma handler that HandleString calls -- confirm.
 * Only the "plural " pragma is handled here, any other pragma is a fatal error. */
733{
734 if (!memcmp(str, "plural ", 7)) {
 /* The text after "plural " is the number of the plural form of the language. */
735 _lang.plural_form = atoi(str + 7);
 /* NOTE(review): the numbering jumps from 735 to 737 and the braces below do not balance, so the
  * condition guarding this fatal error appears to be missing from this listing -- confirm. */
737 StrgenFatal("Invalid pluralform {}", _lang.plural_form);
738 }
739 } else {
740 StrgenFatal("unknown pragma '{}'", str);
741 }
742}
743
/**
 * Remove all carriage returns, newlines and spaces from the end of a string.
 * @param str The string to strip, in place.
 */
static void StripTrailingWhitespace(std::string &str)
{
	const std::string::size_type last = str.find_last_not_of("\r\n ");
	if (last == std::string::npos) {
		/* Nothing but whitespace (or nothing at all). */
		str.clear();
	} else {
		str.resize(last + 1);
	}
}
748
/* NOTE(review): the signature line of this function is not visible in this listing; the body
 * uses this->ReadLine() and this->HandleString() and is presumably the reader's file parsing
 * entry point -- confirm. It resets the per-file state and then handles the file line by line. */
750{
751 _warnings = _errors = 0;
752
 /* NOTE(review): the numbering jumps from 752 to 754, a statement may be missing here -- confirm. */
754 _file = this->file.c_str();
755
756 /* Abusing _show_todo to replace "warning" with "info" for translations. */
757 _show_todo &= 3;
758 if (!this->translation) _show_todo |= 4;
759
760 /* For each new file we parse, reset the genders, and language codes. */
761 MemSetT(&_lang, 0);
 /* NOTE(review): the numbering jumps from 761 to 765, statements may be missing here -- confirm. */
765
766 _cur_line = 1;
 /* Stop reading as soon as every string ID is taken. */
767 while (this->data.next_string_id < this->data.max_strings) {
768 std::optional<std::string> line = this->ReadLine();
 /* No more lines: the file was read completely. */
769 if (!line.has_value()) return;
770
771 StripTrailingWhitespace(line.value());
772 this->HandleString(line.value().data());
773 _cur_line++;
774 }
775
 /* Only reached when the loop ended because the string IDs ran out. */
776 if (this->data.next_string_id == this->data.max_strings) {
777 StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings);
778 }
779}
780
/* NOTE(review): the signature line of this function is not visible in this listing; the body
 * reads 'data' and calls this->WriteStringID() -- confirm name and owning class.
 * Writes the ID of every string that is in use, followed by STR_LAST_STRINGID. */
786{
 /* Index of the last string that was written; stays 0 when no string is in use. */
787 int last = 0;
788 for (size_t i = 0; i < data.max_strings; i++) {
789 if (data.strings[i] != nullptr) {
790 this->WriteStringID(data.strings[i]->name, (int)i);
791 last = (int)i;
792 }
793 }
794
795 this->WriteStringID("STR_LAST_STRINGID", last);
796}
797
798static int TranslateArgumentIdx(int argidx, int offset)
799{
800 int sum;
801
802 if (argidx < 0 || (uint)argidx >= _cur_pcs.consuming_commands.max_size()) {
803 StrgenFatal("invalid argidx {}", argidx);
804 }
805 const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
806 if (cs != nullptr && cs->consumes <= offset) {
807 StrgenFatal("invalid argidx offset {}:{}", argidx, offset);
808 }
809
810 if (_cur_pcs.consuming_commands[argidx] == nullptr) {
811 StrgenFatal("no command for this argidx {}", argidx);
812 }
813
814 for (int i = sum = 0; i < argidx; i++) {
815 cs = _cur_pcs.consuming_commands[i];
816
817 sum += (cs != nullptr) ? cs->consumes : 1;
818 }
819
820 return sum + offset;
821}
822
823static void PutArgidxCommand(Buffer *buffer)
824{
825 buffer->AppendUtf8(SCC_ARG_INDEX);
826 buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
827}
828
829
830static void PutCommandString(Buffer *buffer, const char *str)
831{
832 _cur_argidx = 0;
833
834 while (*str != '\0') {
835 /* Process characters as they are until we encounter a { */
836 if (*str != '{') {
837 buffer->AppendByte(*str++);
838 continue;
839 }
840
841 std::string param;
842 int argno;
843 int casei;
844 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
845 if (cs == nullptr) break;
846
847 if (casei != -1) {
848 buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
849 buffer->AppendByte(casei);
850 }
851
852 /* For params that consume values, we need to handle the argindex properly */
853 if (cs->consumes > 0) {
854 /* Check if we need to output a move-param command */
855 if (argno != -1 && argno != _cur_argidx) {
856 _cur_argidx = argno;
857 PutArgidxCommand(buffer);
858 }
859
860 /* Output the one from the master string... it's always accurate. */
861 cs = _cur_pcs.consuming_commands[_cur_argidx++];
862 if (cs == nullptr) {
863 StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
864 }
865 }
866
867 cs->proc(buffer, param.data(), cs->value);
868 }
869}
870
/* NOTE(review): the signature line of this function is not visible in this listing; the body
 * reads 'length' and calls this->Write() -- confirm name and owning class.
 * Writes a length in one or two bytes: values below 0xC0 take a single byte, larger values
 * take two bytes of which the first has the upper two bits set. Lengths of 0x4000 and more
 * are a fatal error. */
876{
877 char buffer[2];
878 int offs = 0;
879 if (length >= 0x4000) {
880 StrgenFatal("string too long");
881 }
882
883 if (length >= 0xC0) {
 /* First byte: the marker bits plus the upper bits of the length. */
884 buffer[offs++] = (length >> 8) | 0xC0;
885 }
886 buffer[offs++] = length & 0xFF;
887 this->Write((uint8_t*)buffer, offs);
888}
889
/* NOTE(review): the signature line of this function is not visible in this listing; the body
 * reads 'data' and calls this->WriteHeader(), this->WriteLength() and this->Write() -- confirm
 * name and owning class.
 * Fills in the remaining fields of the language header, writes the header and then writes
 * every string of every tab as a length followed by the encoded string. */
895{
896 std::vector<uint> in_use;
 /* First pass: per tab, the number of slots to write and the number of untranslated strings. */
897 for (size_t tab = 0; tab < data.tabs; tab++) {
898 uint n = data.CountInUse((uint)tab);
899
900 in_use.push_back(n);
901 _lang.offsets[tab] = TO_LE16(n);
902
903 for (uint j = 0; j != in_use[tab]; j++) {
904 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
905 if (ls != nullptr && ls->translated.empty()) _lang.missing++;
906 }
907 }
908
 /* NOTE(review): the numbering jumps from 908 to 910, a statement may be missing here -- confirm. */
910 _lang.version = TO_LE32(data.Version());
911 _lang.missing = TO_LE16(_lang.missing);
912 _lang.winlangid = TO_LE16(_lang.winlangid);
913
914 this->WriteHeader(&_lang);
 /* Reused for every string; cleared after each one has been written. */
915 Buffer buffer;
916
917 for (size_t tab = 0; tab < data.tabs; tab++) {
918 for (uint j = 0; j != in_use[tab]; j++) {
919 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
920 const std::string *cmdp;
921
922 /* For undefined strings, just set that it's an empty string */
923 if (ls == nullptr) {
924 this->WriteLength(0);
925 continue;
926 }
927
928 _cur_ident = ls->name.c_str();
929 _cur_line = ls->line;
930
931 /* Produce a message if a string doesn't have a translation. */
932 if (_show_todo > 0 && ls->translated.empty()) {
933 if ((_show_todo & 2) != 0) {
934 StrgenWarning("'{}' is untranslated", ls->name);
935 }
936 if ((_show_todo & 1) != 0) {
937 const char *s = "<TODO> ";
938 while (*s != '\0') buffer.AppendByte(*s++);
939 }
940 }
941
942 /* Extract the strings and stuff from the english command string */
943 _cur_pcs = ExtractCommandString(ls->english.c_str(), false);
944
 /* Use the translation as soon as there is a translation or at least one translated case,
  * otherwise fall back to the english text. */
945 if (!ls->translated_cases.empty() || !ls->translated.empty()) {
946 cmdp = &ls->translated;
947 } else {
948 cmdp = &ls->english;
949 }
950
951 _translated = cmdp != &ls->english;
952
953 if (!ls->translated_cases.empty()) {
954 /* Need to output a case-switch.
955 * It has this format
956 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
957 * Each LEN is printed using 2 bytes in big endian order. */
958 buffer.AppendUtf8(SCC_SWITCH_CASE);
959 buffer.AppendByte((uint8_t)ls->translated_cases.size());
960
961 /* Write each case */
962 for (const Case &c : ls->translated_cases) {
963 buffer.AppendByte(c.caseidx);
964 /* Make some space for the 16-bit length */
965 uint pos = (uint)buffer.size();
966 buffer.AppendByte(0);
967 buffer.AppendByte(0);
968 /* Write string */
969 PutCommandString(&buffer, c.string.c_str());
970 buffer.AppendByte(0); // terminate with a zero
971 /* Fill in the length */
 /* The length covers the encoded string and its terminating zero, not the two length bytes. */
972 uint size = (uint)buffer.size() - (pos + 2);
973 buffer[pos + 0] = GB(size, 8, 8);
974 buffer[pos + 1] = GB(size, 0, 8);
975 }
976 }
977
 /* The default string follows the cases, if there are any. */
978 if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
979
980 this->WriteLength((uint)buffer.size());
981 this->Write(buffer.data(), buffer.size());
982 buffer.clear();
983 }
984 }
985}
debug_inline constexpr bool HasBit(const T x, const uint8_t y)
Checks if a bit in a value is set.
debug_inline static constexpr uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
constexpr bool Test(Tvalue_type value) const
Test if the value-th bit is set.
static const uint8_t MAX_NUM_GENDERS
Maximum number of supported genders.
Definition language.h:20
static const uint8_t CASE_GENDER_LEN
The (maximum) length of a case/gender string.
Definition language.h:19
static const uint8_t MAX_NUM_CASES
Maximum number of supported cases.
Definition language.h:21
constexpr bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is between a window started at some base point.
void MemSetT(T *ptr, uint8_t value, size_t num=1)
Type-safe version of memset().
Definition mem_func.hpp:49
#define lengthof(array)
Return the length of an fixed size array.
Definition stdafx.h:277
Structures related to strgen.
LanguagePackHeader _lang
Header information about a language.
int _cur_line
The current line we're parsing in the input file.
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
LanguagePackHeader _lang
Header information about a language.
static bool _translated
Whether the current language is not the master language.
int _cur_line
The current line we're parsing in the input file.
static bool _translation
Is the current file actually a translation or not.
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
static const int MAX_PLURALS
The maximum number of plurals.
static const PluralForm _plural_forms[]
All plural forms used.
@ Gender
These commands support genders.
@ Case
These commands support cases.
@ DontCount
These commands aren't counted for comparison.
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition string.cpp:59
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition string.cpp:437
static const uint TAB_SIZE
Number of strings per StringTab.
The buffer for writing a single string.
void AppendUtf8(uint32_t value)
Add an Unicode character encoded in UTF-8 to the buffer.
void AppendByte(uint8_t value)
Convenience method for adding a byte.
Container for the different cases of a string.
Definition strgen.h:20
Case(int caseidx, const std::string &string)
Create a new case.
virtual void WriteStringID(const std::string &name, int stringid)=0
Write the string ID.
void WriteHeader(const StringData &data)
Write the header information.
Information about a single string.
Definition strgen.h:28
int line
Line of string in source-file.
Definition strgen.h:33
LangString(const std::string &name, const std::string &english, size_t index, int line)
Create a new string.
std::string english
English text.
Definition strgen.h:30
std::vector< Case > translated_cases
Cases of the translation.
Definition strgen.h:34
std::string translated
Translated text.
Definition strgen.h:31
void FreeTranslation()
Free all data related to the translation.
std::string name
Name of the string.
Definition strgen.h:29
Header of a language file.
Definition language.h:24
uint8_t plural_form
plural form index
Definition language.h:41
uint32_t version
32-bits of auto generated version info which is basically a hash of strings.h
Definition language.h:28
uint8_t GetGenderIndex(const char *gender_str) const
Get the index for the given gender.
Definition language.h:68
uint16_t offsets[TEXT_TAB_END]
the offsets
Definition language.h:32
uint16_t winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determin...
Definition language.h:51
uint8_t num_genders
the number of genders of this language
Definition language.h:53
uint8_t GetCaseIndex(const char *case_str) const
Get the index for the given case.
Definition language.h:81
uint16_t missing
number of missing strings.
Definition language.h:40
char digit_group_separator[8]
Thousand separator used for anything not currencies.
Definition language.h:35
uint32_t ident
32-bits identifier
Definition language.h:27
char digit_decimal_separator[8]
Decimal separator.
Definition language.h:39
char digit_group_separator_currency[8]
Thousand separator used for currencies.
Definition language.h:37
static const uint32_t IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition language.h:25
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
virtual void WriteLength(uint length)
Write the length as a simple gamma.
virtual void Write(const uint8_t *buffer, size_t length)=0
Write a number of bytes.
virtual void WriteLang(const StringData &data)
Actually write the language.
int plural_count
The number of plural forms.
Information about the currently known strings.
Definition strgen.h:41
size_t tabs
The number of 'tabs' of strings.
Definition strgen.h:44
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
std::vector< std::unique_ptr< LangString > > strings
List of all known strings.
Definition strgen.h:42
size_t max_strings
The maximum number of strings.
Definition strgen.h:45
void Add(std::unique_ptr< LangString > ls)
Add a newly created LangString.
size_t next_string_id
The next string ID to allocate.
Definition strgen.h:46
uint Version() const
Make a hash of the file to get a unique "version number".
LangString * Find(const std::string_view s)
Find a LangString based on the string name.
void FreeTranslation()
Free all data related to the translation.
StringData(size_t tabs)
Create a new string data container.
std::unordered_map< std::string_view, LangString * > name_to_string
Lookup table for the strings.
Definition strgen.h:43
const std::string file
The file we are reading.
Definition strgen.h:60
StringReader(StringData &data, const std::string &file, bool master, bool translation)
Prepare reading.
StringData & data
The data to fill during reading.
Definition strgen.h:59
virtual void ParseFile()
Start parsing the file.
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
Definition strgen.h:62
virtual std::optional< std::string > ReadLine()=0
Read a single line from the source of strings.
virtual void HandlePragma(char *str)
Handle the pragma of the file.
bool master
Are we reading the master file?
Definition strgen.h:61