OpenTTD Source 20241224-master-gf74b0cf984
strgen_base.cpp
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
7
10#include "../stdafx.h"
11#include "../core/alloc_func.hpp"
12#include "../core/endian_func.hpp"
13#include "../core/mem_func.hpp"
14#include "../error_func.h"
15#include "../string_func.h"
16#include "../table/control_codes.h"
17
18#include "strgen.h"
19
20
21#include "../table/strgen_tables.h"
22
23#include "../safeguards.h"
24
25/* Compiles a list of strings into a compiled string list */
26
/* Whether the current language is not the master language. */
27static bool _translated;
/* Is the current file actually a translation or not. */
28static bool _translation;
/* The filename of the input, so we can refer to it in errors/warnings. */
29const char *_file = "(unknown file)";
/* NOTE(review): the listing numbering skips 30 and 32 around here; declarations
 * (possibly of _cur_line and _lang, which are used below) may be missing - confirm. */
/* Error and warning counters, and the bit mask that controls "todo"/untranslated reporting. */
31int _errors, _warnings, _show_todo;
33
/* Forward declaration; the definition follows further down in this file. */
34static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei);
35
41Case::Case(int caseidx, const std::string &string) :
42 caseidx(caseidx), string(string)
43{
44}
45
53LangString::LangString(const std::string &name, const std::string &english, size_t index, int line) :
54 name(name), english(english), index(index), line(line)
55{
56}
57
/* Drops the translated text and all translated cases of this object.
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a FreeTranslation() member that owns `translated` and `translated_cases` - confirm. */
60{
61 this->translated.clear();
62 this->translated_cases.clear();
63}
64
69StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
70{
71 this->strings.resize(max_strings);
72 this->next_string_id = 0;
73}
74
/* Calls FreeTranslation() on every non-empty entry of `strings`.
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a FreeTranslation() member of the string container - confirm. */
77{
78 for (size_t i = 0; i < this->max_strings; i++) {
79 LangString *ls = this->strings[i].get();
80 if (ls != nullptr) ls->FreeTranslation();
81 }
82}
83
89void StringData::Add(std::unique_ptr<LangString> ls)
90{
91 this->name_to_string[ls->name] = ls.get();
92 this->strings[ls->index].swap(ls);
93}
94
100LangString *StringData::Find(const std::string_view s)
101{
102 auto it = this->name_to_string.find(s);
103 if (it == this->name_to_string.end()) return nullptr;
104
105 return it->second;
106}
107
114uint StringData::VersionHashStr(uint hash, const char *s) const
115{
116 for (; *s != '\0'; s++) {
117 hash = std::rotl(hash, 3) ^ *s;
118 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
119 }
120 return hash;
121}
122
/* Computes a hash over all defined strings: the slot index, the string name
 * (from its second character on) and the commands found in the english text.
 * NOTE(review): the signature line is missing from this listing; the body returns
 * a `uint` hash and matches a Version() member of the string container - confirm. */
128{
129 uint hash = 0;
130
131 for (size_t i = 0; i < this->max_strings; i++) {
132 const LangString *ls = this->strings[i].get();
133
134 if (ls != nullptr) {
135 const CmdStruct *cs;
136 const char *s;
137 std::string buf;
138 int argno;
139 int casei;
140
141 s = ls->name.c_str();
/* Mix in the slot index, then the name; the first character of the name is skipped. */
142 hash ^= i * 0x717239;
143 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
144 hash = this->VersionHashStr(hash, s + 1);
145
146 s = ls->english.c_str();
/* Mix in the table position of every command in the english text,
 * except commands flagged C_DONTCOUNT. `buf` only receives the parameters and is not used. */
147 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
148 if (cs->flags & C_DONTCOUNT) continue;
149
150 hash ^= (cs - _cmd_structs) * 0x1234567;
151 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
152 }
153 }
154 }
155
156 return hash;
157}
158
163uint StringData::CountInUse(uint tab) const
164{
165 int i;
166 for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
167 return i + 1;
168}
169
170static const char *_cur_ident;
171
172/* Used when generating some advanced commands. */
173static ParsedCommandStruct _cur_pcs;
174static int _cur_argidx;
175
177struct Buffer : std::vector<uint8_t> {
182 void AppendByte(uint8_t value)
183 {
184 this->push_back(value);
185 }
186
191 void AppendUtf8(uint32_t value)
192 {
193 if (value < 0x80) {
194 this->push_back(value);
195 } else if (value < 0x800) {
196 this->push_back(0xC0 + GB(value, 6, 5));
197 this->push_back(0x80 + GB(value, 0, 6));
198 } else if (value < 0x10000) {
199 this->push_back(0xE0 + GB(value, 12, 4));
200 this->push_back(0x80 + GB(value, 6, 6));
201 this->push_back(0x80 + GB(value, 0, 6));
202 } else if (value < 0x110000) {
203 this->push_back(0xF0 + GB(value, 18, 3));
204 this->push_back(0x80 + GB(value, 12, 6));
205 this->push_back(0x80 + GB(value, 6, 6));
206 this->push_back(0x80 + GB(value, 0, 6));
207 } else {
208 StrgenWarning("Invalid unicode value U+0x{:X}", value);
209 }
210 }
211};
212
213size_t Utf8Validate(const char *s)
214{
215 uint32_t c;
216
217 if (!HasBit(s[0], 7)) {
218 /* 1 byte */
219 return 1;
220 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
221 /* 2 bytes */
222 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
223 if (c >= 0x80) return 2;
224 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
225 /* 3 bytes */
226 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
227 if (c >= 0x800) return 3;
228 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
229 /* 4 bytes */
230 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
231 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
232 }
233
234 return 0;
235}
236
237
238void EmitSingleChar(Buffer *buffer, char *buf, int value)
239{
240 if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command");
241 buffer->AppendUtf8(value);
242}
243
244
245/* The plural specifier looks like
246 * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
247
248/* This is encoded like
249 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
250
/**
 * Parse an (optionally relative) number, optionally followed by ":<offset>".
 *
 * Leading spaces and tabs are skipped. A number prefixed with '+' or a negative
 * number is added to \a *value; any other number replaces \a *value.
 *
 * @param buf    [in,out] The text to parse; advanced past the parsed part on success only.
 * @param value  [in,out] The value to update.
 * @param offset [out] When not nullptr and the number is followed by ':', receives the number after the ':'.
 * @return True when a number was parsed. On a failed ":<offset>" parse \a *value has already been updated.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *s = *buf;
	while (*s == ' ' || *s == '\t') s++;

	const bool rel = (*s == '+');
	if (rel) s++;

	char *end;
	int v = std::strtol(s, &end, 0);
	if (end == s) return false;

	if (rel || v < 0) {
		*value += v;
	} else {
		*value = v;
	}

	if (offset != nullptr && *end == ':') {
		/* Take the Nth within */
		s = end + 1;
		*offset = std::strtol(s, &end, 0);
		if (end == s) return false;
	}

	*buf = end;
	return true;
}
278
/**
 * Parse out the next word.
 *
 * A word is either a run of characters up to the next space/tab, or, when it
 * starts with a double quote, everything up to the closing double quote. The
 * terminator is overwritten with NUL, so the input text is modified.
 *
 * @param buf [in,out] The text to parse; advanced past the word and its terminator.
 *            Left untouched when there is no word.
 * @return The NUL-terminated word, or nullptr when only whitespace is left.
 */
char *ParseWord(char **buf)
{
	char *s = *buf;

	while (*s == ' ' || *s == '\t') s++;
	if (*s == '\0') return nullptr;

	const bool quoted = (*s == '"');
	if (quoted) s++;
	char *word = s;

	/* Proceed until the terminator (closing quote, or whitespace) or NUL. */
	for (; *s != '\0'; s++) {
		const bool at_end = quoted ? (*s == '"') : (*s == ' ' || *s == '\t');
		if (at_end) {
			*s++ = '\0';
			break;
		}
	}

	*buf = s;
	return word;
}
313
314/* Forward declaration */
315static int TranslateArgumentIdx(int arg, int offset = 0);
316
317static void EmitWordList(Buffer *buffer, const std::vector<const char *> &words, uint nw)
318{
319 /* Maximum word length in bytes, excluding trailing NULL. */
320 constexpr uint MAX_WORD_LENGTH = UINT8_MAX - 2;
321
322 buffer->AppendByte(nw);
323 for (uint i = 0; i < nw; i++) {
324 size_t len = strlen(words[i]) + 1;
325 if (len >= UINT8_MAX) StrgenFatal("WordList {}/{} string '{}' too long, max bytes {}", i + 1, nw, words[i], MAX_WORD_LENGTH);
326 buffer->AppendByte(static_cast<uint8_t>(len));
327 }
328 for (uint i = 0; i < nw; i++) {
329 for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
330 buffer->AppendByte(0);
331 }
332}
333
/* Emits a plural command: SCC_PLURAL_LIST, the translated argument index and the list of plural words.
 * `buf` holds the command parameters: an optional (relative) argument number with
 * optional ":offset", followed by the plural words. */
334void EmitPlural(Buffer *buffer, char *buf, int)
335{
336 int argidx = _cur_argidx;
337 int offset = -1;
/* NOTE(review): the listing numbering skips 338; `expected` is used below but its
 * declaration is not visible here - presumably the number of plural forms of the
 * current language; confirm against the original file. */
339 std::vector<const char *> words(std::max(expected, MAX_PLURALS), nullptr);
340 int nw = 0;
341
342 /* Parse out the number, if one exists. Otherwise default to prev arg. */
343 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
344
/* NOTE(review): argidx is used as an index here before any range check is visible;
 * the range check in TranslateArgumentIdx() only runs further down - confirm this is safe. */
345 const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
346 if (offset == -1) {
347 /* Use default offset */
348 if (cmd == nullptr || cmd->default_plural_offset < 0) {
349 StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
350 }
351 offset = cmd->default_plural_offset;
352 }
353
354 /* Parse each string */
355 for (nw = 0; nw < MAX_PLURALS; nw++) {
356 words[nw] = ParseWord(&buf);
357 if (words[nw] == nullptr) break;
358 }
359
360 if (nw == 0) {
361 StrgenFatal("{}: No plural words", _cur_ident);
362 }
363
/* A wrong number of plural forms is fatal for a translated string. For an untranslated
 * string the word list is truncated, or padded by repeating the last word. */
364 if (expected != nw) {
365 if (_translated) {
366 StrgenFatal("{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
367 expected, nw);
368 } else {
369 if ((_show_todo & 2) != 0) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
370 if (nw > expected) {
371 nw = expected;
372 } else {
373 for (; nw < expected; nw++) {
374 words[nw] = words[nw - 1];
375 }
376 }
377 }
378 }
379
380 buffer->AppendUtf8(SCC_PLURAL_LIST);
/* NOTE(review): the listing numbering skips 381; a statement may be missing between these two lines - confirm. */
382 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
383 EmitWordList(buffer, words, nw);
384}
385
386void EmitGender(Buffer *buffer, char *buf, int)
387{
388 int argidx = _cur_argidx;
389 int offset = 0;
390 uint nw;
391
392 if (buf[0] == '=') {
393 buf++;
394
395 /* This is a {G=DER} command */
396 nw = _lang.GetGenderIndex(buf);
397 if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf);
398
399 /* now nw contains the gender index */
400 buffer->AppendUtf8(SCC_GENDER_INDEX);
401 buffer->AppendByte(nw);
402 } else {
403 std::vector<const char *> words(MAX_NUM_GENDERS, nullptr);
404
405 /* This is a {G 0 foo bar two} command.
406 * If no relative number exists, default to +0 */
407 ParseRelNum(&buf, &argidx, &offset);
408
409 const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
410 if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) {
411 StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
412 }
413
414 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
415 words[nw] = ParseWord(&buf);
416 if (words[nw] == nullptr) break;
417 }
418 if (nw != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
419
420 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
421 buffer->AppendUtf8(SCC_GENDER_LIST);
422 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
423 EmitWordList(buffer, words, nw);
424 }
425}
426
427static const CmdStruct *FindCmd(const char *s, int len)
428{
429 for (const auto &cs : _cmd_structs) {
430 if (strncmp(cs.cmd, s, len) == 0 && cs.cmd[len] == '\0') return &cs;
431 }
432 return nullptr;
433}
434
435static uint ResolveCaseName(const char *str, size_t len)
436{
437 /* First get a clean copy of only the case name, then resolve it. */
438 char case_str[CASE_GENDER_LEN];
439 len = std::min(lengthof(case_str) - 1, len);
440 memcpy(case_str, str, len);
441 case_str[len] = '\0';
442
443 uint8_t case_idx = _lang.GetCaseIndex(case_str);
444 if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", case_str);
445 return case_idx + 1;
446}
447
448
449/* returns nullptr on eof
450 * else returns command struct */
451static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei)
452{
453 const char *s = *str, *start;
454 char c;
455
456 *argno = -1;
457 *casei = -1;
458
459 /* Scan to the next command, exit if there's no next command. */
460 for (; *s != '{'; s++) {
461 if (*s == '\0') return nullptr;
462 }
463 s++; // Skip past the {
464
465 if (*s >= '0' && *s <= '9') {
466 char *end;
467
468 *argno = std::strtoul(s, &end, 0);
469 if (*end != ':') StrgenFatal("missing arg #");
470 s = end + 1;
471 }
472
473 /* parse command name */
474 start = s;
475 do {
476 c = *s++;
477 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
478
479 const CmdStruct *cmd = FindCmd(start, s - start - 1);
480 if (cmd == nullptr) {
481 std::string command(start, s - start - 1);
482 StrgenError("Undefined command '{}'", command);
483 return nullptr;
484 }
485
486 if (c == '.') {
487 const char *casep = s;
488
489 if (!(cmd->flags & C_CASE)) {
490 StrgenFatal("Command '{}' can't have a case", cmd->cmd);
491 }
492
493 do {
494 c = *s++;
495 } while (c != '}' && c != ' ' && c != '\0');
496 *casei = ResolveCaseName(casep, s - casep - 1);
497 }
498
499 if (c == '\0') {
500 StrgenError("Missing }} from command '{}'", start);
501 return nullptr;
502 }
503
504
505 if (c != '}') {
506 if (c == '=') s--;
507 /* copy params */
508 start = s;
509 for (;;) {
510 c = *s++;
511 if (c == '}') break;
512 if (c == '\0') {
513 StrgenError("Missing }} from command '{}'", start);
514 return nullptr;
515 }
516 param += c;
517 }
518 }
519
520 *str = s;
521
522 return cmd;
523}
524
532StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) :
533 data(data), file(file), master(master), translation(translation)
534{
535}
536
/* Collects the commands of a string: argument consuming commands are stored at their
 * argument index, other commands (unless flagged C_DONTCOUNT) are stored together with
 * their parameter text. The second (unnamed) parameter is not used. */
537ParsedCommandStruct ExtractCommandString(const char *s, bool)
538{
539 int argno;
540 int argidx = 0;
541 int casei;
542
/* NOTE(review): the listing numbering skips 543; `p` is used and returned below but its
 * declaration is not visible here - presumably a ParsedCommandStruct; confirm. */
544
545 for (;;) {
546 /* read until next command from a. */
547 std::string param;
548 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
549
550 if (ar == nullptr) break;
551
552 /* Sanity checking */
553 if (argno != -1 && ar->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex");
554
555 if (ar->consumes) {
/* An explicit argument number restarts the numbering from there. A slot may
 * only be filled again by the very same command. */
556 if (argno != -1) argidx = argno;
557 if (argidx < 0 || (uint)argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx);
558 if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != ar) StrgenFatal("duplicate param idx {}", argidx);
559
560 p.consuming_commands[argidx++] = ar;
561 } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
562 p.non_consuming_commands.emplace_back(CmdPair{ar, std::move(param)});
563 }
564 }
565
566 return p;
567}
568
569
570const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
571{
572 if (a == nullptr) return nullptr;
573
574 if (strcmp(a->cmd, "STRING1") == 0 ||
575 strcmp(a->cmd, "STRING2") == 0 ||
576 strcmp(a->cmd, "STRING3") == 0 ||
577 strcmp(a->cmd, "STRING4") == 0 ||
578 strcmp(a->cmd, "STRING5") == 0 ||
579 strcmp(a->cmd, "STRING6") == 0 ||
580 strcmp(a->cmd, "STRING7") == 0 ||
581 strcmp(a->cmd, "RAW_STRING") == 0) {
582 return FindCmd("STRING", 6);
583 }
584
585 return a;
586}
587
588
589static bool CheckCommandsMatch(const char *a, const char *b, const char *name)
590{
591 /* If we're not translating, i.e. we're compiling the base language,
592 * it is pointless to do all these checks as it'll always be correct.
593 * After all, all checks are based on the base language.
594 */
595 if (!_translation) return true;
596
597 bool result = true;
598
599 ParsedCommandStruct templ = ExtractCommandString(b, true);
600 ParsedCommandStruct lang = ExtractCommandString(a, true);
601
602 /* For each string in templ, see if we find it in lang */
603 if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
604 StrgenWarning("{}: template string and language string have a different # of commands", name);
605 result = false;
606 }
607
608 for (auto &templ_nc : templ.non_consuming_commands) {
609 /* see if we find it in lang, and zero it out */
610 bool found = false;
611 for (auto &lang_nc : lang.non_consuming_commands) {
612 if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) {
613 /* it was found in both. zero it out from lang so we don't find it again */
614 lang_nc.cmd = nullptr;
615 found = true;
616 break;
617 }
618 }
619
620 if (!found) {
621 StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
622 result = false;
623 }
624 }
625
626 /* if we reach here, all non consumer commands match up.
627 * Check if the non consumer commands match up also. */
628 for (uint i = 0; i < templ.consuming_commands.max_size(); i++) {
629 if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
630 StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
631 lang.consuming_commands[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
632 templ.consuming_commands[i] == nullptr ? "<empty>" : templ.consuming_commands[i]->cmd);
633 result = false;
634 }
635 }
636
637 return result;
638}
639
640void StringReader::HandleString(char *str)
641{
642 if (*str == '#') {
643 if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
644 return;
645 }
646
647 /* Ignore comments & blank lines */
648 if (*str == ';' || *str == ' ' || *str == '\0') return;
649
650 char *s = strchr(str, ':');
651 if (s == nullptr) {
652 StrgenError("Line has no ':' delimiter");
653 return;
654 }
655
656 char *t;
657 /* Trim spaces.
658 * After this str points to the command name, and s points to the command contents */
659 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
660 *t = 0;
661 s++;
662
663 /* Check string is valid UTF-8 */
664 const char *tmp;
665 for (tmp = s; *tmp != '\0';) {
666 size_t len = Utf8Validate(tmp);
667 if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
668
669 char32_t c;
670 Utf8Decode(&c, tmp);
671 if (c <= 0x001F || // ASCII control character range
672 c == 0x200B || // Zero width space
673 (c >= 0xE000 && c <= 0xF8FF) || // Private range
674 (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
675 StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", (int)c, s);
676 }
677
678 tmp += len;
679 }
680
681 /* Check if the string has a case..
682 * The syntax for cases is IDENTNAME.case */
683 char *casep = strchr(str, '.');
684 if (casep != nullptr) *casep++ = '\0';
685
686 /* Check if this string already exists.. */
687 LangString *ent = this->data.Find(str);
688
689 if (this->master) {
690 if (casep != nullptr) {
691 StrgenError("Cases in the base translation are not supported.");
692 return;
693 }
694
695 if (ent != nullptr) {
696 StrgenError("String name '{}' is used multiple times", str);
697 return;
698 }
699
700 if (this->data.strings[this->data.next_string_id] != nullptr) {
701 StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
702 return;
703 }
704
705 /* Allocate a new LangString */
706 this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _cur_line));
707 } else {
708 if (ent == nullptr) {
709 StrgenWarning("String name '{}' does not exist in master file", str);
710 return;
711 }
712
713 if (!ent->translated.empty() && casep == nullptr) {
714 StrgenError("String name '{}' is used multiple times", str);
715 return;
716 }
717
718 /* make sure that the commands match */
719 if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return;
720
721 if (casep != nullptr) {
722 ent->translated_cases.emplace_back(ResolveCaseName(casep, strlen(casep)), s);
723 } else {
724 ent->translated = s;
725 /* If the string was translated, use the line from the
726 * translated language so errors in the translated file
727 * are properly referenced to. */
728 ent->line = _cur_line;
729 }
730 }
731}
732
/* Handles a pragma line: only "plural <number>" is accepted in the visible code,
 * anything else is a fatal error.
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a HandlePragma(char *str) member - confirm. */
734{
735 if (!memcmp(str, "plural ", 7)) {
736 _lang.plural_form = atoi(str + 7);
/* NOTE(review): the listing numbering skips 737; the condition that opens the block
 * closed on the line after the StrgenFatal call is not visible here - presumably a
 * range check of the plural form; confirm. */
738 StrgenFatal("Invalid pluralform {}", _lang.plural_form);
739 }
740 } else {
741 StrgenFatal("unknown pragma '{}'", str);
742 }
743}
744
/**
 * Remove all trailing carriage returns, newlines and spaces from a string.
 * Tabs are not removed. A string consisting only of those characters becomes empty.
 * @param str The string to strip, in place.
 */
static void StripTrailingWhitespace(std::string &str)
{
	const std::string::size_type last = str.find_last_not_of("\r\n ");
	if (last == std::string::npos) {
		str.clear();
	} else {
		str.erase(last + 1);
	}
}
749
/* Reads the input line by line and passes every line to HandleString(), until
 * ReadLine() yields no value or all string IDs are used.
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a ParseFile() member of the reader - confirm. */
751{
752 _warnings = _errors = 0;
753
/* NOTE(review): the listing numbering skips 754; a statement may be missing here - confirm. */
755 _file = this->file.c_str();
756
757 /* Abusing _show_todo to replace "warning" with "info" for translations. */
758 _show_todo &= 3;
759 if (!this->translation) _show_todo |= 4;
760
761 /* For each new file we parse, reset the genders, and language codes. */
762 MemSetT(&_lang, 0);
/* NOTE(review): the listing numbering skips 763-765; statements may be missing after
 * the reset of _lang - confirm against the original file. */
766
767 _cur_line = 1;
768 while (this->data.next_string_id < this->data.max_strings) {
769 std::optional<std::string> line = this->ReadLine();
770 if (!line.has_value()) return;
771
772 StripTrailingWhitespace(line.value());
773 this->HandleString(line.value().data());
774 _cur_line++;
775 }
776
/* Only reached when the loop ended because the next string ID reached max_strings. */
777 if (this->data.next_string_id == this->data.max_strings) {
778 StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings);
779 }
780}
781
/* Calls WriteStringID() for every defined string, then once more with the name
 * "STR_LAST_STRINGID" and the highest index seen (0 when there are no strings).
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a WriteHeader(const StringData &data) member - the owning class is not visible; confirm. */
787{
788 int last = 0;
789 for (size_t i = 0; i < data.max_strings; i++) {
790 if (data.strings[i] != nullptr) {
791 this->WriteStringID(data.strings[i]->name, (int)i);
792 last = (int)i;
793 }
794 }
795
796 this->WriteStringID("STR_LAST_STRINGID", last);
797}
798
799static int TranslateArgumentIdx(int argidx, int offset)
800{
801 int sum;
802
803 if (argidx < 0 || (uint)argidx >= _cur_pcs.consuming_commands.max_size()) {
804 StrgenFatal("invalid argidx {}", argidx);
805 }
806 const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
807 if (cs != nullptr && cs->consumes <= offset) {
808 StrgenFatal("invalid argidx offset {}:{}", argidx, offset);
809 }
810
811 if (_cur_pcs.consuming_commands[argidx] == nullptr) {
812 StrgenFatal("no command for this argidx {}", argidx);
813 }
814
815 for (int i = sum = 0; i < argidx; i++) {
816 cs = _cur_pcs.consuming_commands[i];
817
818 sum += (cs != nullptr) ? cs->consumes : 1;
819 }
820
821 return sum + offset;
822}
823
824static void PutArgidxCommand(Buffer *buffer)
825{
826 buffer->AppendUtf8(SCC_ARG_INDEX);
827 buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
828}
829
830
831static void PutCommandString(Buffer *buffer, const char *str)
832{
833 _cur_argidx = 0;
834
835 while (*str != '\0') {
836 /* Process characters as they are until we encounter a { */
837 if (*str != '{') {
838 buffer->AppendByte(*str++);
839 continue;
840 }
841
842 std::string param;
843 int argno;
844 int casei;
845 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
846 if (cs == nullptr) break;
847
848 if (casei != -1) {
849 buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
850 buffer->AppendByte(casei);
851 }
852
853 /* For params that consume values, we need to handle the argindex properly */
854 if (cs->consumes > 0) {
855 /* Check if we need to output a move-param command */
856 if (argno != -1 && argno != _cur_argidx) {
857 _cur_argidx = argno;
858 PutArgidxCommand(buffer);
859 }
860
861 /* Output the one from the master string... it's always accurate. */
862 cs = _cur_pcs.consuming_commands[_cur_argidx++];
863 if (cs == nullptr) {
864 StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
865 }
866 }
867
868 cs->proc(buffer, param.data(), cs->value);
869 }
870}
871
/* Writes a length in one or two bytes: lengths below 0xC0 as a single byte, larger
 * lengths as the high byte OR-ed with 0xC0 followed by the low byte. Lengths of
 * 0x4000 and above are a fatal error.
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a WriteLength(uint length) member - the owning class is not visible; confirm. */
877{
878 char buffer[2];
879 int offs = 0;
880 if (length >= 0x4000) {
881 StrgenFatal("string too long");
882 }
883
884 if (length >= 0xC0) {
885 buffer[offs++] = (length >> 8) | 0xC0;
886 }
887 buffer[offs++] = length & 0xFF;
888 this->Write((uint8_t*)buffer, offs);
889}
890
/* Writes the language: first the header (with the per-tab string counts, the number of
 * missing translations and the version hash), then for every used slot of every tab the
 * length of the compiled string followed by the compiled string itself.
 * NOTE(review): the signature line is missing from this listing; the body matches
 * a WriteLang(const StringData &data) member - the owning class is not visible; confirm. */
896{
897 std::vector<uint> in_use;
/* Per tab: remember how many slots are in use and count the strings without translation. */
898 for (size_t tab = 0; tab < data.tabs; tab++) {
899 uint n = data.CountInUse((uint)tab);
900
901 in_use.push_back(n);
902 _lang.offsets[tab] = TO_LE16(n);
903
904 for (uint j = 0; j != in_use[tab]; j++) {
905 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
906 if (ls != nullptr && ls->translated.empty()) _lang.missing++;
907 }
908 }
909
/* NOTE(review): the listing numbering skips 910; a statement may be missing here (the
 * header's `ident` field is not set anywhere in the visible code) - confirm. */
911 _lang.version = TO_LE32(data.Version());
912 _lang.missing = TO_LE16(_lang.missing);
913 _lang.winlangid = TO_LE16(_lang.winlangid);
914
915 this->WriteHeader(&_lang);
916 Buffer buffer;
917
918 for (size_t tab = 0; tab < data.tabs; tab++) {
919 for (uint j = 0; j != in_use[tab]; j++) {
920 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
921 const std::string *cmdp;
922
923 /* For undefined strings, just set that it's an empty string */
924 if (ls == nullptr) {
925 this->WriteLength(0);
926 continue;
927 }
928
929 _cur_ident = ls->name.c_str();
930 _cur_line = ls->line;
931
932 /* Produce a message if a string doesn't have a translation. */
933 if (_show_todo > 0 && ls->translated.empty()) {
934 if ((_show_todo & 2) != 0) {
935 StrgenWarning("'{}' is untranslated", ls->name);
936 }
937 if ((_show_todo & 1) != 0) {
938 const char *s = "<TODO> ";
939 while (*s != '\0') buffer.AppendByte(*s++);
940 }
941 }
942
943 /* Extract the strings and stuff from the english command string */
944 _cur_pcs = ExtractCommandString(ls->english.c_str(), false);
945
/* Use the translated text when there is a translation or at least one translated case;
 * otherwise fall back to the english text. */
946 if (!ls->translated_cases.empty() || !ls->translated.empty()) {
947 cmdp = &ls->translated;
948 } else {
949 cmdp = &ls->english;
950 }
951
952 _translated = cmdp != &ls->english;
953
954 if (!ls->translated_cases.empty()) {
955 /* Need to output a case-switch.
956 * It has this format
957 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
958 * Each LEN is printed using 2 bytes in big endian order. */
959 buffer.AppendUtf8(SCC_SWITCH_CASE);
960 buffer.AppendByte((uint8_t)ls->translated_cases.size());
961
962 /* Write each case */
963 for (const Case &c : ls->translated_cases) {
964 buffer.AppendByte(c.caseidx);
965 /* Make some space for the 16-bit length */
966 uint pos = (uint)buffer.size();
967 buffer.AppendByte(0);
968 buffer.AppendByte(0);
969 /* Write string */
970 PutCommandString(&buffer, c.string.c_str());
971 buffer.AppendByte(0); // terminate with a zero
972 /* Fill in the length */
973 uint size = (uint)buffer.size() - (pos + 2);
974 buffer[pos + 0] = GB(size, 8, 8);
975 buffer[pos + 1] = GB(size, 0, 8);
976 }
977 }
978
979 if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
980
/* Emit the compiled string and reuse the buffer for the next one. */
981 this->WriteLength((uint)buffer.size());
982 this->Write(buffer.data(), buffer.size());
983 buffer.clear();
984 }
985 }
986}
debug_inline constexpr bool HasBit(const T x, const uint8_t y)
Checks if a bit in a value is set.
debug_inline static constexpr uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
static const uint8_t MAX_NUM_GENDERS
Maximum number of supported genders.
Definition language.h:20
static const uint8_t CASE_GENDER_LEN
The (maximum) length of a case/gender string.
Definition language.h:19
static const uint8_t MAX_NUM_CASES
Maximum number of supported cases.
Definition language.h:21
constexpr bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is between a window started at some base point.
void MemSetT(T *ptr, uint8_t value, size_t num=1)
Type-safe version of memset().
Definition mem_func.hpp:49
#define lengthof(array)
Return the length of an fixed size array.
Definition stdafx.h:280
Structures related to strgen.
LanguagePackHeader _lang
Header information about a language.
int _cur_line
The current line we're parsing in the input file.
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
LanguagePackHeader _lang
Header information about a language.
static bool _translated
Whether the current language is not the master language.
int _cur_line
The current line we're parsing in the input file.
static bool _translation
Is the current file actually a translation or not.
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
static const int MAX_PLURALS
The maximum number of plurals.
static const PluralForm _plural_forms[]
All plural forms used.
@ C_GENDER
These commands support genders.
@ C_CASE
These commands support cases.
@ C_DONTCOUNT
These commands aren't counted for comparison.
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition string.cpp:60
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition string.cpp:419
static const uint TAB_SIZE
Number of strings per StringTab.
The buffer for writing a single string.
void AppendUtf8(uint32_t value)
Add an Unicode character encoded in UTF-8 to the buffer.
void AppendByte(uint8_t value)
Convenience method for adding a byte.
Container for the different cases of a string.
Definition strgen.h:20
Case(int caseidx, const std::string &string)
Create a new case.
virtual void WriteStringID(const std::string &name, int stringid)=0
Write the string ID.
void WriteHeader(const StringData &data)
Write the header information.
Information about a single string.
Definition strgen.h:28
int line
Line of string in source-file.
Definition strgen.h:33
LangString(const std::string &name, const std::string &english, size_t index, int line)
Create a new string.
std::string english
English text.
Definition strgen.h:30
std::vector< Case > translated_cases
Cases of the translation.
Definition strgen.h:34
std::string translated
Translated text.
Definition strgen.h:31
void FreeTranslation()
Free all data related to the translation.
std::string name
Name of the string.
Definition strgen.h:29
Header of a language file.
Definition language.h:24
uint8_t plural_form
plural form index
Definition language.h:41
uint32_t version
32-bits of auto generated version info which is basically a hash of strings.h
Definition language.h:28
uint8_t GetGenderIndex(const char *gender_str) const
Get the index for the given gender.
Definition language.h:68
uint16_t offsets[TEXT_TAB_END]
the offsets
Definition language.h:32
uint16_t winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determin...
Definition language.h:51
uint8_t num_genders
the number of genders of this language
Definition language.h:53
uint8_t GetCaseIndex(const char *case_str) const
Get the index for the given case.
Definition language.h:81
uint16_t missing
number of missing strings.
Definition language.h:40
char digit_group_separator[8]
Thousand separator used for anything not currencies.
Definition language.h:35
uint32_t ident
32-bits identifier
Definition language.h:27
char digit_decimal_separator[8]
Decimal separator.
Definition language.h:39
char digit_group_separator_currency[8]
Thousand separator used for currencies.
Definition language.h:37
static const uint32_t IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition language.h:25
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
virtual void WriteLength(uint length)
Write the length as a simple gamma.
virtual void Write(const uint8_t *buffer, size_t length)=0
Write a number of bytes.
virtual void WriteLang(const StringData &data)
Actually write the language.
int plural_count
The number of plural forms.
Information about the currently known strings.
Definition strgen.h:41
size_t tabs
The number of 'tabs' of strings.
Definition strgen.h:44
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
std::vector< std::unique_ptr< LangString > > strings
List of all known strings.
Definition strgen.h:42
size_t max_strings
The maximum number of strings.
Definition strgen.h:45
void Add(std::unique_ptr< LangString > ls)
Add a newly created LangString.
size_t next_string_id
The next string ID to allocate.
Definition strgen.h:46
uint Version() const
Make a hash of the file to get a unique "version number".
LangString * Find(const std::string_view s)
Find a LangString based on the string name.
void FreeTranslation()
Free all data related to the translation.
StringData(size_t tabs)
Create a new string data container.
std::unordered_map< std::string_view, LangString * > name_to_string
Lookup table for the strings.
Definition strgen.h:43
const std::string file
The file we are reading.
Definition strgen.h:60
StringReader(StringData &data, const std::string &file, bool master, bool translation)
Prepare reading.
StringData & data
The data to fill during reading.
Definition strgen.h:59
virtual void ParseFile()
Start parsing the file.
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
Definition strgen.h:62
virtual std::optional< std::string > ReadLine()=0
Read a single line from the source of strings.
virtual void HandlePragma(char *str)
Handle the pragma of the file.
bool master
Are we reading the master file?
Definition strgen.h:61