OpenTTD Source  20241108-master-g80f628063a
strgen_base.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #include "../stdafx.h"
11 #include "../core/alloc_func.hpp"
12 #include "../core/endian_func.hpp"
13 #include "../core/mem_func.hpp"
14 #include "../error_func.h"
15 #include "../string_func.h"
16 #include "../table/control_codes.h"
17 
18 #include "strgen.h"
19 
20 
21 #include "../table/strgen_tables.h"
22 
23 #include "../safeguards.h"
24 
25 /* Compiles a list of strings into a compiled string list */
26 
27 static bool _translated;
28 static bool _translation;
29 const char *_file = "(unknown file)";
30 int _cur_line;
31 int _errors, _warnings, _show_todo;
33 
34 static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei);
35 
41 Case::Case(int caseidx, const std::string &string) :
42  caseidx(caseidx), string(string)
43 {
44 }
45 
53 LangString::LangString(const std::string &name, const std::string &english, size_t index, int line) :
54  name(name), english(english), index(index), line(line)
55 {
56 }
57 
60 {
61  this->translated.clear();
62  this->translated_cases.clear();
63 }
64 
69 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
70 {
71  this->strings.resize(max_strings);
72  this->next_string_id = 0;
73 }
74 
77 {
78  for (size_t i = 0; i < this->max_strings; i++) {
79  LangString *ls = this->strings[i].get();
80  if (ls != nullptr) ls->FreeTranslation();
81  }
82 }
83 
89 void StringData::Add(std::unique_ptr<LangString> ls)
90 {
91  this->name_to_string[ls->name] = ls.get();
92  this->strings[ls->index].swap(ls);
93 }
94 
100 LangString *StringData::Find(const std::string_view s)
101 {
102  auto it = this->name_to_string.find(s);
103  if (it == this->name_to_string.end()) return nullptr;
104 
105  return it->second;
106 }
107 
114 uint StringData::VersionHashStr(uint hash, const char *s) const
115 {
116  for (; *s != '\0'; s++) {
117  hash = std::rotl(hash, 3) ^ *s;
118  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
119  }
120  return hash;
121 }
122 
128 {
129  uint hash = 0;
130 
131  for (size_t i = 0; i < this->max_strings; i++) {
132  const LangString *ls = this->strings[i].get();
133 
134  if (ls != nullptr) {
135  const CmdStruct *cs;
136  const char *s;
137  std::string buf;
138  int argno;
139  int casei;
140 
141  s = ls->name.c_str();
142  hash ^= i * 0x717239;
143  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
144  hash = this->VersionHashStr(hash, s + 1);
145 
146  s = ls->english.c_str();
147  while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
148  if (cs->flags & C_DONTCOUNT) continue;
149 
150  hash ^= (cs - _cmd_structs) * 0x1234567;
151  hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
152  }
153  }
154  }
155 
156  return hash;
157 }
158 
163 uint StringData::CountInUse(uint tab) const
164 {
165  int i;
166  for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
167  return i + 1;
168 }
169 
170 static const char *_cur_ident;
171 
172 /* Used when generating some advanced commands. */
173 static ParsedCommandStruct _cur_pcs;
174 static int _cur_argidx;
175 
177 struct Buffer : std::vector<uint8_t> {
182  void AppendByte(uint8_t value)
183  {
184  this->push_back(value);
185  }
186 
191  void AppendUtf8(uint32_t value)
192  {
193  if (value < 0x80) {
194  this->push_back(value);
195  } else if (value < 0x800) {
196  this->push_back(0xC0 + GB(value, 6, 5));
197  this->push_back(0x80 + GB(value, 0, 6));
198  } else if (value < 0x10000) {
199  this->push_back(0xE0 + GB(value, 12, 4));
200  this->push_back(0x80 + GB(value, 6, 6));
201  this->push_back(0x80 + GB(value, 0, 6));
202  } else if (value < 0x110000) {
203  this->push_back(0xF0 + GB(value, 18, 3));
204  this->push_back(0x80 + GB(value, 12, 6));
205  this->push_back(0x80 + GB(value, 6, 6));
206  this->push_back(0x80 + GB(value, 0, 6));
207  } else {
208  StrgenWarning("Invalid unicode value U+0x{:X}", value);
209  }
210  }
211 };
212 
213 size_t Utf8Validate(const char *s)
214 {
215  uint32_t c;
216 
217  if (!HasBit(s[0], 7)) {
218  /* 1 byte */
219  return 1;
220  } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
221  /* 2 bytes */
222  c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
223  if (c >= 0x80) return 2;
224  } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
225  /* 3 bytes */
226  c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
227  if (c >= 0x800) return 3;
228  } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
229  /* 4 bytes */
230  c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
231  if (c >= 0x10000 && c <= 0x10FFFF) return 4;
232  }
233 
234  return 0;
235 }
236 
237 
238 void EmitSingleChar(Buffer *buffer, char *buf, int value)
239 {
240  if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command");
241  buffer->AppendUtf8(value);
242 }
243 
244 
245 /* The plural specifier looks like
246  * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
247 
248 /* This is encoded like
249  * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
250 
/**
 * Parse an (optionally relative) number, optionally followed by ":<offset>".
 * A leading '+' or a negative number is added to the current value; any other
 * number replaces it.
 * @param buf    [in,out] Text to parse; moved past the parsed part on success.
 * @param value  [in,out] The value to set or adjust.
 * @param offset [out] When not nullptr and a ':' follows the number, receives the number after the ':'.
 * @return True when a number was parsed, false otherwise (\a buf is left untouched then).
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;

	const bool relative = (*cursor == '+');
	if (relative) cursor++;

	char *stop;
	const int number = std::strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	*value = (relative || number < 0) ? *value + number : number;

	if (offset != nullptr && *stop == ':') {
		/* Take the Nth within */
		const char *sub = stop + 1;
		*offset = std::strtol(sub, &stop, 0);
		if (stop == sub) return false;
	}

	*buf = stop;
	return true;
}
278 
/**
 * Parse out the next word.
 * A word is either a run of characters up to the next space/tab, or, when it
 * starts with a double quote, everything up to the closing double quote. The
 * terminating character is overwritten with NUL in the input.
 * @param buf [in,out] Text to parse; moved past the word (and its terminator) when a word was found.
 * @return Start of the NUL-terminated word, or nullptr when only whitespace remains.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return nullptr;

	/* A leading quote is not part of the word itself. */
	const bool quoted = (*cursor == '"');
	if (quoted) cursor++;
	char *word = cursor;

	while (*cursor != '\0') {
		const bool terminator = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (terminator) {
			*cursor++ = '\0';
			break;
		}
		cursor++;
	}

	*buf = cursor;
	return word;
}
313 
314 /* Forward declaration */
315 static int TranslateArgumentIdx(int arg, int offset = 0);
316 
317 static void EmitWordList(Buffer *buffer, const std::vector<const char *> &words, uint nw)
318 {
319  /* Maximum word length in bytes, excluding trailing NULL. */
320  constexpr uint MAX_WORD_LENGTH = UINT8_MAX - 2;
321 
322  buffer->AppendByte(nw);
323  for (uint i = 0; i < nw; i++) {
324  size_t len = strlen(words[i]) + 1;
325  if (len >= UINT8_MAX) StrgenFatal("WordList {}/{} string '{}' too long, max bytes {}", i + 1, nw, words[i], MAX_WORD_LENGTH);
326  buffer->AppendByte(static_cast<uint8_t>(len));
327  }
328  for (uint i = 0; i < nw; i++) {
329  for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
330  buffer->AppendByte(0);
331  }
332 }
333 
334 void EmitPlural(Buffer *buffer, char *buf, int)
335 {
336  int argidx = _cur_argidx;
337  int offset = -1;
339  std::vector<const char *> words(std::max(expected, MAX_PLURALS), nullptr);
340  int nw = 0;
341 
342  /* Parse out the number, if one exists. Otherwise default to prev arg. */
343  if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
344 
345  const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
346  if (offset == -1) {
347  /* Use default offset */
348  if (cmd == nullptr || cmd->default_plural_offset < 0) {
349  StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
350  }
351  offset = cmd->default_plural_offset;
352  }
353 
354  /* Parse each string */
355  for (nw = 0; nw < MAX_PLURALS; nw++) {
356  words[nw] = ParseWord(&buf);
357  if (words[nw] == nullptr) break;
358  }
359 
360  if (nw == 0) {
361  StrgenFatal("{}: No plural words", _cur_ident);
362  }
363 
364  if (expected != nw) {
365  if (_translated) {
366  StrgenFatal("{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
367  expected, nw);
368  } else {
369  if ((_show_todo & 2) != 0) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
370  if (nw > expected) {
371  nw = expected;
372  } else {
373  for (; nw < expected; nw++) {
374  words[nw] = words[nw - 1];
375  }
376  }
377  }
378  }
379 
380  buffer->AppendUtf8(SCC_PLURAL_LIST);
381  buffer->AppendByte(_lang.plural_form);
382  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
383  EmitWordList(buffer, words, nw);
384 }
385 
386 void EmitGender(Buffer *buffer, char *buf, int)
387 {
388  int argidx = _cur_argidx;
389  int offset = 0;
390  uint nw;
391 
392  if (buf[0] == '=') {
393  buf++;
394 
395  /* This is a {G=DER} command */
396  nw = _lang.GetGenderIndex(buf);
397  if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf);
398 
399  /* now nw contains the gender index */
400  buffer->AppendUtf8(SCC_GENDER_INDEX);
401  buffer->AppendByte(nw);
402  } else {
403  std::vector<const char *> words(MAX_NUM_GENDERS, nullptr);
404 
405  /* This is a {G 0 foo bar two} command.
406  * If no relative number exists, default to +0 */
407  ParseRelNum(&buf, &argidx, &offset);
408 
409  const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
410  if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) {
411  StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
412  }
413 
414  for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
415  words[nw] = ParseWord(&buf);
416  if (words[nw] == nullptr) break;
417  }
418  if (nw != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
419 
420  assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
421  buffer->AppendUtf8(SCC_GENDER_LIST);
422  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
423  EmitWordList(buffer, words, nw);
424  }
425 }
426 
427 static const CmdStruct *FindCmd(const char *s, int len)
428 {
429  for (const auto &cs : _cmd_structs) {
430  if (strncmp(cs.cmd, s, len) == 0 && cs.cmd[len] == '\0') return &cs;
431  }
432  return nullptr;
433 }
434 
435 static uint ResolveCaseName(const char *str, size_t len)
436 {
437  /* First get a clean copy of only the case name, then resolve it. */
438  char case_str[CASE_GENDER_LEN];
439  len = std::min(lengthof(case_str) - 1, len);
440  memcpy(case_str, str, len);
441  case_str[len] = '\0';
442 
443  uint8_t case_idx = _lang.GetCaseIndex(case_str);
444  if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", case_str);
445  return case_idx + 1;
446 }
447 
448 
449 /* returns nullptr on eof
450  * else returns command struct */
451 static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei)
452 {
453  const char *s = *str, *start;
454  char c;
455 
456  *argno = -1;
457  *casei = -1;
458 
459  /* Scan to the next command, exit if there's no next command. */
460  for (; *s != '{'; s++) {
461  if (*s == '\0') return nullptr;
462  }
463  s++; // Skip past the {
464 
465  if (*s >= '0' && *s <= '9') {
466  char *end;
467 
468  *argno = std::strtoul(s, &end, 0);
469  if (*end != ':') StrgenFatal("missing arg #");
470  s = end + 1;
471  }
472 
473  /* parse command name */
474  start = s;
475  do {
476  c = *s++;
477  } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
478 
479  const CmdStruct *cmd = FindCmd(start, s - start - 1);
480  if (cmd == nullptr) {
481  std::string command(start, s - start - 1);
482  StrgenError("Undefined command '{}'", command);
483  return nullptr;
484  }
485 
486  if (c == '.') {
487  const char *casep = s;
488 
489  if (!(cmd->flags & C_CASE)) {
490  StrgenFatal("Command '{}' can't have a case", cmd->cmd);
491  }
492 
493  do {
494  c = *s++;
495  } while (c != '}' && c != ' ' && c != '\0');
496  *casei = ResolveCaseName(casep, s - casep - 1);
497  }
498 
499  if (c == '\0') {
500  StrgenError("Missing }} from command '{}'", start);
501  return nullptr;
502  }
503 
504 
505  if (c != '}') {
506  if (c == '=') s--;
507  /* copy params */
508  start = s;
509  for (;;) {
510  c = *s++;
511  if (c == '}') break;
512  if (c == '\0') {
513  StrgenError("Missing }} from command '{}'", start);
514  return nullptr;
515  }
516  param += c;
517  }
518  }
519 
520  *str = s;
521 
522  return cmd;
523 }
524 
532 StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) :
533  data(data), file(file), master(master), translation(translation)
534 {
535 }
536 
537 ParsedCommandStruct ExtractCommandString(const char *s, bool)
538 {
539  int argno;
540  int argidx = 0;
541  int casei;
542 
544 
545  for (;;) {
546  /* read until next command from a. */
547  std::string param;
548  const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
549 
550  if (ar == nullptr) break;
551 
552  /* Sanity checking */
553  if (argno != -1 && ar->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex");
554 
555  if (ar->consumes) {
556  if (argno != -1) argidx = argno;
557  if (argidx < 0 || (uint)argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx);
558  if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != ar) StrgenFatal("duplicate param idx {}", argidx);
559 
560  p.consuming_commands[argidx++] = ar;
561  } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
562  p.non_consuming_commands.emplace_back(CmdPair{ar, std::move(param)});
563  }
564  }
565 
566  return p;
567 }
568 
569 
570 const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
571 {
572  if (a == nullptr) return nullptr;
573 
574  if (strcmp(a->cmd, "STRING1") == 0 ||
575  strcmp(a->cmd, "STRING2") == 0 ||
576  strcmp(a->cmd, "STRING3") == 0 ||
577  strcmp(a->cmd, "STRING4") == 0 ||
578  strcmp(a->cmd, "STRING5") == 0 ||
579  strcmp(a->cmd, "STRING6") == 0 ||
580  strcmp(a->cmd, "STRING7") == 0 ||
581  strcmp(a->cmd, "RAW_STRING") == 0) {
582  return FindCmd("STRING", 6);
583  }
584 
585  return a;
586 }
587 
588 
589 static bool CheckCommandsMatch(const char *a, const char *b, const char *name)
590 {
591  /* If we're not translating, i.e. we're compiling the base language,
592  * it is pointless to do all these checks as it'll always be correct.
593  * After all, all checks are based on the base language.
594  */
595  if (!_translation) return true;
596 
597  bool result = true;
598 
599  ParsedCommandStruct templ = ExtractCommandString(b, true);
600  ParsedCommandStruct lang = ExtractCommandString(a, true);
601 
602  /* For each string in templ, see if we find it in lang */
603  if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
604  StrgenWarning("{}: template string and language string have a different # of commands", name);
605  result = false;
606  }
607 
608  for (auto &templ_nc : templ.non_consuming_commands) {
609  /* see if we find it in lang, and zero it out */
610  bool found = false;
611  for (auto &lang_nc : lang.non_consuming_commands) {
612  if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) {
613  /* it was found in both. zero it out from lang so we don't find it again */
614  lang_nc.cmd = nullptr;
615  found = true;
616  break;
617  }
618  }
619 
620  if (!found) {
621  StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
622  result = false;
623  }
624  }
625 
626  /* if we reach here, all non consumer commands match up.
627  * Check if the non consumer commands match up also. */
628  for (uint i = 0; i < templ.consuming_commands.max_size(); i++) {
629  if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
630  StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
631  lang.consuming_commands[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
632  templ.consuming_commands[i] == nullptr ? "<empty>" : templ.consuming_commands[i]->cmd);
633  result = false;
634  }
635  }
636 
637  return result;
638 }
639 
640 void StringReader::HandleString(char *str)
641 {
642  if (*str == '#') {
643  if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
644  return;
645  }
646 
647  /* Ignore comments & blank lines */
648  if (*str == ';' || *str == ' ' || *str == '\0') return;
649 
650  char *s = strchr(str, ':');
651  if (s == nullptr) {
652  StrgenError("Line has no ':' delimiter");
653  return;
654  }
655 
656  char *t;
657  /* Trim spaces.
658  * After this str points to the command name, and s points to the command contents */
659  for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
660  *t = 0;
661  s++;
662 
663  /* Check string is valid UTF-8 */
664  const char *tmp;
665  for (tmp = s; *tmp != '\0';) {
666  size_t len = Utf8Validate(tmp);
667  if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
668 
669  char32_t c;
670  Utf8Decode(&c, tmp);
671  if (c <= 0x001F || // ASCII control character range
672  c == 0x200B || // Zero width space
673  (c >= 0xE000 && c <= 0xF8FF) || // Private range
674  (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
675  StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", (int)c, s);
676  }
677 
678  tmp += len;
679  }
680 
681  /* Check if the string has a case..
682  * The syntax for cases is IDENTNAME.case */
683  char *casep = strchr(str, '.');
684  if (casep != nullptr) *casep++ = '\0';
685 
686  /* Check if this string already exists.. */
687  LangString *ent = this->data.Find(str);
688 
689  if (this->master) {
690  if (casep != nullptr) {
691  StrgenError("Cases in the base translation are not supported.");
692  return;
693  }
694 
695  if (ent != nullptr) {
696  StrgenError("String name '{}' is used multiple times", str);
697  return;
698  }
699 
700  if (this->data.strings[this->data.next_string_id] != nullptr) {
701  StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
702  return;
703  }
704 
705  /* Allocate a new LangString */
706  this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _cur_line));
707  } else {
708  if (ent == nullptr) {
709  StrgenWarning("String name '{}' does not exist in master file", str);
710  return;
711  }
712 
713  if (!ent->translated.empty() && casep == nullptr) {
714  StrgenError("String name '{}' is used multiple times", str);
715  return;
716  }
717 
718  /* make sure that the commands match */
719  if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return;
720 
721  if (casep != nullptr) {
722  ent->translated_cases.emplace_back(ResolveCaseName(casep, strlen(casep)), s);
723  } else {
724  ent->translated = s;
725  /* If the string was translated, use the line from the
726  * translated language so errors in the translated file
727  * are properly referenced to. */
728  ent->line = _cur_line;
729  }
730  }
731 }
732 
734 {
735  if (!memcmp(str, "plural ", 7)) {
736  _lang.plural_form = atoi(str + 7);
738  StrgenFatal("Invalid pluralform {}", _lang.plural_form);
739  }
740  } else {
741  StrgenFatal("unknown pragma '{}'", str);
742  }
743 }
744 
/**
 * Remove all carriage returns, newlines and spaces from the end of a string.
 * @param str The string to strip, modified in place.
 */
static void StripTrailingWhitespace(std::string &str)
{
	while (!str.empty()) {
		const char last = str.back();
		if (last != '\r' && last != '\n' && last != ' ') break;
		str.pop_back();
	}
}
749 
751 {
752  _warnings = _errors = 0;
753 
754  _translation = this->translation;
755  _file = this->file.c_str();
756 
757  /* Abusing _show_todo to replace "warning" with "info" for translations. */
758  _show_todo &= 3;
759  if (!this->translation) _show_todo |= 4;
760 
761  /* For each new file we parse, reset the genders, and language codes. */
762  MemSetT(&_lang, 0);
766 
767  _cur_line = 1;
768  while (this->data.next_string_id < this->data.max_strings) {
769  std::optional<std::string> line = this->ReadLine();
770  if (!line.has_value()) return;
771 
772  StripTrailingWhitespace(line.value());
773  this->HandleString(line.value().data());
774  _cur_line++;
775  }
776 
777  if (this->data.next_string_id == this->data.max_strings) {
778  StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings);
779  }
780 }
781 
787 {
788  int last = 0;
789  for (size_t i = 0; i < data.max_strings; i++) {
790  if (data.strings[i] != nullptr) {
791  this->WriteStringID(data.strings[i]->name, (int)i);
792  last = (int)i;
793  }
794  }
795 
796  this->WriteStringID("STR_LAST_STRINGID", last);
797 }
798 
799 static int TranslateArgumentIdx(int argidx, int offset)
800 {
801  int sum;
802 
803  if (argidx < 0 || (uint)argidx >= _cur_pcs.consuming_commands.max_size()) {
804  StrgenFatal("invalid argidx {}", argidx);
805  }
806  const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
807  if (cs != nullptr && cs->consumes <= offset) {
808  StrgenFatal("invalid argidx offset {}:{}", argidx, offset);
809  }
810 
811  if (_cur_pcs.consuming_commands[argidx] == nullptr) {
812  StrgenFatal("no command for this argidx {}", argidx);
813  }
814 
815  for (int i = sum = 0; i < argidx; i++) {
816  cs = _cur_pcs.consuming_commands[i];
817 
818  sum += (cs != nullptr) ? cs->consumes : 1;
819  }
820 
821  return sum + offset;
822 }
823 
824 static void PutArgidxCommand(Buffer *buffer)
825 {
826  buffer->AppendUtf8(SCC_ARG_INDEX);
827  buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
828 }
829 
830 
831 static void PutCommandString(Buffer *buffer, const char *str)
832 {
833  _cur_argidx = 0;
834 
835  while (*str != '\0') {
836  /* Process characters as they are until we encounter a { */
837  if (*str != '{') {
838  buffer->AppendByte(*str++);
839  continue;
840  }
841 
842  std::string param;
843  int argno;
844  int casei;
845  const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
846  if (cs == nullptr) break;
847 
848  if (casei != -1) {
849  buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
850  buffer->AppendByte(casei);
851  }
852 
853  /* For params that consume values, we need to handle the argindex properly */
854  if (cs->consumes > 0) {
855  /* Check if we need to output a move-param command */
856  if (argno != -1 && argno != _cur_argidx) {
857  _cur_argidx = argno;
858  PutArgidxCommand(buffer);
859  }
860 
861  /* Output the one from the master string... it's always accurate. */
862  cs = _cur_pcs.consuming_commands[_cur_argidx++];
863  if (cs == nullptr) {
864  StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
865  }
866  }
867 
868  cs->proc(buffer, param.data(), cs->value);
869  }
870 }
871 
877 {
878  char buffer[2];
879  int offs = 0;
880  if (length >= 0x4000) {
881  StrgenFatal("string too long");
882  }
883 
884  if (length >= 0xC0) {
885  buffer[offs++] = (length >> 8) | 0xC0;
886  }
887  buffer[offs++] = length & 0xFF;
888  this->Write((uint8_t*)buffer, offs);
889 }
890 
896 {
897  std::vector<uint> in_use;
898  for (size_t tab = 0; tab < data.tabs; tab++) {
899  uint n = data.CountInUse((uint)tab);
900 
901  in_use.push_back(n);
902  _lang.offsets[tab] = TO_LE16(n);
903 
904  for (uint j = 0; j != in_use[tab]; j++) {
905  const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
906  if (ls != nullptr && ls->translated.empty()) _lang.missing++;
907  }
908  }
909 
911  _lang.version = TO_LE32(data.Version());
912  _lang.missing = TO_LE16(_lang.missing);
913  _lang.winlangid = TO_LE16(_lang.winlangid);
914 
915  this->WriteHeader(&_lang);
916  Buffer buffer;
917 
918  for (size_t tab = 0; tab < data.tabs; tab++) {
919  for (uint j = 0; j != in_use[tab]; j++) {
920  const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
921  const std::string *cmdp;
922 
923  /* For undefined strings, just set that it's an empty string */
924  if (ls == nullptr) {
925  this->WriteLength(0);
926  continue;
927  }
928 
929  _cur_ident = ls->name.c_str();
930  _cur_line = ls->line;
931 
932  /* Produce a message if a string doesn't have a translation. */
933  if (_show_todo > 0 && ls->translated.empty()) {
934  if ((_show_todo & 2) != 0) {
935  StrgenWarning("'{}' is untranslated", ls->name);
936  }
937  if ((_show_todo & 1) != 0) {
938  const char *s = "<TODO> ";
939  while (*s != '\0') buffer.AppendByte(*s++);
940  }
941  }
942 
943  /* Extract the strings and stuff from the english command string */
944  _cur_pcs = ExtractCommandString(ls->english.c_str(), false);
945 
946  if (!ls->translated_cases.empty() || !ls->translated.empty()) {
947  cmdp = &ls->translated;
948  } else {
949  cmdp = &ls->english;
950  }
951 
952  _translated = cmdp != &ls->english;
953 
954  if (!ls->translated_cases.empty()) {
955  /* Need to output a case-switch.
956  * It has this format
957  * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
958  * Each LEN is printed using 2 bytes in big endian order. */
959  buffer.AppendUtf8(SCC_SWITCH_CASE);
960  buffer.AppendByte((uint8_t)ls->translated_cases.size());
961 
962  /* Write each case */
963  for (const Case &c : ls->translated_cases) {
964  buffer.AppendByte(c.caseidx);
965  /* Make some space for the 16-bit length */
966  uint pos = (uint)buffer.size();
967  buffer.AppendByte(0);
968  buffer.AppendByte(0);
969  /* Write string */
970  PutCommandString(&buffer, c.string.c_str());
971  buffer.AppendByte(0); // terminate with a zero
972  /* Fill in the length */
973  uint size = (uint)buffer.size() - (pos + 2);
974  buffer[pos + 0] = GB(size, 8, 8);
975  buffer[pos + 1] = GB(size, 0, 8);
976  }
977  }
978 
979  if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
980 
981  this->WriteLength((uint)buffer.size());
982  this->Write(buffer.data(), buffer.size());
983  buffer.clear();
984  }
985  }
986 }
constexpr debug_inline bool HasBit(const T x, const uint8_t y)
Checks if a bit in a value is set.
constexpr static debug_inline uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
static const uint8_t MAX_NUM_GENDERS
Maximum number of supported genders.
Definition: language.h:20
static const uint8_t CASE_GENDER_LEN
The (maximum) length of a case/gender string.
Definition: language.h:19
static const uint8_t MAX_NUM_CASES
Maximum number of supported cases.
Definition: language.h:21
constexpr bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is between a window started at some base point.
Definition: math_func.hpp:252
void MemSetT(T *ptr, uint8_t value, size_t num=1)
Type-safe version of memset().
Definition: mem_func.hpp:49
#define lengthof(array)
Return the length of an fixed size array.
Definition: stdafx.h:280
Structures related to strgen.
LanguagePackHeader _lang
Header information about a language.
Definition: strgen_base.cpp:32
static bool _translated
Whether the current language is not the master language.
Definition: strgen_base.cpp:27
int _cur_line
The current line we're parsing in the input file.
Definition: strgen_base.cpp:30
static bool _translation
Is the current file actually a translation or not.
Definition: strgen_base.cpp:28
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
Definition: strgen_base.cpp:29
static const int MAX_PLURALS
The maximum number of plurals.
static const PluralForm _plural_forms[]
All plural forms used.
@ C_GENDER
These commands support genders.
Definition: strgen_tables.h:16
@ C_CASE
These commands support cases.
Definition: strgen_tables.h:15
@ C_DONTCOUNT
These commands aren't counted for comparison.
Definition: strgen_tables.h:14
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition: string.cpp:60
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition: string.cpp:419
static const uint TAB_SIZE
Number of strings per StringTab.
Definition: strings_type.h:46
The buffer for writing a single string.
void AppendUtf8(uint32_t value)
Add an Unicode character encoded in UTF-8 to the buffer.
void AppendByte(uint8_t value)
Convenience method for adding a byte.
Container for the different cases of a string.
Definition: strgen.h:20
Case(int caseidx, const std::string &string)
Create a new case.
Definition: strgen_base.cpp:41
int caseidx
The index of the case.
Definition: strgen.h:21
std::string string
The translation of the case.
Definition: strgen.h:22
virtual void WriteStringID(const std::string &name, int stringid)=0
Write the string ID.
void WriteHeader(const StringData &data)
Write the header information.
Information about a single string.
Definition: strgen.h:28
int line
Line of string in source-file.
Definition: strgen.h:33
LangString(const std::string &name, const std::string &english, size_t index, int line)
Create a new string.
Definition: strgen_base.cpp:53
std::string english
English text.
Definition: strgen.h:30
std::vector< Case > translated_cases
Cases of the translation.
Definition: strgen.h:34
std::string translated
Translated text.
Definition: strgen.h:31
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:59
std::string name
Name of the string.
Definition: strgen.h:29
Header of a language file.
Definition: language.h:24
uint8_t plural_form
plural form index
Definition: language.h:41
uint32_t version
32-bits of auto generated version info which is basically a hash of strings.h
Definition: language.h:28
uint8_t GetGenderIndex(const char *gender_str) const
Get the index for the given gender.
Definition: language.h:68
uint16_t offsets[TEXT_TAB_END]
the offsets
Definition: language.h:32
uint16_t winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determin...
Definition: language.h:51
uint8_t num_genders
the number of genders of this language
Definition: language.h:53
uint8_t GetCaseIndex(const char *case_str) const
Get the index for the given case.
Definition: language.h:81
uint16_t missing
number of missing strings.
Definition: language.h:40
char digit_group_separator[8]
Thousand separator used for anything not currencies.
Definition: language.h:35
uint32_t ident
32-bits identifier
Definition: language.h:27
char digit_decimal_separator[8]
Decimal separator.
Definition: language.h:39
char digit_group_separator_currency[8]
Thousand separator used for currencies.
Definition: language.h:37
static const uint32_t IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition: language.h:25
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
virtual void WriteLength(uint length)
Write the length as a simple gamma.
virtual void Write(const uint8_t *buffer, size_t length)=0
Write a number of bytes.
virtual void WriteLang(const StringData &data)
Actually write the language.
int plural_count
The number of plural forms.
Information about the currently known strings.
Definition: strgen.h:41
size_t tabs
The number of 'tabs' of strings.
Definition: strgen.h:44
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
std::vector< std::unique_ptr< LangString > > strings
List of all known strings.
Definition: strgen.h:42
size_t max_strings
The maximum number of strings.
Definition: strgen.h:45
void Add(std::unique_ptr< LangString > ls)
Add a newly created LangString.
Definition: strgen_base.cpp:89
size_t next_string_id
The next string ID to allocate.
Definition: strgen.h:46
uint Version() const
Make a hash of the file to get a unique "version number".
LangString * Find(const std::string_view s)
Find a LangString based on the string name.
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:76
StringData(size_t tabs)
Create a new string data container.
Definition: strgen_base.cpp:69
std::unordered_map< std::string_view, LangString * > name_to_string
Lookup table for the strings.
Definition: strgen.h:43
const std::string file
The file we are reading.
Definition: strgen.h:60
StringReader(StringData &data, const std::string &file, bool master, bool translation)
Prepare reading.
StringData & data
The data to fill during reading.
Definition: strgen.h:59
virtual void ParseFile()
Start parsing the file.
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
Definition: strgen.h:62
virtual std::optional< std::string > ReadLine()=0
Read a single line from the source of strings.
virtual void HandlePragma(char *str)
Handle the pragma of the file.
bool master
Are we reading the master file?
Definition: strgen.h:61