OpenTTD Source  20240917-master-g9ab0a47812
strgen_base.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #include "../stdafx.h"
11 #include "../core/alloc_func.hpp"
12 #include "../core/endian_func.hpp"
13 #include "../core/mem_func.hpp"
14 #include "../error_func.h"
15 #include "../string_func.h"
16 #include "../table/control_codes.h"
17 
18 #include "strgen.h"
19 
20 
21 #include "../table/strgen_tables.h"
22 
23 #include "../safeguards.h"
24 
25 /* Compiles a list of strings into a compiled string list */
26 
27 static bool _translated;
28 static bool _translation;
29 const char *_file = "(unknown file)";
30 int _cur_line;
31 int _errors, _warnings, _show_todo;
33 
34 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100;
35 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
36 
42 Case::Case(int caseidx, const std::string &string) :
43  caseidx(caseidx), string(string)
44 {
45 }
46 
54 LangString::LangString(const std::string &name, const std::string &english, size_t index, int line) :
55  name(name), english(english), index(index), line(line)
56 {
57 }
58 
61 {
62  this->translated.clear();
63  this->translated_cases.clear();
64 }
65 
70 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
71 {
72  this->strings.resize(max_strings);
73  this->next_string_id = 0;
74 }
75 
78 {
79  for (size_t i = 0; i < this->max_strings; i++) {
80  LangString *ls = this->strings[i].get();
81  if (ls != nullptr) ls->FreeTranslation();
82  }
83 }
84 
90 void StringData::Add(std::unique_ptr<LangString> ls)
91 {
92  this->name_to_string[ls->name] = ls.get();
93  this->strings[ls->index].swap(ls);
94 }
95 
101 LangString *StringData::Find(const std::string_view s)
102 {
103  auto it = this->name_to_string.find(s);
104  if (it == this->name_to_string.end()) return nullptr;
105 
106  return it->second;
107 }
108 
115 uint StringData::VersionHashStr(uint hash, const char *s) const
116 {
117  for (; *s != '\0'; s++) {
118  hash = std::rotl(hash, 3) ^ *s;
119  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
120  }
121  return hash;
122 }
123 
129 {
130  uint hash = 0;
131 
132  for (size_t i = 0; i < this->max_strings; i++) {
133  const LangString *ls = this->strings[i].get();
134 
135  if (ls != nullptr) {
136  const CmdStruct *cs;
137  const char *s;
138  char buf[MAX_COMMAND_PARAM_SIZE];
139  int argno;
140  int casei;
141 
142  s = ls->name.c_str();
143  hash ^= i * 0x717239;
144  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
145  hash = this->VersionHashStr(hash, s + 1);
146 
147  s = ls->english.c_str();
148  while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
149  if (cs->flags & C_DONTCOUNT) continue;
150 
151  hash ^= (cs - _cmd_structs) * 0x1234567;
152  hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
153  }
154  }
155  }
156 
157  return hash;
158 }
159 
164 uint StringData::CountInUse(uint tab) const
165 {
166  int i;
167  for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
168  return i + 1;
169 }
170 
171 static const char *_cur_ident;
172 
173 /* Used when generating some advanced commands. */
174 static ParsedCommandStruct _cur_pcs;
175 static int _cur_argidx;
176 
178 struct Buffer : std::vector<uint8_t> {
183  void AppendByte(uint8_t value)
184  {
185  this->push_back(value);
186  }
187 
192  void AppendUtf8(uint32_t value)
193  {
194  if (value < 0x80) {
195  this->push_back(value);
196  } else if (value < 0x800) {
197  this->push_back(0xC0 + GB(value, 6, 5));
198  this->push_back(0x80 + GB(value, 0, 6));
199  } else if (value < 0x10000) {
200  this->push_back(0xE0 + GB(value, 12, 4));
201  this->push_back(0x80 + GB(value, 6, 6));
202  this->push_back(0x80 + GB(value, 0, 6));
203  } else if (value < 0x110000) {
204  this->push_back(0xF0 + GB(value, 18, 3));
205  this->push_back(0x80 + GB(value, 12, 6));
206  this->push_back(0x80 + GB(value, 6, 6));
207  this->push_back(0x80 + GB(value, 0, 6));
208  } else {
209  StrgenWarning("Invalid unicode value U+0x{:X}", value);
210  }
211  }
212 };
213 
214 size_t Utf8Validate(const char *s)
215 {
216  uint32_t c;
217 
218  if (!HasBit(s[0], 7)) {
219  /* 1 byte */
220  return 1;
221  } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
222  /* 2 bytes */
223  c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
224  if (c >= 0x80) return 2;
225  } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
226  /* 3 bytes */
227  c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
228  if (c >= 0x800) return 3;
229  } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
230  /* 4 bytes */
231  c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
232  if (c >= 0x10000 && c <= 0x10FFFF) return 4;
233  }
234 
235  return 0;
236 }
237 
238 
239 void EmitSingleChar(Buffer *buffer, char *buf, int value)
240 {
241  if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command");
242  buffer->AppendUtf8(value);
243 }
244 
245 
246 /* The plural specifier looks like
247  * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
248 
249 /* This is encoded like
250  * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
251 
/**
 * Parse an (optionally relative) number, optionally followed by ':' and an offset.
 * A number with a leading '+' or a negative number is added to \a value; any other number replaces it.
 * @param buf Position to parse from; advanced past the parsed text on success only.
 * @param value The value to update with the parsed number.
 * @param offset When not nullptr and the number is followed by ':', receives the number after the ':'.
 * @return False when no number could be parsed, or when a ':' is not followed by a number.
 * @note \a value (and \a offset) may already have been written when false is returned because of a bad offset.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;

	const bool explicit_relative = (*cursor == '+');
	if (explicit_relative) cursor++;

	char *stop;
	const int number = std::strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	if (explicit_relative || number < 0) {
		*value += number;
	} else {
		*value = number;
	}

	if (offset != nullptr && *stop == ':') {
		/* Take the Nth within */
		cursor = stop + 1;
		*offset = std::strtol(cursor, &stop, 0);
		if (stop == cursor) return false;
	}

	*buf = stop;
	return true;
}
279 
/**
 * Parse out the next word.
 * A word is either enclosed in double quotes or runs until the next space or tab.
 * The terminating character is overwritten with NUL, so the input is modified in place.
 * @param buf Position to parse from; advanced past the word when one was found.
 * @return The word, or nullptr when only spaces and tabs remain.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return nullptr;

	/* A leading quote is not part of the word itself. */
	const bool quoted = (*cursor == '"');
	if (quoted) cursor++;
	char *word = cursor;

	/* Proceed until the closing quote respectively whitespace, or NUL. */
	while (*cursor != '\0') {
		const bool at_terminator = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (at_terminator) {
			*cursor++ = '\0';
			break;
		}
		cursor++;
	}

	*buf = cursor;
	return word;
}
314 
315 /* Forward declaration */
316 static int TranslateArgumentIdx(int arg, int offset = 0);
317 
318 static void EmitWordList(Buffer *buffer, const std::vector<const char *> &words, uint nw)
319 {
320  buffer->AppendByte(nw);
321  for (uint i = 0; i < nw; i++) buffer->AppendByte((uint8_t)strlen(words[i]) + 1);
322  for (uint i = 0; i < nw; i++) {
323  for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
324  buffer->AppendByte(0);
325  }
326 }
327 
328 void EmitPlural(Buffer *buffer, char *buf, int)
329 {
330  int argidx = _cur_argidx;
331  int offset = -1;
333  std::vector<const char *> words(std::max(expected, MAX_PLURALS), nullptr);
334  int nw = 0;
335 
336  /* Parse out the number, if one exists. Otherwise default to prev arg. */
337  if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
338 
339  const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
340  if (offset == -1) {
341  /* Use default offset */
342  if (cmd == nullptr || cmd->default_plural_offset < 0) {
343  StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
344  }
345  offset = cmd->default_plural_offset;
346  }
347 
348  /* Parse each string */
349  for (nw = 0; nw < MAX_PLURALS; nw++) {
350  words[nw] = ParseWord(&buf);
351  if (words[nw] == nullptr) break;
352  }
353 
354  if (nw == 0) {
355  StrgenFatal("{}: No plural words", _cur_ident);
356  }
357 
358  if (expected != nw) {
359  if (_translated) {
360  StrgenFatal("{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
361  expected, nw);
362  } else {
363  if ((_show_todo & 2) != 0) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
364  if (nw > expected) {
365  nw = expected;
366  } else {
367  for (; nw < expected; nw++) {
368  words[nw] = words[nw - 1];
369  }
370  }
371  }
372  }
373 
374  buffer->AppendUtf8(SCC_PLURAL_LIST);
375  buffer->AppendByte(_lang.plural_form);
376  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
377  EmitWordList(buffer, words, nw);
378 }
379 
380 void EmitGender(Buffer *buffer, char *buf, int)
381 {
382  int argidx = _cur_argidx;
383  int offset = 0;
384  uint nw;
385 
386  if (buf[0] == '=') {
387  buf++;
388 
389  /* This is a {G=DER} command */
390  nw = _lang.GetGenderIndex(buf);
391  if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf);
392 
393  /* now nw contains the gender index */
394  buffer->AppendUtf8(SCC_GENDER_INDEX);
395  buffer->AppendByte(nw);
396  } else {
397  std::vector<const char *> words(MAX_NUM_GENDERS, nullptr);
398 
399  /* This is a {G 0 foo bar two} command.
400  * If no relative number exists, default to +0 */
401  ParseRelNum(&buf, &argidx, &offset);
402 
403  const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
404  if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) {
405  StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
406  }
407 
408  for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
409  words[nw] = ParseWord(&buf);
410  if (words[nw] == nullptr) break;
411  }
412  if (nw != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
413 
414  assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
415  buffer->AppendUtf8(SCC_GENDER_LIST);
416  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
417  EmitWordList(buffer, words, nw);
418  }
419 }
420 
421 static const CmdStruct *FindCmd(const char *s, int len)
422 {
423  for (const auto &cs : _cmd_structs) {
424  if (strncmp(cs.cmd, s, len) == 0 && cs.cmd[len] == '\0') return &cs;
425  }
426  return nullptr;
427 }
428 
429 static uint ResolveCaseName(const char *str, size_t len)
430 {
431  /* First get a clean copy of only the case name, then resolve it. */
432  char case_str[CASE_GENDER_LEN];
433  len = std::min(lengthof(case_str) - 1, len);
434  memcpy(case_str, str, len);
435  case_str[len] = '\0';
436 
437  uint8_t case_idx = _lang.GetCaseIndex(case_str);
438  if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", case_str);
439  return case_idx + 1;
440 }
441 
442 
443 /* returns nullptr on eof
444  * else returns command struct */
445 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
446 {
447  const char *s = *str, *start;
448  char c;
449 
450  *argno = -1;
451  *casei = -1;
452 
453  /* Scan to the next command, exit if there's no next command. */
454  for (; *s != '{'; s++) {
455  if (*s == '\0') return nullptr;
456  }
457  s++; // Skip past the {
458 
459  if (*s >= '0' && *s <= '9') {
460  char *end;
461 
462  *argno = std::strtoul(s, &end, 0);
463  if (*end != ':') StrgenFatal("missing arg #");
464  s = end + 1;
465  }
466 
467  /* parse command name */
468  start = s;
469  do {
470  c = *s++;
471  } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
472 
473  const CmdStruct *cmd = FindCmd(start, s - start - 1);
474  if (cmd == nullptr) {
475  std::string command(start, s - start - 1);
476  StrgenError("Undefined command '{}'", command);
477  return nullptr;
478  }
479 
480  if (c == '.') {
481  const char *casep = s;
482 
483  if (!(cmd->flags & C_CASE)) {
484  StrgenFatal("Command '{}' can't have a case", cmd->cmd);
485  }
486 
487  do {
488  c = *s++;
489  } while (c != '}' && c != ' ' && c != '\0');
490  *casei = ResolveCaseName(casep, s - casep - 1);
491  }
492 
493  if (c == '\0') {
494  StrgenError("Missing }} from command '{}'", start);
495  return nullptr;
496  }
497 
498 
499  if (c != '}') {
500  if (c == '=') s--;
501  /* copy params */
502  start = s;
503  for (;;) {
504  c = *s++;
505  if (c == '}') break;
506  if (c == '\0') {
507  StrgenError("Missing }} from command '{}'", start);
508  return nullptr;
509  }
510  if (s - start == MAX_COMMAND_PARAM_SIZE) FatalError("param command too long");
511  *param++ = c;
512  }
513  }
514  *param = '\0';
515 
516  *str = s;
517 
518  return cmd;
519 }
520 
528 StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) :
529  data(data), file(file), master(master), translation(translation)
530 {
531 }
532 
533 ParsedCommandStruct ExtractCommandString(const char *s, bool)
534 {
535  char param[MAX_COMMAND_PARAM_SIZE];
536  int argno;
537  int argidx = 0;
538  int casei;
539 
541 
542  for (;;) {
543  /* read until next command from a. */
544  const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
545 
546  if (ar == nullptr) break;
547 
548  /* Sanity checking */
549  if (argno != -1 && ar->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex");
550 
551  if (ar->consumes) {
552  if (argno != -1) argidx = argno;
553  if (argidx < 0 || (uint)argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx);
554  if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != ar) StrgenFatal("duplicate param idx {}", argidx);
555 
556  p.consuming_commands[argidx++] = ar;
557  } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
558  p.non_consuming_commands.emplace_back(CmdPair{ar, param});
559  }
560  }
561 
562  return p;
563 }
564 
565 
566 const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
567 {
568  if (a == nullptr) return nullptr;
569 
570  if (strcmp(a->cmd, "STRING1") == 0 ||
571  strcmp(a->cmd, "STRING2") == 0 ||
572  strcmp(a->cmd, "STRING3") == 0 ||
573  strcmp(a->cmd, "STRING4") == 0 ||
574  strcmp(a->cmd, "STRING5") == 0 ||
575  strcmp(a->cmd, "STRING6") == 0 ||
576  strcmp(a->cmd, "STRING7") == 0 ||
577  strcmp(a->cmd, "RAW_STRING") == 0) {
578  return FindCmd("STRING", 6);
579  }
580 
581  return a;
582 }
583 
584 
585 static bool CheckCommandsMatch(const char *a, const char *b, const char *name)
586 {
587  /* If we're not translating, i.e. we're compiling the base language,
588  * it is pointless to do all these checks as it'll always be correct.
589  * After all, all checks are based on the base language.
590  */
591  if (!_translation) return true;
592 
593  bool result = true;
594 
595  ParsedCommandStruct templ = ExtractCommandString(b, true);
596  ParsedCommandStruct lang = ExtractCommandString(a, true);
597 
598  /* For each string in templ, see if we find it in lang */
599  if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
600  StrgenWarning("{}: template string and language string have a different # of commands", name);
601  result = false;
602  }
603 
604  for (auto &templ_nc : templ.non_consuming_commands) {
605  /* see if we find it in lang, and zero it out */
606  bool found = false;
607  for (auto &lang_nc : lang.non_consuming_commands) {
608  if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) {
609  /* it was found in both. zero it out from lang so we don't find it again */
610  lang_nc.cmd = nullptr;
611  found = true;
612  break;
613  }
614  }
615 
616  if (!found) {
617  StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
618  result = false;
619  }
620  }
621 
622  /* if we reach here, all non consumer commands match up.
623  * Check if the non consumer commands match up also. */
624  for (uint i = 0; i < templ.consuming_commands.max_size(); i++) {
625  if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
626  StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
627  lang.consuming_commands[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
628  templ.consuming_commands[i] == nullptr ? "<empty>" : templ.consuming_commands[i]->cmd);
629  result = false;
630  }
631  }
632 
633  return result;
634 }
635 
636 void StringReader::HandleString(char *str)
637 {
638  if (*str == '#') {
639  if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
640  return;
641  }
642 
643  /* Ignore comments & blank lines */
644  if (*str == ';' || *str == ' ' || *str == '\0') return;
645 
646  char *s = strchr(str, ':');
647  if (s == nullptr) {
648  StrgenError("Line has no ':' delimiter");
649  return;
650  }
651 
652  char *t;
653  /* Trim spaces.
654  * After this str points to the command name, and s points to the command contents */
655  for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
656  *t = 0;
657  s++;
658 
659  /* Check string is valid UTF-8 */
660  const char *tmp;
661  for (tmp = s; *tmp != '\0';) {
662  size_t len = Utf8Validate(tmp);
663  if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
664 
665  char32_t c;
666  Utf8Decode(&c, tmp);
667  if (c <= 0x001F || // ASCII control character range
668  c == 0x200B || // Zero width space
669  (c >= 0xE000 && c <= 0xF8FF) || // Private range
670  (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
671  StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", (int)c, s);
672  }
673 
674  tmp += len;
675  }
676 
677  /* Check if the string has a case..
678  * The syntax for cases is IDENTNAME.case */
679  char *casep = strchr(str, '.');
680  if (casep != nullptr) *casep++ = '\0';
681 
682  /* Check if this string already exists.. */
683  LangString *ent = this->data.Find(str);
684 
685  if (this->master) {
686  if (casep != nullptr) {
687  StrgenError("Cases in the base translation are not supported.");
688  return;
689  }
690 
691  if (ent != nullptr) {
692  StrgenError("String name '{}' is used multiple times", str);
693  return;
694  }
695 
696  if (this->data.strings[this->data.next_string_id] != nullptr) {
697  StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
698  return;
699  }
700 
701  /* Allocate a new LangString */
702  this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _cur_line));
703  } else {
704  if (ent == nullptr) {
705  StrgenWarning("String name '{}' does not exist in master file", str);
706  return;
707  }
708 
709  if (!ent->translated.empty() && casep == nullptr) {
710  StrgenError("String name '{}' is used multiple times", str);
711  return;
712  }
713 
714  /* make sure that the commands match */
715  if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return;
716 
717  if (casep != nullptr) {
718  ent->translated_cases.emplace_back(ResolveCaseName(casep, strlen(casep)), s);
719  } else {
720  ent->translated = s;
721  /* If the string was translated, use the line from the
722  * translated language so errors in the translated file
723  * are properly referenced to. */
724  ent->line = _cur_line;
725  }
726  }
727 }
728 
730 {
731  if (!memcmp(str, "plural ", 7)) {
732  _lang.plural_form = atoi(str + 7);
734  StrgenFatal("Invalid pluralform {}", _lang.plural_form);
735  }
736  } else {
737  StrgenFatal("unknown pragma '{}'", str);
738  }
739 }
740 
/**
 * Remove trailing carriage returns, newlines and spaces from a string.
 * @param str The string to strip, modified in place.
 */
static void StripTrailingWhitespace(std::string &str)
{
	const std::string::size_type last_kept = str.find_last_not_of("\r\n ");

	if (last_kept == std::string::npos) {
		/* Nothing but strippable characters (or nothing at all). */
		str.clear();
	} else {
		str.resize(last_kept + 1);
	}
}
745 
747 {
748  _warnings = _errors = 0;
749 
750  _translation = this->translation;
751  _file = this->file.c_str();
752 
753  /* Abusing _show_todo to replace "warning" with "info" for translations. */
754  _show_todo &= 3;
755  if (!this->translation) _show_todo |= 4;
756 
757  /* For each new file we parse, reset the genders, and language codes. */
758  MemSetT(&_lang, 0);
762 
763  _cur_line = 1;
764  while (this->data.next_string_id < this->data.max_strings) {
765  std::optional<std::string> line = this->ReadLine();
766  if (!line.has_value()) return;
767 
768  StripTrailingWhitespace(line.value());
769  this->HandleString(line.value().data());
770  _cur_line++;
771  }
772 
773  if (this->data.next_string_id == this->data.max_strings) {
774  StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings);
775  }
776 }
777 
783 {
784  int last = 0;
785  for (size_t i = 0; i < data.max_strings; i++) {
786  if (data.strings[i] != nullptr) {
787  this->WriteStringID(data.strings[i]->name, (int)i);
788  last = (int)i;
789  }
790  }
791 
792  this->WriteStringID("STR_LAST_STRINGID", last);
793 }
794 
795 static int TranslateArgumentIdx(int argidx, int offset)
796 {
797  int sum;
798 
799  if (argidx < 0 || (uint)argidx >= _cur_pcs.consuming_commands.max_size()) {
800  StrgenFatal("invalid argidx {}", argidx);
801  }
802  const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
803  if (cs != nullptr && cs->consumes <= offset) {
804  StrgenFatal("invalid argidx offset {}:{}", argidx, offset);
805  }
806 
807  if (_cur_pcs.consuming_commands[argidx] == nullptr) {
808  StrgenFatal("no command for this argidx {}", argidx);
809  }
810 
811  for (int i = sum = 0; i < argidx; i++) {
812  cs = _cur_pcs.consuming_commands[i];
813 
814  sum += (cs != nullptr) ? cs->consumes : 1;
815  }
816 
817  return sum + offset;
818 }
819 
820 static void PutArgidxCommand(Buffer *buffer)
821 {
822  buffer->AppendUtf8(SCC_ARG_INDEX);
823  buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
824 }
825 
826 
827 static void PutCommandString(Buffer *buffer, const char *str)
828 {
829  _cur_argidx = 0;
830 
831  while (*str != '\0') {
832  /* Process characters as they are until we encounter a { */
833  if (*str != '{') {
834  buffer->AppendByte(*str++);
835  continue;
836  }
837 
838  char param[MAX_COMMAND_PARAM_SIZE];
839  int argno;
840  int casei;
841  const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
842  if (cs == nullptr) break;
843 
844  if (casei != -1) {
845  buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
846  buffer->AppendByte(casei);
847  }
848 
849  /* For params that consume values, we need to handle the argindex properly */
850  if (cs->consumes > 0) {
851  /* Check if we need to output a move-param command */
852  if (argno != -1 && argno != _cur_argidx) {
853  _cur_argidx = argno;
854  PutArgidxCommand(buffer);
855  }
856 
857  /* Output the one from the master string... it's always accurate. */
858  cs = _cur_pcs.consuming_commands[_cur_argidx++];
859  if (cs == nullptr) {
860  StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
861  }
862  }
863 
864  cs->proc(buffer, param, cs->value);
865  }
866 }
867 
873 {
874  char buffer[2];
875  int offs = 0;
876  if (length >= 0x4000) {
877  StrgenFatal("string too long");
878  }
879 
880  if (length >= 0xC0) {
881  buffer[offs++] = (length >> 8) | 0xC0;
882  }
883  buffer[offs++] = length & 0xFF;
884  this->Write((uint8_t*)buffer, offs);
885 }
886 
892 {
893  std::vector<uint> in_use;
894  for (size_t tab = 0; tab < data.tabs; tab++) {
895  uint n = data.CountInUse((uint)tab);
896 
897  in_use.push_back(n);
898  _lang.offsets[tab] = TO_LE16(n);
899 
900  for (uint j = 0; j != in_use[tab]; j++) {
901  const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
902  if (ls != nullptr && ls->translated.empty()) _lang.missing++;
903  }
904  }
905 
907  _lang.version = TO_LE32(data.Version());
908  _lang.missing = TO_LE16(_lang.missing);
909  _lang.winlangid = TO_LE16(_lang.winlangid);
910 
911  this->WriteHeader(&_lang);
912  Buffer buffer;
913 
914  for (size_t tab = 0; tab < data.tabs; tab++) {
915  for (uint j = 0; j != in_use[tab]; j++) {
916  const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
917  const std::string *cmdp;
918 
919  /* For undefined strings, just set that it's an empty string */
920  if (ls == nullptr) {
921  this->WriteLength(0);
922  continue;
923  }
924 
925  _cur_ident = ls->name.c_str();
926  _cur_line = ls->line;
927 
928  /* Produce a message if a string doesn't have a translation. */
929  if (_show_todo > 0 && ls->translated.empty()) {
930  if ((_show_todo & 2) != 0) {
931  StrgenWarning("'{}' is untranslated", ls->name);
932  }
933  if ((_show_todo & 1) != 0) {
934  const char *s = "<TODO> ";
935  while (*s != '\0') buffer.AppendByte(*s++);
936  }
937  }
938 
939  /* Extract the strings and stuff from the english command string */
940  _cur_pcs = ExtractCommandString(ls->english.c_str(), false);
941 
942  if (!ls->translated_cases.empty() || !ls->translated.empty()) {
943  cmdp = &ls->translated;
944  } else {
945  cmdp = &ls->english;
946  }
947 
948  _translated = cmdp != &ls->english;
949 
950  if (!ls->translated_cases.empty()) {
951  /* Need to output a case-switch.
952  * It has this format
953  * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
954  * Each LEN is printed using 2 bytes in big endian order. */
955  buffer.AppendUtf8(SCC_SWITCH_CASE);
956  buffer.AppendByte((uint8_t)ls->translated_cases.size());
957 
958  /* Write each case */
959  for (const Case &c : ls->translated_cases) {
960  buffer.AppendByte(c.caseidx);
961  /* Make some space for the 16-bit length */
962  uint pos = (uint)buffer.size();
963  buffer.AppendByte(0);
964  buffer.AppendByte(0);
965  /* Write string */
966  PutCommandString(&buffer, c.string.c_str());
967  buffer.AppendByte(0); // terminate with a zero
968  /* Fill in the length */
969  uint size = (uint)buffer.size() - (pos + 2);
970  buffer[pos + 0] = GB(size, 8, 8);
971  buffer[pos + 1] = GB(size, 0, 8);
972  }
973  }
974 
975  if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
976 
977  this->WriteLength((uint)buffer.size());
978  this->Write(buffer.data(), buffer.size());
979  buffer.clear();
980  }
981  }
982 }
strecpy
void strecpy(std::span< char > dst, std::string_view src)
Copies characters from one buffer to another.
Definition: string.cpp:60
StringReader::HandlePragma
virtual void HandlePragma(char *str)
Handle the pragma of the file.
Definition: strgen_base.cpp:729
LangString::name
std::string name
Name of the string.
Definition: strgen.h:29
StringData::Add
void Add(std::unique_ptr< LangString > ls)
Add a newly created LangString.
Definition: strgen_base.cpp:90
StringData::name_to_string
std::unordered_map< std::string_view, LangString * > name_to_string
Lookup table for the strings.
Definition: strgen.h:43
StringData::max_strings
size_t max_strings
The maximum number of strings.
Definition: strgen.h:45
LangString::translated_cases
std::vector< Case > translated_cases
Cases of the translation.
Definition: strgen.h:34
LanguagePackHeader::IDENT
static const uint32_t IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition: language.h:25
StringData::VersionHashStr
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
Definition: strgen_base.cpp:115
MAX_PLURALS
static const int MAX_PLURALS
The maximum number of plurals.
Definition: strgen_tables.h:168
Buffer
The buffer for writing a single string.
Definition: strgen_base.cpp:178
Case::Case
Case(int caseidx, const std::string &string)
Create a new case.
Definition: strgen_base.cpp:42
_lang
LanguagePackHeader _lang
Header information about a language.
Definition: strgen_base.cpp:32
GB
constexpr static debug_inline uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, started at bit s.
Definition: bitmath_func.hpp:32
_cur_line
int _cur_line
The current line we're parsing in the input file.
Definition: strgen_base.cpp:30
Buffer::AppendByte
void AppendByte(uint8_t value)
Convenience method for adding a byte.
Definition: strgen_base.cpp:183
StringData::StringData
StringData(size_t tabs)
Create a new string data container.
Definition: strgen_base.cpp:70
LanguagePackHeader::num_genders
uint8_t num_genders
the number of genders of this language
Definition: language.h:53
C_DONTCOUNT
@ C_DONTCOUNT
These commands aren't counted for comparison.
Definition: strgen_tables.h:14
Case::caseidx
int caseidx
The index of the case.
Definition: strgen.h:21
LangString::LangString
LangString(const std::string &name, const std::string &english, size_t index, int line)
Create a new string.
Definition: strgen_base.cpp:54
StringData::strings
std::vector< std::unique_ptr< LangString > > strings
List of all known strings.
Definition: strgen.h:42
C_GENDER
@ C_GENDER
These commands support genders.
Definition: strgen_tables.h:16
LanguageWriter::WriteLang
virtual void WriteLang(const StringData &data)
Actually write the language.
Definition: strgen_base.cpp:891
StringReader::master
bool master
Are we reading the master file?
Definition: strgen.h:61
StringData::next_string_id
size_t next_string_id
The next string ID to allocate.
Definition: strgen.h:46
Case::string
std::string string
The translation of the case.
Definition: strgen.h:22
TAB_SIZE
static const uint TAB_SIZE
Number of strings per StringTab.
Definition: strings_type.h:46
LangString
Information about a single string.
Definition: strgen.h:28
StringData::Version
uint Version() const
Make a hash of the file to get a unique "version number".
Definition: strgen_base.cpp:128
MAX_NUM_CASES
static const uint8_t MAX_NUM_CASES
Maximum number of supported cases.
Definition: language.h:21
IsInsideBS
constexpr bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is between a window started at some base point.
Definition: math_func.hpp:252
StringReader::StringReader
StringReader(StringData &data, const std::string &file, bool master, bool translation)
Prepare reading.
Definition: strgen_base.cpp:528
StringData
Information about the currently known strings.
Definition: strgen.h:41
C_CASE
@ C_CASE
These commands support cases.
Definition: strgen_tables.h:15
PluralForm::plural_count
int plural_count
The number of plural forms.
Definition: strgen_tables.h:162
StringData::CountInUse
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
Definition: strgen_base.cpp:164
strgen.h
MAX_COMMAND_PARAM_SIZE
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE
Maximum size of every command block, not counting the name of the command itself.
Definition: strgen_base.cpp:34
MAX_NUM_GENDERS
static const uint8_t MAX_NUM_GENDERS
Maximum number of supported genders.
Definition: language.h:20
lengthof
#define lengthof(array)
Return the length of an fixed size array.
Definition: stdafx.h:280
StringData::FreeTranslation
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:77
LanguagePackHeader::offsets
uint16_t offsets[TEXT_TAB_END]
the offsets
Definition: language.h:32
CmdStruct
Definition: strgen_tables.h:23
LanguagePackHeader::digit_decimal_separator
char digit_decimal_separator[8]
Decimal separator.
Definition: language.h:39
LanguagePackHeader::plural_form
uint8_t plural_form
plural form index
Definition: language.h:41
LanguageWriter::WriteHeader
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
LangString::english
std::string english
English text.
Definition: strgen.h:30
LangString::translated
std::string translated
Translated text.
Definition: strgen.h:31
StringReader::ReadLine
virtual std::optional< std::string > ReadLine()=0
Read a single line from the source of strings.
CmdPair
Definition: strgen.h:137
LanguageWriter::Write
virtual void Write(const uint8_t *buffer, size_t length)=0
Write a number of bytes.
LanguagePackHeader::version
uint32_t version
32-bits of auto generated version info which is basically a hash of strings.h
Definition: language.h:28
CASE_GENDER_LEN
static const uint8_t CASE_GENDER_LEN
The (maximum) length of a case/gender string.
Definition: language.h:19
StringData::tabs
size_t tabs
The number of 'tabs' of strings.
Definition: strgen.h:44
LanguagePackHeader::missing
uint16_t missing
number of missing strings.
Definition: language.h:40
Buffer::AppendUtf8
void AppendUtf8(uint32_t value)
Add a Unicode character encoded in UTF-8 to the buffer.
Definition: strgen_base.cpp:192
StringReader::data
StringData & data
The data to fill during reading.
Definition: strgen.h:59
LanguagePackHeader::GetGenderIndex
uint8_t GetGenderIndex(const char *gender_str) const
Get the index for the given gender.
Definition: language.h:68
LanguagePackHeader::digit_group_separator_currency
char digit_group_separator_currency[8]
Thousand separator used for currencies.
Definition: language.h:37
HeaderWriter::WriteStringID
virtual void WriteStringID(const std::string &name, int stringid)=0
Write the string ID.
LanguagePackHeader::ident
uint32_t ident
32-bit identifier
Definition: language.h:27
LangString::line
int line
Line of string in source-file.
Definition: strgen.h:33
StringReader::translation
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
Definition: strgen.h:62
StringReader::ParseFile
virtual void ParseFile()
Start parsing the file.
Definition: strgen_base.cpp:746
StringData::Find
LangString * Find(const std::string_view s)
Find a LangString based on the string name.
Definition: strgen_base.cpp:101
_translation
static bool _translation
Is the current file actually a translation or not.
Definition: strgen_base.cpp:28
ParsedCommandStruct
Definition: strgen.h:142
LanguagePackHeader::GetCaseIndex
uint8_t GetCaseIndex(const char *case_str) const
Get the index for the given case.
Definition: language.h:81
HeaderWriter::WriteHeader
void WriteHeader(const StringData &data)
Write the header information.
Definition: strgen_base.cpp:782
LanguagePackHeader::digit_group_separator
char digit_group_separator[8]
Thousand separator used for anything not currencies.
Definition: language.h:35
LanguagePackHeader::winlangid
uint16_t winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determine whether a font can be used for the language or not.
Definition: language.h:51
Utf8Decode
size_t Utf8Decode(char32_t *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition: string.cpp:419
MemSetT
void MemSetT(T *ptr, uint8_t value, size_t num=1)
Type-safe version of memset().
Definition: mem_func.hpp:49
Case
Container for the different cases of a string.
Definition: strgen.h:20
_plural_forms
static const PluralForm _plural_forms[]
All plural forms used.
Definition: strgen_tables.h:171
LangString::FreeTranslation
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:60
LanguageWriter::WriteLength
virtual void WriteLength(uint length)
Write the length as a simple gamma.
Definition: strgen_base.cpp:872
_file
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
Definition: strgen_base.cpp:29
_translated
static bool _translated
Whether the current language is not the master language.
Definition: strgen_base.cpp:27
StringReader::file
const std::string file
The file we are reading.
Definition: strgen.h:60
LanguagePackHeader
Header of a language file.
Definition: language.h:24
HasBit
constexpr debug_inline bool HasBit(const T x, const uint8_t y)
Checks if a bit in a value is set.
Definition: bitmath_func.hpp:103