OpenTTD
strgen_base.cpp
Go to the documentation of this file.
1 /* $Id: strgen_base.cpp 27380 2015-08-10 20:21:29Z michi_cc $ */
2 
3 /*
4  * This file is part of OpenTTD.
5  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8  */
9 
12 #include "../stdafx.h"
13 #include "../core/endian_func.hpp"
14 #include "../string_func.h"
15 #include "../table/control_codes.h"
16 
17 #include "strgen.h"
18 
19 
20 #include "../table/strgen_tables.h"
21 
22 #include "../safeguards.h"
23 
24 /* Compiles a list of strings into a compiled string list */
25 
26 static bool _translated;
27 static bool _translation;
28 const char *_file = "(unknown file)";
29 int _cur_line;
30 int _errors, _warnings, _show_todo;
32 
33 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100;
34 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
35 
42 Case::Case(int caseidx, const char *string, Case *next) :
43  caseidx(caseidx), string(stredup(string)), next(next)
44 {
45 }
46 
49 {
50  free(this->string);
51  delete this->next;
52 }
53 
61 LangString::LangString(const char *name, const char *english, int index, int line) :
62  name(stredup(name)), english(stredup(english)), translated(NULL),
63  hash_next(0), index(index), line(line), translated_case(NULL)
64 {
65 }
66 
69 {
70  free(this->name);
71  free(this->english);
72  free(this->translated);
73  delete this->translated_case;
74 }
75 
78 {
79  free(this->translated);
80  this->translated = NULL;
81 
82  delete this->translated_case;
83  this->translated_case = NULL;
84 }
85 
90 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
91 {
92  this->strings = CallocT<LangString *>(max_strings);
93  this->hash_heads = CallocT<uint16>(max_strings);
94  this->next_string_id = 0;
95 }
96 
99 {
100  for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
101  free(this->strings);
102  free(this->hash_heads);
103 }
104 
107 {
108  for (size_t i = 0; i < this->max_strings; i++) {
109  LangString *ls = this->strings[i];
110  if (ls != NULL) ls->FreeTranslation();
111  }
112 }
113 
119 uint StringData::HashStr(const char *s) const
120 {
121  uint hash = 0;
122  for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
123  return hash % this->max_strings;
124 }
125 
131 void StringData::Add(const char *s, LangString *ls)
132 {
133  uint hash = this->HashStr(s);
134  ls->hash_next = this->hash_heads[hash];
135  /* Off-by-one for hash find. */
136  this->hash_heads[hash] = ls->index + 1;
137  this->strings[ls->index] = ls;
138 }
139 
146 {
147  int idx = this->hash_heads[this->HashStr(s)];
148 
149  while (--idx >= 0) {
150  LangString *ls = this->strings[idx];
151 
152  if (strcmp(ls->name, s) == 0) return ls;
153  idx = ls->hash_next;
154  }
155  return NULL;
156 }
157 
164 uint StringData::VersionHashStr(uint hash, const char *s) const
165 {
166  for (; *s != '\0'; s++) {
167  hash = ROL(hash, 3) ^ *s;
168  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
169  }
170  return hash;
171 }
172 
178 {
179  uint hash = 0;
180 
181  for (size_t i = 0; i < this->max_strings; i++) {
182  const LangString *ls = this->strings[i];
183 
184  if (ls != NULL) {
185  const CmdStruct *cs;
186  const char *s;
187  char buf[MAX_COMMAND_PARAM_SIZE];
188  int argno;
189  int casei;
190 
191  s = ls->name;
192  hash ^= i * 0x717239;
193  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
194  hash = this->VersionHashStr(hash, s + 1);
195 
196  s = ls->english;
197  while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
198  if (cs->flags & C_DONTCOUNT) continue;
199 
200  hash ^= (cs - _cmd_structs) * 0x1234567;
201  hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
202  }
203  }
204  }
205 
206  return hash;
207 }
208 
213 uint StringData::CountInUse(uint tab) const
214 {
215  int i;
216  for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != NULL) break;
217  return i + 1;
218 }
219 
220 static const char *_cur_ident;
221 
/** A non-consuming command found in a string, together with the parameter text it was given. */
222 struct CmdPair {
223  const CmdStruct *a; /* The command; CheckCommandsMatch() resets it to NULL once the pair has been matched. */
224  const char *v; /* The parameter text: a stredup()'d copy, or "" when the command had no parameters. */
225 };
226 
228  uint np;
229  CmdPair pairs[32];
230  const CmdStruct *cmd[32]; // ordered by param #
231 };
232 
233 /* Used when generating some advanced commands. */
234 static ParsedCommandStruct _cur_pcs;
235 static int _cur_argidx;
236 
238 struct Buffer : SmallVector<byte, 256> {
243  void AppendByte(byte value)
244  {
245  *this->Append() = value;
246  }
247 
252  void AppendUtf8(uint32 value)
253  {
254  if (value < 0x80) {
255  *this->Append() = value;
256  } else if (value < 0x800) {
257  *this->Append() = 0xC0 + GB(value, 6, 5);
258  *this->Append() = 0x80 + GB(value, 0, 6);
259  } else if (value < 0x10000) {
260  *this->Append() = 0xE0 + GB(value, 12, 4);
261  *this->Append() = 0x80 + GB(value, 6, 6);
262  *this->Append() = 0x80 + GB(value, 0, 6);
263  } else if (value < 0x110000) {
264  *this->Append() = 0xF0 + GB(value, 18, 3);
265  *this->Append() = 0x80 + GB(value, 12, 6);
266  *this->Append() = 0x80 + GB(value, 6, 6);
267  *this->Append() = 0x80 + GB(value, 0, 6);
268  } else {
269  strgen_warning("Invalid unicode value U+0x%X", value);
270  }
271  }
272 };
273 
274 size_t Utf8Validate(const char *s)
275 {
276  uint32 c;
277 
278  if (!HasBit(s[0], 7)) {
279  /* 1 byte */
280  return 1;
281  } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
282  /* 2 bytes */
283  c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
284  if (c >= 0x80) return 2;
285  } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
286  /* 3 bytes */
287  c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
288  if (c >= 0x800) return 3;
289  } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
290  /* 4 bytes */
291  c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
292  if (c >= 0x10000 && c <= 0x10FFFF) return 4;
293  }
294 
295  return 0;
296 }
297 
298 
299 void EmitSingleChar(Buffer *buffer, char *buf, int value)
300 {
301  if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
302  buffer->AppendUtf8(value);
303 }
304 
305 
306 /* The plural specifier looks like
307  * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
308 
309 /* This is encoded like
310  * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
311 
/**
 * Parse an argument reference of the form "[+]<number>[:<offset>]".
 *
 * A number with a leading '+' or a negative number is relative: it is added
 * to the current \a *value. Any other number replaces \a *value.
 * The outputs (\a *buf, \a *value and \a *offset) are only written when the
 * whole reference parsed successfully; on failure they are left untouched.
 *
 * @param buf    [in,out] The text to parse; on success advanced past the parsed reference.
 * @param value  [in,out] The current argument index; on success the new argument index.
 * @param offset [out] Set to the number after ':' when one is present; may be NULL,
 *               in which case a ":<offset>" suffix is not parsed.
 * @return True when a number was parsed, false when there was none or the offset after ':' was malformed.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *s = *buf;
	char *end;

	while (*s == ' ' || *s == '\t') s++;

	const bool rel = (*s == '+');
	if (rel) s++;

	const int v = (int)strtol(s, &end, 0);
	if (end == s) return false;
	const int new_value = (rel || v < 0) ? *value + v : v;

	bool has_offset = false;
	int new_offset = 0;
	if (offset != NULL && *end == ':') {
		/* Take the Nth within */
		s = end + 1;
		new_offset = (int)strtol(s, &end, 0);
		if (end == s) return false;
		has_offset = true;
	}

	/* Everything parsed; only now commit the results. */
	*value = new_value;
	if (has_offset) *offset = new_offset;
	*buf = end;
	return true;
}
339 
340 /* Parse out the next word, or NULL */
/**
 * Split the next word off the given text.
 *
 * A word is either a run of characters up to the next space or tab, or, when
 * it starts with a double quote, everything up to the closing double quote
 * (or the end of the text). The delimiter ending the word is overwritten
 * with a NUL, so the input is modified in place.
 *
 * @param buf [in,out] The text to parse; advanced past the word and its
 *            delimiter. Left untouched when no word was found.
 * @return The start of the word, or NULL when only whitespace was left.
 */
char *ParseWord(char **buf)
{
	char *cur = *buf;

	while (*cur == ' ' || *cur == '\t') cur++;
	if (*cur == '\0') return NULL;

	const bool quoted = (*cur == '"');
	if (quoted) cur++; // The word starts after the opening quote.
	char *word = cur;

	while (*cur != '\0') {
		const bool at_delimiter = quoted ? (*cur == '"') : (*cur == ' ' || *cur == '\t');
		if (at_delimiter) {
			/* Terminate the word and continue after the delimiter. */
			*cur++ = '\0';
			break;
		}
		cur++;
	}

	*buf = cur;
	return word;
}
374 
375 /* Forward declaration */
376 static int TranslateArgumentIdx(int arg, int offset = 0);
377 
378 static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
379 {
380  buffer->AppendByte(nw);
381  for (uint i = 0; i < nw; i++) buffer->AppendByte((byte)strlen(words[i]) + 1);
382  for (uint i = 0; i < nw; i++) {
383  for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
384  buffer->AppendByte(0);
385  }
386 }
387 
388 void EmitPlural(Buffer *buffer, char *buf, int value)
389 {
390  int argidx = _cur_argidx;
391  int offset = -1;
392  int expected = _plural_forms[_lang.plural_form].plural_count;
393  const char **words = AllocaM(const char *, max(expected, MAX_PLURALS));
394  int nw = 0;
395 
396  /* Parse out the number, if one exists. Otherwise default to prev arg. */
397  if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
398 
399  const CmdStruct *cmd = _cur_pcs.cmd[argidx];
400  if (offset == -1) {
401  /* Use default offset */
402  if (cmd == NULL || cmd->default_plural_offset < 0) {
403  strgen_fatal("Command '%s' has no (default) plural position", cmd == NULL ? "<empty>" : cmd->cmd);
404  }
405  offset = cmd->default_plural_offset;
406  }
407 
408  /* Parse each string */
409  for (nw = 0; nw < MAX_PLURALS; nw++) {
410  words[nw] = ParseWord(&buf);
411  if (words[nw] == NULL) break;
412  }
413 
414  if (nw == 0) {
415  strgen_fatal("%s: No plural words", _cur_ident);
416  }
417 
418  if (expected != nw) {
419  if (_translated) {
420  strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
421  expected, nw);
422  } else {
423  if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
424  if (nw > expected) {
425  nw = expected;
426  } else {
427  for (; nw < expected; nw++) {
428  words[nw] = words[nw - 1];
429  }
430  }
431  }
432  }
433 
434  buffer->AppendUtf8(SCC_PLURAL_LIST);
435  buffer->AppendByte(_lang.plural_form);
436  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
437  EmitWordList(buffer, words, nw);
438 }
439 
440 
441 void EmitGender(Buffer *buffer, char *buf, int value)
442 {
443  int argidx = _cur_argidx;
444  int offset = 0;
445  uint nw;
446 
447  if (buf[0] == '=') {
448  buf++;
449 
450  /* This is a {G=DER} command */
451  nw = _lang.GetGenderIndex(buf);
452  if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
453 
454  /* now nw contains the gender index */
455  buffer->AppendUtf8(SCC_GENDER_INDEX);
456  buffer->AppendByte(nw);
457  } else {
458  const char *words[MAX_NUM_GENDERS];
459 
460  /* This is a {G 0 foo bar two} command.
461  * If no relative number exists, default to +0 */
462  if (!ParseRelNum(&buf, &argidx, &offset)) {}
463 
464  const CmdStruct *cmd = _cur_pcs.cmd[argidx];
465  if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
466  strgen_fatal("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
467  }
468 
469  for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
470  words[nw] = ParseWord(&buf);
471  if (words[nw] == NULL) break;
472  }
473  if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
474 
475  assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
476  buffer->AppendUtf8(SCC_GENDER_LIST);
477  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
478  EmitWordList(buffer, words, nw);
479  }
480 }
481 
482 static const CmdStruct *FindCmd(const char *s, int len)
483 {
484  for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
485  if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
486  }
487  return NULL;
488 }
489 
490 static uint ResolveCaseName(const char *str, size_t len)
491 {
492  /* First get a clean copy of only the case name, then resolve it. */
493  char case_str[CASE_GENDER_LEN];
494  len = min(lengthof(case_str) - 1, len);
495  memcpy(case_str, str, len);
496  case_str[len] = '\0';
497 
498  uint8 case_idx = _lang.GetCaseIndex(case_str);
499  if (case_idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", case_str);
500  return case_idx + 1;
501 }
502 
503 
504 /* returns NULL on eof
505  * else returns command struct */
506 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
507 {
508  const char *s = *str, *start;
509  char c;
510 
511  *argno = -1;
512  *casei = -1;
513 
514  /* Scan to the next command, exit if there's no next command. */
515  for (; *s != '{'; s++) {
516  if (*s == '\0') return NULL;
517  }
518  s++; // Skip past the {
519 
520  if (*s >= '0' && *s <= '9') {
521  char *end;
522 
523  *argno = strtoul(s, &end, 0);
524  if (*end != ':') strgen_fatal("missing arg #");
525  s = end + 1;
526  }
527 
528  /* parse command name */
529  start = s;
530  do {
531  c = *s++;
532  } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
533 
534  const CmdStruct *cmd = FindCmd(start, s - start - 1);
535  if (cmd == NULL) {
536  strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
537  return NULL;
538  }
539 
540  if (c == '.') {
541  const char *casep = s;
542 
543  if (!(cmd->flags & C_CASE)) {
544  strgen_fatal("Command '%s' can't have a case", cmd->cmd);
545  }
546 
547  do {
548  c = *s++;
549  } while (c != '}' && c != ' ' && c != '\0');
550  *casei = ResolveCaseName(casep, s - casep - 1);
551  }
552 
553  if (c == '\0') {
554  strgen_error("Missing } from command '%s'", start);
555  return NULL;
556  }
557 
558 
559  if (c != '}') {
560  if (c == '=') s--;
561  /* copy params */
562  start = s;
563  for (;;) {
564  c = *s++;
565  if (c == '}') break;
566  if (c == '\0') {
567  strgen_error("Missing } from command '%s'", start);
568  return NULL;
569  }
570  if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
571  *param++ = c;
572  }
573  }
574  *param = '\0';
575 
576  *str = s;
577 
578  return cmd;
579 }
580 
588 StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
589  data(data), file(stredup(file)), master(master), translation(translation)
590 {
591 }
592 
595 {
596  free(file);
597 }
598 
599 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
600 {
601  char param[MAX_COMMAND_PARAM_SIZE];
602  int argno;
603  int argidx = 0;
604  int casei;
605 
606  memset(p, 0, sizeof(*p));
607 
608  for (;;) {
609  /* read until next command from a. */
610  const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
611 
612  if (ar == NULL) break;
613 
614  /* Sanity checking */
615  if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
616 
617  if (ar->consumes) {
618  if (argno != -1) argidx = argno;
619  if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
620  if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
621 
622  p->cmd[argidx++] = ar;
623  } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
624  if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
625  p->pairs[p->np].a = ar;
626  p->pairs[p->np].v = param[0] != '\0' ? stredup(param) : "";
627  p->np++;
628  }
629  }
630 }
631 
632 
633 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
634 {
635  if (a == NULL) return NULL;
636 
637  if (strcmp(a->cmd, "STRING1") == 0 ||
638  strcmp(a->cmd, "STRING2") == 0 ||
639  strcmp(a->cmd, "STRING3") == 0 ||
640  strcmp(a->cmd, "STRING4") == 0 ||
641  strcmp(a->cmd, "STRING5") == 0 ||
642  strcmp(a->cmd, "STRING6") == 0 ||
643  strcmp(a->cmd, "STRING7") == 0 ||
644  strcmp(a->cmd, "RAW_STRING") == 0) {
645  return FindCmd("STRING", 6);
646  }
647 
648  return a;
649 }
650 
651 
652 static bool CheckCommandsMatch(char *a, char *b, const char *name)
653 {
654  /* If we're not translating, i.e. we're compiling the base language,
655  * it is pointless to do all these checks as it'll always be correct.
656  * After all, all checks are based on the base language.
657  */
658  if (!_translation) return true;
659 
660  ParsedCommandStruct templ;
661  ParsedCommandStruct lang;
662  bool result = true;
663 
664  ExtractCommandString(&templ, b, true);
665  ExtractCommandString(&lang, a, true);
666 
667  /* For each string in templ, see if we find it in lang */
668  if (templ.np != lang.np) {
669  strgen_warning("%s: template string and language string have a different # of commands", name);
670  result = false;
671  }
672 
673  for (uint i = 0; i < templ.np; i++) {
674  /* see if we find it in lang, and zero it out */
675  bool found = false;
676  for (uint j = 0; j < lang.np; j++) {
677  if (templ.pairs[i].a == lang.pairs[j].a &&
678  strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
679  /* it was found in both. zero it out from lang so we don't find it again */
680  lang.pairs[j].a = NULL;
681  found = true;
682  break;
683  }
684  }
685 
686  if (!found) {
687  strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
688  result = false;
689  }
690  }
691 
692  /* if we reach here, all non consumer commands match up.
693  * Check if the non consumer commands match up also. */
694  for (uint i = 0; i < lengthof(templ.cmd); i++) {
695  if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
696  strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
697  lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
698  templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
699  result = false;
700  }
701  }
702 
703  return result;
704 }
705 
706 void StringReader::HandleString(char *str)
707 {
708  if (*str == '#') {
709  if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
710  return;
711  }
712 
713  /* Ignore comments & blank lines */
714  if (*str == ';' || *str == ' ' || *str == '\0') return;
715 
716  char *s = strchr(str, ':');
717  if (s == NULL) {
718  strgen_error("Line has no ':' delimiter");
719  return;
720  }
721 
722  char *t;
723  /* Trim spaces.
724  * After this str points to the command name, and s points to the command contents */
725  for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
726  *t = 0;
727  s++;
728 
729  /* Check string is valid UTF-8 */
730  const char *tmp;
731  for (tmp = s; *tmp != '\0';) {
732  size_t len = Utf8Validate(tmp);
733  if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
734 
735  WChar c;
736  Utf8Decode(&c, tmp);
737  if (c <= 0x001F || // ASCII control character range
738  c == 0x200B || // Zero width space
739  (c >= 0xE000 && c <= 0xF8FF) || // Private range
740  (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
741  strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
742  }
743 
744  tmp += len;
745  }
746 
747  /* Check if the string has a case..
748  * The syntax for cases is IDENTNAME.case */
749  char *casep = strchr(str, '.');
750  if (casep != NULL) *casep++ = '\0';
751 
752  /* Check if this string already exists.. */
753  LangString *ent = this->data.Find(str);
754 
755  if (this->master) {
756  if (casep != NULL) {
757  strgen_error("Cases in the base translation are not supported.");
758  return;
759  }
760 
761  if (ent != NULL) {
762  strgen_error("String name '%s' is used multiple times", str);
763  return;
764  }
765 
766  if (this->data.strings[this->data.next_string_id] != NULL) {
767  strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
768  return;
769  }
770 
771  /* Allocate a new LangString */
772  this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
773  } else {
774  if (ent == NULL) {
775  strgen_warning("String name '%s' does not exist in master file", str);
776  return;
777  }
778 
779  if (ent->translated && casep == NULL) {
780  strgen_error("String name '%s' is used multiple times", str);
781  return;
782  }
783 
784  /* make sure that the commands match */
785  if (!CheckCommandsMatch(s, ent->english, str)) return;
786 
787  if (casep != NULL) {
788  ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
789  } else {
790  ent->translated = stredup(s);
791  /* If the string was translated, use the line from the
792  * translated language so errors in the translated file
793  * are properly referenced to. */
794  ent->line = _cur_line;
795  }
796  }
797 }
798 
800 {
801  if (!memcmp(str, "plural ", 7)) {
802  _lang.plural_form = atoi(str + 7);
803  if (_lang.plural_form >= lengthof(_plural_forms)) {
804  strgen_fatal("Invalid pluralform %d", _lang.plural_form);
805  }
806  } else {
807  strgen_fatal("unknown pragma '%s'", str);
808  }
809 }
810 
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * Tabs are not removed.
 * @param buf The NUL terminated string to strip.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end != buf) {
		const char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}

	*end = '\0';
}
817 
819 {
820  char buf[2048];
821  _warnings = _errors = 0;
822 
823  _translation = this->master || this->translation;
824  _file = this->file;
825 
826  /* For each new file we parse, reset the genders, and language codes. */
827  MemSetT(&_lang, 0);
831 
832  _cur_line = 1;
833  while (this->ReadLine(buf, lastof(buf)) != NULL) {
834  rstrip(buf);
835  this->HandleString(buf);
836  _cur_line++;
837  }
838 }
839 
845 {
846  int last = 0;
847  for (size_t i = 0; i < data.max_strings; i++) {
848  if (data.strings[i] != NULL) {
849  this->WriteStringID(data.strings[i]->name, (int)i);
850  last = (int)i;
851  }
852  }
853 
854  this->WriteStringID("STR_LAST_STRINGID", last);
855 }
856 
857 static int TranslateArgumentIdx(int argidx, int offset)
858 {
859  int sum;
860 
861  if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
862  strgen_fatal("invalid argidx %d", argidx);
863  }
864  const CmdStruct *cs = _cur_pcs.cmd[argidx];
865  if (cs != NULL && cs->consumes <= offset) {
866  strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
867  }
868 
869  if (_cur_pcs.cmd[argidx] == NULL) {
870  strgen_fatal("no command for this argidx %d", argidx);
871  }
872 
873  for (int i = sum = 0; i < argidx; i++) {
874  const CmdStruct *cs = _cur_pcs.cmd[i];
875 
876  sum += (cs != NULL) ? cs->consumes : 1;
877  }
878 
879  return sum + offset;
880 }
881 
882 static void PutArgidxCommand(Buffer *buffer)
883 {
884  buffer->AppendUtf8(SCC_ARG_INDEX);
885  buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
886 }
887 
888 
889 static void PutCommandString(Buffer *buffer, const char *str)
890 {
891  _cur_argidx = 0;
892 
893  while (*str != '\0') {
894  /* Process characters as they are until we encounter a { */
895  if (*str != '{') {
896  buffer->AppendByte(*str++);
897  continue;
898  }
899 
900  char param[MAX_COMMAND_PARAM_SIZE];
901  int argno;
902  int casei;
903  const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
904  if (cs == NULL) break;
905 
906  if (casei != -1) {
907  buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
908  buffer->AppendByte(casei);
909  }
910 
911  /* For params that consume values, we need to handle the argindex properly */
912  if (cs->consumes > 0) {
913  /* Check if we need to output a move-param command */
914  if (argno != -1 && argno != _cur_argidx) {
915  _cur_argidx = argno;
916  PutArgidxCommand(buffer);
917  }
918 
919  /* Output the one from the master string... it's always accurate. */
920  cs = _cur_pcs.cmd[_cur_argidx++];
921  if (cs == NULL) {
922  strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
923  }
924  }
925 
926  cs->proc(buffer, param, cs->value);
927  }
928 }
929 
935 {
936  char buffer[2];
937  int offs = 0;
938  if (length >= 0x4000) {
939  strgen_fatal("string too long");
940  }
941 
942  if (length >= 0xC0) {
943  buffer[offs++] = (length >> 8) | 0xC0;
944  }
945  buffer[offs++] = length & 0xFF;
946  this->Write((byte*)buffer, offs);
947 }
948 
954 {
955  uint *in_use = AllocaM(uint, data.tabs);
956  for (size_t tab = 0; tab < data.tabs; tab++) {
957  uint n = data.CountInUse((uint)tab);
958 
959  in_use[tab] = n;
960  _lang.offsets[tab] = TO_LE16(n);
961 
962  for (uint j = 0; j != in_use[tab]; j++) {
963  const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
964  if (ls != NULL && ls->translated == NULL) _lang.missing++;
965  }
966  }
967 
968  _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
969  _lang.version = TO_LE32(data.Version());
970  _lang.missing = TO_LE16(_lang.missing);
971  _lang.winlangid = TO_LE16(_lang.winlangid);
972 
973  this->WriteHeader(&_lang);
974  Buffer buffer;
975 
976  for (size_t tab = 0; tab < data.tabs; tab++) {
977  for (uint j = 0; j != in_use[tab]; j++) {
978  const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
979  const Case *casep;
980  const char *cmdp;
981 
982  /* For undefined strings, just set that it's an empty string */
983  if (ls == NULL) {
984  this->WriteLength(0);
985  continue;
986  }
987 
988  _cur_ident = ls->name;
989  _cur_line = ls->line;
990 
991  /* Produce a message if a string doesn't have a translation. */
992  if (_show_todo > 0 && ls->translated == NULL) {
993  if ((_show_todo & 2) != 0) {
994  strgen_warning("'%s' is untranslated", ls->name);
995  }
996  if ((_show_todo & 1) != 0) {
997  const char *s = "<TODO> ";
998  while (*s != '\0') buffer.AppendByte(*s++);
999  }
1000  }
1001 
1002  /* Extract the strings and stuff from the english command string */
1003  ExtractCommandString(&_cur_pcs, ls->english, false);
1004 
1005  if (ls->translated_case != NULL || ls->translated != NULL) {
1006  casep = ls->translated_case;
1007  cmdp = ls->translated;
1008  } else {
1009  casep = NULL;
1010  cmdp = ls->english;
1011  }
1012 
1013  _translated = cmdp != ls->english;
1014 
1015  if (casep != NULL) {
1016  const Case *c;
1017  uint num;
1018 
1019  /* Need to output a case-switch.
1020  * It has this format
1021  * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
1022  * Each LEN is printed using 2 bytes in big endian order. */
1023  buffer.AppendUtf8(SCC_SWITCH_CASE);
1024  /* Count the number of cases */
1025  for (num = 0, c = casep; c; c = c->next) num++;
1026  buffer.AppendByte(num);
1027 
1028  /* Write each case */
1029  for (c = casep; c != NULL; c = c->next) {
1030  buffer.AppendByte(c->caseidx);
1031  /* Make some space for the 16-bit length */
1032  uint pos = buffer.Length();
1033  buffer.AppendByte(0);
1034  buffer.AppendByte(0);
1035  /* Write string */
1036  PutCommandString(&buffer, c->string);
1037  buffer.AppendByte(0); // terminate with a zero
1038  /* Fill in the length */
1039  uint size = buffer.Length() - (pos + 2);
1040  buffer[pos + 0] = GB(size, 8, 8);
1041  buffer[pos + 1] = GB(size, 0, 8);
1042  }
1043  }
1044 
1045  if (cmdp != NULL) PutCommandString(&buffer, cmdp);
1046 
1047  this->WriteLength(buffer.Length());
1048  this->Write(buffer.Begin(), buffer.Length());
1049  buffer.Clear();
1050  }
1051  }
1052 }
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
Definition: strgen_base.cpp:28
void AppendByte(byte value)
Convenience method for adding a byte.
size_t max_strings
The maximum number of strings.
Definition: strgen.h:47
uint8 GetCaseIndex(const char *case_str) const
Get the index for the given case.
Definition: language.h:82
static T ROL(const T x, const uint8 n)
ROtate x Left by n.
static bool _translation
Is the current file actually a translation or not.
Definition: strgen_base.cpp:27
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
Container for the different cases of a string.
Definition: strgen.h:18
uint HashStr(const char *s) const
Create a hash of the string for finding them back quickly.
virtual void WriteLang(const StringData &data)
Actually write the language.
StringReader(StringData &data, const char *file, bool master, bool translation)
Prepare reading.
virtual char * ReadLine(char *buffer, const char *last)=0
Read a single line from the source of strings.
bool master
Are we reading the master file?
Definition: strgen.h:65
LanguagePackHeader _lang
Header information about a language.
Definition: strgen_base.cpp:31
uint32 version
32-bits of auto generated version info which is basically a hash of strings.h
Definition: language.h:30
static bool IsInsideBS(const T x, const uint base, const uint size)
Checks if a value is between a window started at some base point.
Definition: math_func.hpp:250
void Clear()
Remove all items from the list.
The buffer for writing a single string.
const T * Begin() const
Get the pointer to the first item (const)
LangString(const char *name, const char *english, int index, int line)
Create a new string.
Definition: strgen_base.cpp:61
static const uint32 IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition: language.h:27
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition: string.cpp:437
char * translated
Translated text.
Definition: strgen.h:31
#define lastof(x)
Get the last element of a fixed size array.
Definition: depend.cpp:50
Simple vector template class.
virtual void WriteLength(uint length)
Write the length as a simple gamma.
uint16 offsets[TEXT_TAB_END]
the offsets
Definition: language.h:34
#define AllocaM(T, num_elements)
alloca() has to be called in the parent function, so define AllocaM() as a macro
Definition: alloc_func.hpp:134
void Add(const char *s, LangString *ls)
Add a newly created LangString.
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
static T max(const T a, const T b)
Returns the maximum of two values.
Definition: math_func.hpp:26
StringData(size_t tabs)
Create a new string data container.
Definition: strgen_base.cpp:90
uint16 winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determin...
Definition: language.h:53
~LangString()
Free everything we allocated.
Definition: strgen_base.cpp:68
LangString * Find(const char *s)
Find a LangString based on the string name.
Information about the currently known strings.
Definition: strgen.h:43
virtual ~StringReader()
Make sure the right reader gets freed.
~Case()
Free everything we allocated.
Definition: strgen_base.cpp:48
char * english
English text.
Definition: strgen.h:30
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false...
Definition: strgen.h:66
uint Length() const
Get the number of items in the list.
LangString ** strings
Array of all known strings.
Definition: strgen.h:44
Header of a language file.
Definition: language.h:26
int caseidx
The index of the case.
Definition: strgen.h:19
These commands support genders.
Definition: strgen_tables.h:18
static const int MAX_PLURALS
The maximum number of plurals.
static const uint TAB_SIZE
Number of strings per StringTab.
Definition: strings_type.h:48
int plural_count
The number of plural forms.
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE
Maximum size of every command block, not counting the name of the command itself. ...
Definition: strgen_base.cpp:33
Information about a single string.
Definition: strgen.h:28
~StringData()
Free everything we allocated.
Definition: strgen_base.cpp:98
Structures related to strgen.
uint Version() const
Make a hash of the file to get a unique "version number".
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
Definition: string.cpp:126
char * name
Name of the string.
Definition: strgen.h:29
Case * next
The next, chained, case.
Definition: strgen.h:21
static const uint8 MAX_NUM_GENDERS
Maximum number of supported genders.
Definition: language.h:22
const char * file
The file we are reading.
Definition: strgen.h:64
byte plural_form
plural form index
Definition: language.h:43
#define lengthof(x)
Return the length of a fixed size array.
Definition: depend.cpp:42
static const uint8 MAX_NUM_CASES
Maximum number of supported cases.
Definition: language.h:23
static T min(const T a, const T b)
Returns the minimum of two values.
Definition: math_func.hpp:42
void FreeTranslation()
Free all data related to the translation.
virtual void HandlePragma(char *str)
Handle the pragma of the file.
uint8 GetGenderIndex(const char *gender_str) const
Get the index for the given gender.
Definition: language.h:69
char * string
The translation of the case.
Definition: strgen.h:20
virtual void ParseFile()
Start parsing the file.
int _cur_line
The current line we're parsing in the input file.
Definition: strgen_base.cpp:29
Case * translated_case
Cases of the translation.
Definition: strgen.h:35
uint16 missing
number of missing strings.
Definition: language.h:42
StringData & data
The data to fill during reading.
Definition: strgen.h:63
uint8 num_genders
the number of genders of this language
Definition: language.h:55
char digit_decimal_separator[8]
Decimal separator.
Definition: language.h:41
char digit_group_separator[8]
Thousand separator used for anything not currencies.
Definition: language.h:37
static const uint8 CASE_GENDER_LEN
The (maximum) length of a case/gender string.
Definition: language.h:21
size_t tabs
The number of 'tabs' of strings.
Definition: strgen.h:46
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
Definition: openttd.cpp:110
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, started at bit s.
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
Definition: depend.cpp:68
uint16 * hash_heads
Hash table for the strings.
Definition: strgen.h:45
#define endof(x)
Get the end element of a fixed size array.
Definition: stdafx.h:427
These commands aren't counted for comparison.
Definition: strgen_tables.h:16
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:77
int next_string_id
The next string ID to allocate.
Definition: strgen.h:48
static bool _translated
Whether the current language is not the master language.
Definition: strgen_base.cpp:26
static void free(const void *ptr)
Version of the standard free that accepts const pointers.
Definition: depend.cpp:114
uint16 hash_next
Next hash entry.
Definition: strgen.h:32
char digit_group_separator_currency[8]
Thousand separator used for currencies.
Definition: language.h:39
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
uint32 ident
32-bits identifier
Definition: language.h:29
int line
Line of string in source-file.
Definition: strgen.h:34
void AppendUtf8(uint32 value)
Add an Unicode character encoded in UTF-8 to the buffer.
uint16 index
The index in the language file.
Definition: strgen.h:33
uint32 WChar
Type for wide characters, i.e.
Definition: string_type.h:35
void WriteHeader(const StringData &data)
Write the header information.
static const PluralForm _plural_forms[]
All plural forms used.
static void MemSetT(T *ptr, byte value, size_t num=1)
Type-safe version of memset().
Definition: mem_func.hpp:51
Case(int caseidx, const char *string, Case *next)
Create a new case.
Definition: strgen_base.cpp:42
These commands support cases.
Definition: strgen_tables.h:17