OpenTTD Source  20241121-master-g67a0fccfad
gfx_layout_icu.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #include "stdafx.h"
11 #include "gfx_layout_icu.h"
12 
13 #include "debug.h"
14 #include "strings_func.h"
15 #include "language.h"
16 #include "table/control_codes.h"
17 #include "zoom_func.h"
18 
19 #include "3rdparty/icu/scriptrun.h"
20 
21 #include <unicode/ubidi.h>
22 #include <unicode/brkiter.h>
23 
24 #include <hb.h>
25 #include <hb-ft.h>
26 
27 #include "safeguards.h"
28 
/** HarfBuzz FreeType integration sets the font scaling, which is always in 1/64th of a pixel. */
30 constexpr float FONT_SCALE = 64.0;
31 
37 class ICURun {
38 public:
39  int start;
40  int length;
41  UBiDiLevel level;
42  UScriptCode script;
44 
45  std::vector<GlyphID> glyphs;
46  std::vector<int> advance;
47  std::vector<int> glyph_to_char;
48  std::vector<ParagraphLayouter::Position> positions;
49  int total_advance = 0;
50 
51  ICURun(int start, int length, UBiDiLevel level, UScriptCode script = USCRIPT_UNKNOWN, Font *font = nullptr) : start(start), length(length), level(level), script(script), font(font) {}
52 
53  void Shape(UChar *buff, size_t length);
54 };
55 
60 public:
63  private:
64  std::vector<GlyphID> glyphs;
65  std::vector<Position> positions;
66  std::vector<int> glyph_to_char;
67 
68  int total_advance;
69  const Font *font;
70 
71  public:
72  ICUVisualRun(const ICURun &run, int x);
73 
74  std::span<const GlyphID> GetGlyphs() const override { return this->glyphs; }
75  std::span<const Position> GetPositions() const override { return this->positions; }
76  std::span<const int> GetGlyphToCharMap() const override { return this->glyph_to_char; }
77 
78  const Font *GetFont() const override { return this->font; }
79  int GetLeading() const override { return this->font->fc->GetHeight(); }
80  int GetGlyphCount() const override { return this->glyphs.size(); }
81  int GetAdvance() const { return this->total_advance; }
82  };
83 
85  class ICULine : public std::vector<ICUVisualRun>, public ParagraphLayouter::Line {
86  public:
87  int GetLeading() const override;
88  int GetWidth() const override;
89  int CountRuns() const override { return (uint)this->size(); }
90  const VisualRun &GetVisualRun(int run) const override { return this->at(run); }
91 
92  int GetInternalCharLength(char32_t c) const override
93  {
94  /* ICU uses UTF-16 internally which means we need to account for surrogate pairs. */
95  return c >= 0x010000U ? 2 : 1;
96  }
97  };
98 
99 private:
100  std::vector<ICURun> runs;
101  UChar *buff;
102  size_t buff_length;
103  std::vector<ICURun>::iterator current_run;
104  int partial_offset;
105 
106 public:
107  ICUParagraphLayout(std::vector<ICURun> &&runs, UChar *buff, size_t buff_length) : runs(std::move(runs)), buff(buff), buff_length(buff_length)
108  {
109  this->Reflow();
110  }
111 
112  ~ICUParagraphLayout() override { }
113 
114  void Reflow() override
115  {
116  this->current_run = this->runs.begin();
117  this->partial_offset = 0;
118  }
119 
120  std::unique_ptr<const Line> NextLine(int max_width) override;
121 };
122 
132  glyphs(run.glyphs), glyph_to_char(run.glyph_to_char), total_advance(run.total_advance), font(run.font)
133 {
134  /* If there are no positions, the ICURun was not Shaped; that should never happen. */
135  assert(!run.positions.empty());
136  this->positions.reserve(run.positions.size());
137 
138  /* Copy positions, moving x coordinate by x offset. */
139  for (const auto &pos : run.positions) {
140  this->positions.emplace_back(pos.left + x, pos.right + x, pos.top);
141  }
142 }
143 
150 void ICURun::Shape(UChar *buff, size_t buff_length)
151 {
152  auto hbfont = hb_ft_font_create_referenced(*(static_cast<const FT_Face *>(font->fc->GetOSHandle())));
153  /* Match the flags with how we render the glyphs. */
154  hb_ft_font_set_load_flags(hbfont, GetFontAAState() ? FT_LOAD_TARGET_NORMAL : FT_LOAD_TARGET_MONO);
155 
156  /* ICU buffer is in UTF-16. */
157  auto hbbuf = hb_buffer_create();
158  hb_buffer_add_utf16(hbbuf, reinterpret_cast<uint16_t *>(buff), buff_length, this->start, this->length);
159 
160  /* Set all the properties of this segment. */
161  hb_buffer_set_direction(hbbuf, (this->level & 1) == 1 ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
162  hb_buffer_set_script(hbbuf, hb_script_from_string(uscript_getShortName(this->script), -1));
163  hb_buffer_set_language(hbbuf, hb_language_from_string(_current_language->isocode, -1));
164  hb_buffer_set_cluster_level(hbbuf, HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES);
165 
166  /* Shape the segment. */
167  hb_shape(hbfont, hbbuf, nullptr, 0);
168 
169  unsigned int glyph_count;
170  auto glyph_info = hb_buffer_get_glyph_infos(hbbuf, &glyph_count);
171  auto glyph_pos = hb_buffer_get_glyph_positions(hbbuf, &glyph_count);
172 
173  /* Make sure any former run is lost. */
174  this->glyphs.clear();
175  this->glyph_to_char.clear();
176  this->positions.clear();
177  this->advance.clear();
178 
179  /* Reserve space, as we already know the size. */
180  this->glyphs.reserve(glyph_count);
181  this->glyph_to_char.reserve(glyph_count);
182  this->positions.reserve(glyph_count);
183  this->advance.reserve(glyph_count);
184 
185  /* Prepare the glyphs/position. ICUVisualRun will give the position an offset if needed. */
186  hb_position_t advance = 0;
187  for (unsigned int i = 0; i < glyph_count; i++) {
188  int x_advance;
189 
190  if (buff[glyph_info[i].cluster] >= SCC_SPRITE_START && buff[glyph_info[i].cluster] <= SCC_SPRITE_END && glyph_info[i].codepoint == 0) {
191  auto glyph = this->font->fc->MapCharToGlyph(buff[glyph_info[i].cluster]);
192  x_advance = this->font->fc->GetGlyphWidth(glyph);
193  this->glyphs.push_back(glyph);
194  this->positions.emplace_back(advance, advance + x_advance - 1, (this->font->fc->GetHeight() - ScaleSpriteTrad(FontCache::GetDefaultFontHeight(this->font->fc->GetSize()))) / 2); // Align sprite font to centre
195  } else {
196  x_advance = glyph_pos[i].x_advance / FONT_SCALE;
197  this->glyphs.push_back(glyph_info[i].codepoint);
198  this->positions.emplace_back(glyph_pos[i].x_offset / FONT_SCALE + advance, glyph_pos[i].x_offset / FONT_SCALE + advance + x_advance - 1, glyph_pos[i].y_offset / FONT_SCALE);
199  }
200 
201  this->glyph_to_char.push_back(glyph_info[i].cluster);
202  this->advance.push_back(x_advance);
203  advance += x_advance;
204  }
205 
206  /* Track the total advancement we made. */
207  this->total_advance = advance;
208 
209  hb_buffer_destroy(hbbuf);
210  hb_font_destroy(hbfont);
211 }
212 
218 {
219  int leading = 0;
220  for (const auto &run : *this) {
221  leading = std::max(leading, run.GetLeading());
222  }
223 
224  return leading;
225 }
226 
232 {
233  int length = 0;
234  for (const auto &run : *this) {
235  length += run.GetAdvance();
236  }
237 
238  return length;
239 }
240 
250 std::vector<ICURun> ItemizeBidi(UChar *buff, size_t length)
251 {
252  auto ubidi = ubidi_open();
253 
254  auto parLevel = _current_text_dir == TD_RTL ? UBIDI_RTL : UBIDI_LTR;
255 
256  UErrorCode err = U_ZERO_ERROR;
257  ubidi_setPara(ubidi, buff, length, parLevel, nullptr, &err);
258  if (U_FAILURE(err)) {
259  Debug(fontcache, 0, "Failed to set paragraph: {}", u_errorName(err));
260  ubidi_close(ubidi);
261  return std::vector<ICURun>();
262  }
263 
264  int32_t count = ubidi_countRuns(ubidi, &err);
265  if (U_FAILURE(err)) {
266  Debug(fontcache, 0, "Failed to count runs: {}", u_errorName(err));
267  ubidi_close(ubidi);
268  return std::vector<ICURun>();
269  }
270 
271  std::vector<ICURun> runs;
272  runs.reserve(count);
273 
274  /* Find the breakpoints for the logical runs. So we get runs that say "from START to END". */
275  int32_t logical_pos = 0;
276  while (static_cast<size_t>(logical_pos) < length) {
277  auto start_pos = logical_pos;
278 
279  /* Fetch the embedding level, so we can order bidi correctly later on. */
280  UBiDiLevel level;
281  ubidi_getLogicalRun(ubidi, start_pos, &logical_pos, &level);
282 
283  runs.emplace_back(start_pos, logical_pos - start_pos, level);
284  }
285 
286  assert(static_cast<size_t>(count) == runs.size());
287 
288  ubidi_close(ubidi);
289  return runs;
290 }
291 
302 std::vector<ICURun> ItemizeScript(UChar *buff, size_t length, std::vector<ICURun> &runs_current)
303 {
304  std::vector<ICURun> runs;
305  icu::ScriptRun script_itemizer(buff, length);
306 
307  int cur_pos = 0;
308  auto cur_run = runs_current.begin();
309  while (true) {
310  while (cur_pos < script_itemizer.getScriptEnd() && cur_run != runs_current.end()) {
311  int stop_pos = std::min(script_itemizer.getScriptEnd(), cur_run->start + cur_run->length);
312  assert(stop_pos - cur_pos > 0);
313 
314  runs.emplace_back(cur_pos, stop_pos - cur_pos, cur_run->level, script_itemizer.getScriptCode());
315 
316  if (stop_pos == cur_run->start + cur_run->length) cur_run++;
317  cur_pos = stop_pos;
318  }
319 
320  if (!script_itemizer.next()) break;
321  }
322 
323  return runs;
324 }
325 
335 std::vector<ICURun> ItemizeStyle(std::vector<ICURun> &runs_current, FontMap &font_mapping)
336 {
337  std::vector<ICURun> runs;
338 
339  int cur_pos = 0;
340  auto cur_run = runs_current.begin();
341  for (auto const &font_map : font_mapping) {
342  while (cur_pos < font_map.first && cur_run != runs_current.end()) {
343  int stop_pos = std::min(font_map.first, cur_run->start + cur_run->length);
344  assert(stop_pos - cur_pos > 0);
345 
346  runs.emplace_back(cur_pos, stop_pos - cur_pos, cur_run->level, cur_run->script, font_map.second);
347 
348  if (stop_pos == cur_run->start + cur_run->length) cur_run++;
349  cur_pos = stop_pos;
350  }
351  }
352 
353  return runs;
354 }
355 
356 /* static */ ParagraphLayouter *ICUParagraphLayoutFactory::GetParagraphLayout(UChar *buff, UChar *buff_end, FontMap &font_mapping)
357 {
358  size_t length = buff_end - buff;
359  /* Can't layout an empty string. */
360  if (length == 0) return nullptr;
361 
362  /* Can't layout our in-built sprite fonts. */
363  for (auto const &pair : font_mapping) {
364  if (pair.second->fc->IsBuiltInFont()) return nullptr;
365  }
366 
367  auto runs = ItemizeBidi(buff, length);
368  runs = ItemizeScript(buff, length, runs);
369  runs = ItemizeStyle(runs, font_mapping);
370 
371  if (runs.empty()) return nullptr;
372 
373  for (auto &run : runs) {
374  run.Shape(buff, length);
375  }
376 
377  return new ICUParagraphLayout(std::move(runs), buff, length);
378 }
379 
/** Line break iterator kept by the factory; it is set up with createLineInstance() and handed out as clones by GetBreakIterator(). */
380 /* static */ std::unique_ptr<icu::BreakIterator> ICUParagraphLayoutFactory::break_iterator;
381 
386 {
387  auto locale = icu::Locale(_current_language->isocode);
388  UErrorCode status = U_ZERO_ERROR;
389  ICUParagraphLayoutFactory::break_iterator.reset(icu::BreakIterator::createLineInstance(locale, status));
390  assert(U_SUCCESS(status));
391 }
392 
397 /* static */ std::unique_ptr<icu::BreakIterator> ICUParagraphLayoutFactory::GetBreakIterator()
398 {
399  assert(ICUParagraphLayoutFactory::break_iterator != nullptr);
400 
401  return std::unique_ptr<icu::BreakIterator>(ICUParagraphLayoutFactory::break_iterator->clone());
402 }
403 
/**
 * Build the next line of the paragraph.
 *
 * Starting at current_run (and partial_offset inside it), runs are gathered
 * until their summed advance reaches max_width. When the gathered runs are
 * too wide, the last gathered run is cut at a position given by the line
 * break iterator, dropped from the line, or cut at a cluster. The runs of the
 * line are then put in visual order and copied into an ICULine. Finally
 * current_run and partial_offset are moved to where the next line starts.
 *
 * @param max_width The maximum width of the line.
 * @return The line, or nullptr when all runs have been consumed.
 */
404 std::unique_ptr<const ICUParagraphLayout::Line> ICUParagraphLayout::NextLine(int max_width)
405 {
406  std::vector<ICURun>::iterator start_run = this->current_run;
407  std::vector<ICURun>::iterator last_run = this->current_run;
408 
409  if (start_run == this->runs.end()) return nullptr;
410 
411  int cur_width = 0;
412 
413  /* Add remaining width of the first run if it is a broken run. */
414  if (this->partial_offset > 0) {
415  if ((start_run->level & 1) == 0) {
416  for (size_t i = this->partial_offset; i < start_run->advance.size(); i++) {
417  cur_width += start_run->advance[i];
418  }
419  } else {
420  for (int i = 0; i < this->partial_offset; i++) {
421  cur_width += start_run->advance[i];
422  }
423  }
 /* The broken first run is accounted for above, so gathering continues with the run after it. */
424  last_run++;
425  }
426 
427  /* Gather runs until the line is full. */
428  while (last_run != this->runs.end() && cur_width < max_width) {
429  cur_width += last_run->total_advance;
430  last_run++;
431  }
432 
433  /* If the text does not fit into the available width, find a suitable breaking point. */
 /* new_partial_length stays 0 when the line ends on a run boundary. */
434  int new_partial_length = 0;
435  if (cur_width > max_width) {
436  /* Create a break-iterator to find a good place to break lines. */
437  auto break_iterator = ICUParagraphLayoutFactory::GetBreakIterator();
438  break_iterator->setText(icu::UnicodeString(this->buff, this->buff_length));
439 
 /* last_run is one past the last gathered run, so the run before it is the one that overflows. */
440  auto overflow_run = last_run - 1;
441 
442  /* Find the last glyph that fits. */
443  size_t index;
444  if ((overflow_run->level & 1) == 0) {
445  /* LTR */
 /* Take glyph advances off the end of the run until the width fits. */
446  for (index = overflow_run->glyphs.size(); index > 0; index--) {
447  cur_width -= overflow_run->advance[index - 1];
448  if (cur_width <= max_width) break;
449  }
 /* After this decrement index is the glyph whose advance was taken off last. */
450  index--;
451  } else {
452  /* RTL */
 /* Take glyph advances off the start of the glyph list until the width fits. */
453  for (index = 0; index < overflow_run->glyphs.size(); index++) {
454  cur_width -= overflow_run->advance[index];
455  if (cur_width <= max_width) break;
456  }
457  }
458 
459  /* Find the character that matches; this is the start of the cluster. */
460  auto char_pos = overflow_run->glyph_to_char[index];
461 
462  /* See if there is a good breakpoint inside this run. */
 /* NOTE(review): char_pos + 1 is presumably passed so that a boundary at char_pos itself is found too - confirm against the ICU documentation. */
463  int32_t break_pos = break_iterator->preceding(char_pos + 1);
464  auto overflow_run_start = overflow_run->start;
 /* Text of the first run that was already put on an earlier line does not count for this line. */
465  if (overflow_run == start_run) overflow_run_start += this->partial_offset;
466  if (break_pos != icu::BreakIterator::DONE && break_pos > overflow_run_start) {
467  /* There is a line-break inside this run that is suitable. */
468  new_partial_length = break_pos - overflow_run_start;
469  } else if (overflow_run != start_run) {
470  /* There is no suitable line-break in this run, but it is also not
471  * the only run on this line. So we remove the run. */
472  last_run--;
473  } else {
474  /* There is no suitable line-break and this is the only run on the
475  * line. So we break at the cluster. This is not pretty, but the
476  * best we can do. */
477  new_partial_length = char_pos - overflow_run_start;
478  }
479  }
480 
481  /* Reorder the runs on this line for display. */
482  std::vector<UBiDiLevel> bidi_level;
483  for (auto run = start_run; run != last_run; run++) {
484  bidi_level.push_back(run->level);
485  }
 /* Each entry of vis_to_log is used below as an offset from start_run. */
486  std::vector<int32_t> vis_to_log(bidi_level.size());
487  ubidi_reorderVisual(bidi_level.data(), bidi_level.size(), vis_to_log.data());
488 
489  /* Create line. */
490  std::unique_ptr<ICULine> line = std::make_unique<ICULine>();
491 
 /* cur_pos is the x offset at which the next visual run is placed. */
492  int cur_pos = 0;
493  for (auto &i : vis_to_log) {
494  auto i_run = start_run + i;
495  /* Copy the ICURun here, so we can modify it in case of a partial. */
496  ICURun run = *i_run;
497 
 /* A run that is cut at either end is shaped again for just the part that is on this line. */
498  if (i_run == last_run - 1 && new_partial_length > 0) {
499  if (i_run == start_run && this->partial_offset > 0) {
500  assert(run.length > this->partial_offset);
501  run.start += this->partial_offset;
502  run.length -= this->partial_offset;
503  }
504 
505  assert(run.length > new_partial_length);
506  run.length = new_partial_length;
507 
508  run.Shape(this->buff, this->buff_length);
509  } else if (i_run == start_run && this->partial_offset > 0) {
510  assert(run.length > this->partial_offset);
511 
512  run.start += this->partial_offset;
513  run.length -= this->partial_offset;
514 
515  run.Shape(this->buff, this->buff_length);
516  }
517 
 /* Read the advance before the run is moved into the line. */
518  auto total_advance = run.total_advance;
519  line->emplace_back(std::move(run), cur_pos);
520  cur_pos += total_advance;
521  }
522 
 /* Remember where the next line starts. */
523  if (new_partial_length > 0) {
 /* The last run was cut: the next line starts inside it. The offset accumulates when that run is also the run this line started in. */
524  this->current_run = last_run - 1;
525  if (this->current_run != start_run) this->partial_offset = 0;
526  this->partial_offset += new_partial_length;
527  } else {
528  this->current_run = last_run;
529  this->partial_offset = 0;
530  }
531 
532  return line;
533 }
534 
535 /* static */ size_t ICUParagraphLayoutFactory::AppendToBuffer(UChar *buff, const UChar *buffer_last, char32_t c)
536 {
537  assert(buff < buffer_last);
538  /* Transform from UTF-32 to internal ICU format of UTF-16. */
539  int32_t length = 0;
540  UErrorCode err = U_ZERO_ERROR;
541  u_strFromUTF32(buff, buffer_last - buff, &length, (UChar32*)&c, 1, &err);
542  return length;
543 }
int GetHeight() const
Get the height of the font.
Definition: fontcache.h:48
Container with information about a font.
Definition: gfx_layout.h:75
FontCache * fc
The font we are using.
Definition: gfx_layout.h:77
static std::unique_ptr< icu::BreakIterator > GetBreakIterator()
Get a thread-safe line break iterator.
static void InitializeLayouter()
Initialize data needed for the ICU layouter.
A single line worth of VisualRuns.
int GetLeading() const override
Get the height of the line.
int GetWidth() const override
Get the width of this line.
Visual run contains data about the bit of text with the same font.
ICUVisualRun(const ICURun &run, int x)
Constructor for a new ICUVisualRun.
Wrapper for doing layouts with ICU.
Helper class to store the information of all the runs of a paragraph in.
UScriptCode script
Script of the run.
std::vector< int > glyph_to_char
The mapping from glyphs to characters. Valid after Shape() is called.
void Shape(UChar *buff, size_t length)
Shape a single run.
std::vector< GlyphID > glyphs
The glyphs of the run. Valid after Shape() is called.
int total_advance
The total advance of the run. Valid after Shape() is called.
Font * font
Font of the run.
std::vector< int > advance
The advance (width) of the glyphs. Valid after Shape() is called.
int length
Length of the run in the buffer.
int start
Start of the run in the buffer.
std::vector< ParagraphLayouter::Position > positions
The positions of the glyphs. Valid after Shape() is called.
UBiDiLevel level
Embedding level of the run.
A single line worth of VisualRuns.
Definition: gfx_layout.h:119
Visual run contains data about the bit of text with the same font.
Definition: gfx_layout.h:107
Interface to glue fallback and normal layouter into one.
Definition: gfx_layout.h:89
Control codes that are embedded in the translation strings.
Functions related to debugging.
#define Debug(category, level, format_string,...)
Output a line of debugging information.
Definition: debug.h:37
std::map< int, Font * > FontMap
Mapping from index to font.
Definition: gfx_layout.h:84
std::vector< ICURun > ItemizeStyle(std::vector< ICURun > &runs_current, FontMap &font_mapping)
Itemize the string into runs per style, based on the previous created runs.
constexpr float FONT_SCALE
HarfBuzz FreeType integration sets the font scaling, which is always in 1/64th of a pixel.
std::vector< ICURun > ItemizeScript(UChar *buff, size_t length, std::vector< ICURun > &runs_current)
Itemize the string into runs per script, based on the previous created runs.
std::vector< ICURun > ItemizeBidi(UChar *buff, size_t length)
Itemize the string into runs per embedding level.
Functions related to laying out the texts with ICU.
Information about languages and their files.
const LanguageMetadata * _current_language
The currently loaded language.
Definition: strings.cpp:54
A number of safeguards to prevent using unsafe methods.
Definition of base types and functions in a cross-platform compatible way.
TextDirection _current_text_dir
Text direction of the currently selected language.
Definition: strings.cpp:56
Functions related to OTTD's strings.
@ TD_RTL
Text is written right-to-left by default.
Definition: strings_type.h:24
char isocode[16]
the ISO code for the language (not country code)
Definition: language.h:31
Functions related to zooming.
int ScaleSpriteTrad(int value)
Scale traditional pixel dimensions to GUI zoom level, for drawing sprites.
Definition: zoom_func.h:107