OpenTTD Source 20251213-master-g1091fa6071
gfx_layout_icu.cpp
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <https://www.gnu.org/licenses/old-licenses/gpl-2.0>.
6 */
7
10#include "stdafx.h"
11#include "gfx_layout_icu.h"
12
13#include "debug.h"
14#include "gfx_func.h"
15#include "gfx_layout_fallback.h"
16#include "string_func.h"
17#include "strings_func.h"
18#include "language.h"
19#include "table/control_codes.h"
20#include "zoom_func.h"
21
22#include "3rdparty/icu/scriptrun.h"
23
24#include <unicode/ubidi.h>
25#include <unicode/brkiter.h>
26
27#include <hb.h>
28#include <hb-ft.h>
29
30#include "safeguards.h"
31
/** HarfBuzz FreeType integration sets the font scaling, which is always in 1/64th of a pixel. */
constexpr float FONT_SCALE = 64.0f;
34
40class ICURun {
41public:
42 int start;
43 int length;
44 UBiDiLevel level;
45 UScriptCode script;
47
48 std::vector<GlyphID> glyphs;
49 std::vector<int> advance;
50 std::vector<int> glyph_to_char;
51 std::vector<ParagraphLayouter::Position> positions;
52 int total_advance = 0;
53
54 ICURun(int start, int length, UBiDiLevel level, UScriptCode script, const Font &font) : start(start), length(length), level(level), script(script), font(font) {}
55
56 void Shape(UChar *buff, size_t length);
57 void FallbackShape(UChar *buff);
58};
59
64public:
67 private:
68 std::vector<GlyphID> glyphs;
69 std::vector<Position> positions;
70 std::vector<int> glyph_to_char;
71
72 int total_advance;
73 Font font;
74
75 public:
76 ICUVisualRun(const ICURun &run, int x);
77
78 std::span<const GlyphID> GetGlyphs() const override { return this->glyphs; }
79 std::span<const Position> GetPositions() const override { return this->positions; }
80 std::span<const int> GetGlyphToCharMap() const override { return this->glyph_to_char; }
81
82 const Font &GetFont() const override { return this->font; }
83 int GetLeading() const override { return GetCharacterHeight(this->font.GetFontCache().GetSize()); }
84 int GetGlyphCount() const override { return this->glyphs.size(); }
85 int GetAdvance() const { return this->total_advance; }
86 };
87
89 class ICULine : public std::vector<ICUVisualRun>, public ParagraphLayouter::Line {
90 public:
91 int GetLeading() const override;
92 int GetWidth() const override;
93 int CountRuns() const override { return (uint)this->size(); }
94 const VisualRun &GetVisualRun(int run) const override { return this->at(run); }
95
96 int GetInternalCharLength(char32_t c) const override
97 {
98 /* ICU uses UTF-16 internally which means we need to account for surrogate pairs. */
99 return c >= 0x010000U ? 2 : 1;
100 }
101 };
102
103private:
104 std::vector<ICURun> runs;
105 UChar *buff;
106 size_t buff_length;
107 std::vector<ICURun>::iterator current_run;
108 int partial_offset;
109
110public:
111 ICUParagraphLayout(std::vector<ICURun> &&runs, UChar *buff, size_t buff_length) : runs(std::move(runs)), buff(buff), buff_length(buff_length)
112 {
113 this->Reflow();
114 }
115
116 ~ICUParagraphLayout() override { }
117
118 void Reflow() override
119 {
120 this->current_run = this->runs.begin();
121 this->partial_offset = 0;
122 }
123
124 std::unique_ptr<const Line> NextLine(int max_width) override;
125};
126
136 glyphs(run.glyphs), glyph_to_char(run.glyph_to_char), total_advance(run.total_advance), font(run.font)
137{
138 /* If there are no positions, the ICURun was not Shaped; that should never happen. */
139 assert(!run.positions.empty());
140 this->positions.reserve(run.positions.size());
141
142 /* Copy positions, moving x coordinate by x offset. */
143 for (const auto &pos : run.positions) {
144 this->positions.emplace_back(pos.left + x, pos.right + x, pos.top);
145 }
146}
147
153void ICURun::FallbackShape(UChar *buff)
154{
155 FontCache &fc = this->font.GetFontCache();
156
157 this->glyphs.reserve(this->length);
158 this->glyph_to_char.reserve(this->length);
159
160 /* Read each UTF-16 character, mapping to an appropriate glyph. */
161 for (int i = this->start; i < this->start + this->length; i += Utf16IsLeadSurrogate(buff[i]) ? 2 : 1) {
162 char32_t c = Utf16DecodeChar(reinterpret_cast<uint16_t *>(buff + i));
163 if (this->level & 1) c = SwapRtlPairedCharacters(c);
164 this->glyphs.emplace_back(fc.MapCharToGlyph(c));
165 this->glyph_to_char.push_back(i);
166 }
167
168 /* Reverse the sequence if this run is RTL. */
169 if (this->level & 1) {
170 std::reverse(std::begin(this->glyphs), std::end(this->glyphs));
171 std::reverse(std::begin(this->glyph_to_char), std::end(this->glyph_to_char));
172 }
173
174 this->positions.reserve(this->glyphs.size());
175
176 /* Set positions of each glyph. */
177 int y_offset = fc.GetGlyphYOffset();
178 int advance = 0;
179 for (const GlyphID glyph : this->glyphs) {
180 int x_advance = fc.GetGlyphWidth(glyph);
181 this->positions.emplace_back(advance, advance + x_advance - 1, y_offset);
182 this->advance.push_back(x_advance);
183 advance += x_advance;
184 }
185 this->total_advance = advance;
186}
187
194void ICURun::Shape(UChar *buff, size_t buff_length)
195{
196 FontCache &fc = this->font.GetFontCache();
197
198 /* Make sure any former run is lost. */
199 this->glyphs.clear();
200 this->glyph_to_char.clear();
201 this->positions.clear();
202 this->advance.clear();
203
204 if (fc.IsBuiltInFont()) {
205 this->FallbackShape(buff);
206 return;
207 }
208
209 auto hbfont = hb_ft_font_create_referenced(*(static_cast<const FT_Face *>(fc.GetOSHandle())));
210 /* Match the flags with how we render the glyphs. */
211 hb_ft_font_set_load_flags(hbfont, GetFontAAState() ? FT_LOAD_TARGET_NORMAL : FT_LOAD_TARGET_MONO);
212
213 /* ICU buffer is in UTF-16. */
214 auto hbbuf = hb_buffer_create();
215 hb_buffer_add_utf16(hbbuf, reinterpret_cast<uint16_t *>(buff), buff_length, this->start, this->length);
216
217 /* Set all the properties of this segment. */
218 hb_buffer_set_direction(hbbuf, (this->level & 1) == 1 ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
219 hb_buffer_set_script(hbbuf, hb_script_from_string(uscript_getShortName(this->script), -1));
220 hb_buffer_set_language(hbbuf, hb_language_from_string(_current_language->isocode, -1));
221 hb_buffer_set_cluster_level(hbbuf, HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES);
222
223 /* Shape the segment. */
224 hb_shape(hbfont, hbbuf, nullptr, 0);
225
226 unsigned int glyph_count;
227 auto glyph_info = hb_buffer_get_glyph_infos(hbbuf, &glyph_count);
228 auto glyph_pos = hb_buffer_get_glyph_positions(hbbuf, &glyph_count);
229
230 /* Reserve space, as we already know the size. */
231 this->glyphs.reserve(glyph_count);
232 this->glyph_to_char.reserve(glyph_count);
233 this->positions.reserve(glyph_count);
234 this->advance.reserve(glyph_count);
235
236 /* Prepare the glyphs/position. ICUVisualRun will give the position an offset if needed. */
237 int y_offset = fc.GetGlyphYOffset();
238 hb_position_t advance = 0;
239 for (unsigned int i = 0; i < glyph_count; i++) {
240 int x_advance = glyph_pos[i].x_advance / FONT_SCALE;
241 this->glyphs.push_back(glyph_info[i].codepoint);
242 this->positions.emplace_back(glyph_pos[i].x_offset / FONT_SCALE + advance, glyph_pos[i].x_offset / FONT_SCALE + advance + x_advance - 1, glyph_pos[i].y_offset / FONT_SCALE + y_offset);
243
244 this->glyph_to_char.push_back(glyph_info[i].cluster);
245 this->advance.push_back(x_advance);
246 advance += x_advance;
247 }
248
249 /* Track the total advancement we made. */
250 this->total_advance = advance;
251
252 hb_buffer_destroy(hbbuf);
253 hb_font_destroy(hbfont);
254}
255
261{
262 int leading = 0;
263 for (const auto &run : *this) {
264 leading = std::max(leading, run.GetLeading());
265 }
266
267 return leading;
268}
269
275{
276 int length = 0;
277 for (const auto &run : *this) {
278 length += run.GetAdvance();
279 }
280
281 return length;
282}
283
293std::vector<ICURun> ItemizeBidi(UChar *buff, size_t length)
294{
295 auto ubidi = ubidi_open();
296
297 auto parLevel = _current_text_dir == TD_RTL ? UBIDI_RTL : UBIDI_LTR;
298
299 UErrorCode err = U_ZERO_ERROR;
300 ubidi_setPara(ubidi, buff, length, parLevel, nullptr, &err);
301 if (U_FAILURE(err)) {
302 Debug(fontcache, 0, "Failed to set paragraph: {}", u_errorName(err));
303 ubidi_close(ubidi);
304 return std::vector<ICURun>();
305 }
306
307 int32_t count = ubidi_countRuns(ubidi, &err);
308 if (U_FAILURE(err)) {
309 Debug(fontcache, 0, "Failed to count runs: {}", u_errorName(err));
310 ubidi_close(ubidi);
311 return std::vector<ICURun>();
312 }
313
314 std::vector<ICURun> runs;
315 runs.reserve(count);
316
317 /* Find the breakpoints for the logical runs. So we get runs that say "from START to END". */
318 int32_t logical_pos = 0;
319 while (static_cast<size_t>(logical_pos) < length) {
320 auto start_pos = logical_pos;
321
322 /* Fetch the embedding level, so we can order bidi correctly later on. */
323 UBiDiLevel level;
324 ubidi_getLogicalRun(ubidi, start_pos, &logical_pos, &level);
325
326 runs.emplace_back(start_pos, logical_pos - start_pos, level, USCRIPT_UNKNOWN, Font{});
327 }
328
329 assert(static_cast<size_t>(count) == runs.size());
330
331 ubidi_close(ubidi);
332 return runs;
333}
334
345std::vector<ICURun> ItemizeScript(UChar *buff, size_t length, std::vector<ICURun> &runs_current)
346{
347 std::vector<ICURun> runs;
348 icu::ScriptRun script_itemizer(buff, length);
349
350 int cur_pos = 0;
351 auto cur_run = runs_current.begin();
352 while (true) {
353 while (cur_pos < script_itemizer.getScriptEnd() && cur_run != runs_current.end()) {
354 int stop_pos = std::min(script_itemizer.getScriptEnd(), cur_run->start + cur_run->length);
355 assert(stop_pos - cur_pos > 0);
356
357 runs.emplace_back(cur_pos, stop_pos - cur_pos, cur_run->level, script_itemizer.getScriptCode(), Font{});
358
359 if (stop_pos == cur_run->start + cur_run->length) cur_run++;
360 cur_pos = stop_pos;
361 }
362
363 if (!script_itemizer.next()) break;
364 }
365
366 return runs;
367}
368
378std::vector<ICURun> ItemizeStyle(std::vector<ICURun> &runs_current, FontMap &font_mapping)
379{
380 std::vector<ICURun> runs;
381
382 int cur_pos = 0;
383 auto cur_run = runs_current.begin();
384 for (auto const &[position, font] : font_mapping) {
385 while (cur_pos < position && cur_run != runs_current.end()) {
386 int stop_pos = std::min(position, cur_run->start + cur_run->length);
387 assert(stop_pos - cur_pos > 0);
388
389 runs.emplace_back(cur_pos, stop_pos - cur_pos, cur_run->level, cur_run->script, font);
390
391 if (stop_pos == cur_run->start + cur_run->length) cur_run++;
392 cur_pos = stop_pos;
393 }
394 }
395
396 return runs;
397}
398
399/* static */ std::unique_ptr<ParagraphLayouter> ICUParagraphLayoutFactory::GetParagraphLayout(UChar *buff, UChar *buff_end, FontMap &font_mapping)
400{
401 size_t length = buff_end - buff;
402 /* Can't layout an empty string. */
403 if (length == 0) return nullptr;
404
405 auto runs = ItemizeBidi(buff, length);
406 runs = ItemizeScript(buff, length, runs);
407 runs = ItemizeStyle(runs, font_mapping);
408
409 if (runs.empty()) return nullptr;
410
411 for (auto &run : runs) {
412 run.Shape(buff, length);
413 }
414
415 return std::make_unique<ICUParagraphLayout>(std::move(runs), buff, length);
416}
417
418/* static */ std::unique_ptr<icu::BreakIterator> ICUParagraphLayoutFactory::break_iterator;
419
424{
425 auto locale = icu::Locale(_current_language->isocode);
426 UErrorCode status = U_ZERO_ERROR;
427 ICUParagraphLayoutFactory::break_iterator.reset(icu::BreakIterator::createLineInstance(locale, status));
428 assert(U_SUCCESS(status));
429}
430
435/* static */ std::unique_ptr<icu::BreakIterator> ICUParagraphLayoutFactory::GetBreakIterator()
436{
437 assert(ICUParagraphLayoutFactory::break_iterator != nullptr);
438
439 return std::unique_ptr<icu::BreakIterator>(ICUParagraphLayoutFactory::break_iterator->clone());
440}
441
442std::unique_ptr<const ICUParagraphLayout::Line> ICUParagraphLayout::NextLine(int max_width)
443{
444 std::vector<ICURun>::iterator start_run = this->current_run;
445 std::vector<ICURun>::iterator last_run = this->current_run;
446
447 if (start_run == this->runs.end()) return nullptr;
448
449 int cur_width = 0;
450
451 /* Add remaining width of the first run if it is a broken run. */
452 if (this->partial_offset > 0) {
453 if ((start_run->level & 1) == 0) {
454 for (size_t i = this->partial_offset; i < start_run->advance.size(); i++) {
455 cur_width += start_run->advance[i];
456 }
457 } else {
458 for (int i = 0; i < this->partial_offset; i++) {
459 cur_width += start_run->advance[i];
460 }
461 }
462 last_run++;
463 }
464
465 /* Gather runs until the line is full. */
466 while (last_run != this->runs.end() && cur_width < max_width) {
467 cur_width += last_run->total_advance;
468 last_run++;
469 }
470
471 /* If the text does not fit into the available width, find a suitable breaking point. */
472 int new_partial_length = 0;
473 if (cur_width > max_width) {
474 /* Create a break-iterator to find a good place to break lines. */
475 auto break_iterator = ICUParagraphLayoutFactory::GetBreakIterator();
476 break_iterator->setText(icu::UnicodeString(this->buff, this->buff_length));
477
478 auto overflow_run = last_run - 1;
479
480 /* Find the last glyph that fits. */
481 size_t index;
482 if ((overflow_run->level & 1) == 0) {
483 /* LTR */
484 for (index = overflow_run->glyphs.size(); index > 0; /* nothing */) {
485 --index;
486 cur_width -= overflow_run->advance[index];
487 if (cur_width <= max_width) break;
488 }
489 } else {
490 /* RTL */
491 for (index = 0; index < overflow_run->glyphs.size(); index++) {
492 cur_width -= overflow_run->advance[index];
493 if (cur_width <= max_width) break;
494 }
495 }
496
497 /* Find the character that matches; this is the start of the cluster. */
498 auto char_pos = overflow_run->glyph_to_char[index];
499
500 /* See if there is a good breakpoint inside this run. */
501 int32_t break_pos = break_iterator->preceding(char_pos + 1);
502 auto overflow_run_start = overflow_run->start;
503 if (overflow_run == start_run) overflow_run_start += this->partial_offset;
504 if (break_pos != icu::BreakIterator::DONE && break_pos > overflow_run_start) {
505 /* There is a line-break inside this run that is suitable. */
506 new_partial_length = break_pos - overflow_run_start;
507 } else if (overflow_run != start_run) {
508 /* There is no suitable line-break in this run, but it is also not
509 * the only run on this line. So we remove the run. */
510 last_run--;
511 } else {
512 /* There is no suitable line-break and this is the only run on the
513 * line. So we break at the cluster. This is not pretty, but the
514 * best we can do. */
515 new_partial_length = char_pos - overflow_run_start;
516 }
517 }
518
519 /* Reorder the runs on this line for display. */
520 std::vector<UBiDiLevel> bidi_level;
521 for (auto run = start_run; run != last_run; run++) {
522 bidi_level.push_back(run->level);
523 }
524 std::vector<int32_t> vis_to_log(bidi_level.size());
525 ubidi_reorderVisual(bidi_level.data(), bidi_level.size(), vis_to_log.data());
526
527 /* Create line. */
528 std::unique_ptr<ICULine> line = std::make_unique<ICULine>();
529
530 int cur_pos = 0;
531 for (auto &i : vis_to_log) {
532 auto i_run = start_run + i;
533 /* Copy the ICURun here, so we can modify it in case of a partial. */
534 ICURun run = *i_run;
535
536 if (i_run == last_run - 1 && new_partial_length > 0) {
537 if (i_run == start_run && this->partial_offset > 0) {
538 assert(run.length > this->partial_offset);
539 run.start += this->partial_offset;
540 run.length -= this->partial_offset;
541 }
542
543 assert(run.length > new_partial_length);
544 run.length = new_partial_length;
545
546 run.Shape(this->buff, this->buff_length);
547 } else if (i_run == start_run && this->partial_offset > 0) {
548 assert(run.length > this->partial_offset);
549
550 run.start += this->partial_offset;
551 run.length -= this->partial_offset;
552
553 run.Shape(this->buff, this->buff_length);
554 }
555
556 auto total_advance = run.total_advance;
557 line->emplace_back(std::move(run), cur_pos);
558 cur_pos += total_advance;
559 }
560
561 if (new_partial_length > 0) {
562 this->current_run = last_run - 1;
563 if (this->current_run != start_run) this->partial_offset = 0;
564 this->partial_offset += new_partial_length;
565 } else {
566 this->current_run = last_run;
567 this->partial_offset = 0;
568 }
569
570 return line;
571}
572
573/* static */ size_t ICUParagraphLayoutFactory::AppendToBuffer(UChar *buff, const UChar *buffer_last, char32_t c)
574{
575 assert(buff < buffer_last);
576 /* Transform from UTF-32 to internal ICU format of UTF-16. */
577 int32_t length = 0;
578 UErrorCode err = U_ZERO_ERROR;
579 u_strFromUTF32(buff, buffer_last - buff, &length, (UChar32*)&c, 1, &err);
580 return length;
581}
Font cache for basic fonts.
Definition fontcache.h:32
virtual const void * GetOSHandle()
Get the native OS font handle, if there is one.
Definition fontcache.h:160
virtual bool IsBuiltInFont()=0
Is this a built-in sprite font?
virtual uint GetGlyphWidth(GlyphID key)=0
Get the width of the glyph with the given key.
virtual GlyphID MapCharToGlyph(char32_t key)=0
Map a character into a glyph.
FontSize GetSize() const
Get the FontSize of the font.
Definition fontcache.h:96
Container with information about a font.
Definition gfx_layout.h:98
static std::unique_ptr< icu::BreakIterator > GetBreakIterator()
Get a thread-safe line break iterator.
static void InitializeLayouter()
Initialize data needed for the ICU layouter.
A single line worth of VisualRuns.
int GetLeading() const override
Get the height of the line.
int GetWidth() const override
Get the width of this line.
Visual run contains data about the bit of text with the same font.
ICUVisualRun(const ICURun &run, int x)
Constructor for a new ICUVisualRun.
Wrapper for doing layouts with ICU.
Helper class to store the information of all the runs of a paragraph in.
UScriptCode script
Script of the run.
Font font
Font of the run.
std::vector< int > glyph_to_char
The mapping from glyphs to characters. Valid after Shape() is called.
void Shape(UChar *buff, size_t length)
Shape a single run.
std::vector< GlyphID > glyphs
The glyphs of the run. Valid after Shape() is called.
int total_advance
The total advance of the run. Valid after Shape() is called.
std::vector< int > advance
The advance (width) of the glyphs. Valid after Shape() is called.
int length
Length of the run in the buffer.
int start
Start of the run in the buffer.
std::vector< ParagraphLayouter::Position > positions
The positions of the glyphs. Valid after Shape() is called.
UBiDiLevel level
Embedding level of the run.
void FallbackShape(UChar *buff)
Manually shape a run for built-in non-truetype fonts.
A single line worth of VisualRuns.
Definition gfx_layout.h:142
Visual run contains data about the bit of text with the same font.
Definition gfx_layout.h:130
Interface to glue fallback and normal layouter into one.
Definition gfx_layout.h:112
Control codes that are embedded in the translation strings.
Functions related to debugging.
#define Debug(category, level, format_string,...)
Output a line of debugging information.
Definition debug.h:37
int GetCharacterHeight(FontSize size)
Get height of a character for a given font size.
uint32_t GlyphID
Glyphs are characters from a font.
Definition fontcache.h:18
Functions related to the gfx engine.
std::vector< std::pair< int, Font > > FontMap
Mapping from index to font.
Definition gfx_layout.h:107
Functions related to laying out the texts as fallback.
char32_t SwapRtlPairedCharacters(char32_t c)
Swap paired brackets for fallback RTL layouting.
std::vector< ICURun > ItemizeStyle(std::vector< ICURun > &runs_current, FontMap &font_mapping)
Itemize the string into runs per style, based on the previous created runs.
constexpr float FONT_SCALE
HarfBuzz FreeType integration sets the font scaling, which is always in 1/64th of a pixel.
std::vector< ICURun > ItemizeBidi(UChar *buff, size_t length)
Itemize the string into runs per embedding level.
std::vector< ICURun > ItemizeScript(UChar *buff, size_t length, std::vector< ICURun > &runs_current)
Itemize the string into runs per script, based on the previous created runs.
Functions related to laying out the texts with ICU.
Information about languages and their files.
const LanguageMetadata * _current_language
The currently loaded language.
Definition strings.cpp:54
A number of safeguards to prevent using unsafe methods.
Definition of base types and functions in a cross-platform compatible way.
Functions related to low-level strings.
char32_t Utf16DecodeChar(const uint16_t *c)
Decode an UTF-16 character.
Definition string_func.h:96
bool Utf16IsLeadSurrogate(uint c)
Is the given character a lead surrogate code point?
Definition string_func.h:65
TextDirection _current_text_dir
Text direction of the currently selected language.
Definition strings.cpp:56
Functions related to OTTD's strings.
@ TD_RTL
Text is written right-to-left by default.
char isocode[16]
the ISO code for the language (not country code)
Definition language.h:31
Functions related to zooming.