OpenTTD Source 20250522-master-g467f832c2f
utf8.cpp
Go to the documentation of this file.
1/*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
7
10#include "../stdafx.h"
11
12#include "../3rdparty/catch2/catch.hpp"
13
14#include "../core/utf8.hpp"
15
16#include "../safeguards.h"
17
18TEST_CASE("Utf8View - empty")
19{
20 Utf8View view;
21 auto begin = view.begin();
22 auto end = view.end();
23 CHECK(begin == end);
24 CHECK(begin.GetByteOffset() == 0);
25}
26
27TEST_CASE("Utf8View - invalid")
28{
29 Utf8View view("\u1234\x80\x80""a\xFF\x80\x80\x80\x80\x80""b\xF0");
30 auto begin = view.begin();
31 auto end = view.end();
32 CHECK(begin < end);
33 auto it = begin;
34 CHECK(it == begin);
35 CHECK(it.GetByteOffset() == 0);
36 CHECK(*it == 0x1234);
37 ++it;
38 CHECK(begin < it);
39 CHECK(it < end);
40 CHECK(it.GetByteOffset() == 5);
41 CHECK(*it == 'a');
42 ++it;
43 CHECK(begin < it);
44 CHECK(it < end);
45 CHECK(it.GetByteOffset() == 6);
46 CHECK(*it == '?');
47 ++it;
48 CHECK(begin < it);
49 CHECK(it < end);
50 CHECK(it.GetByteOffset() == 12);
51 CHECK(*it == 'b');
52 ++it;
53 CHECK(begin < it);
54 CHECK(it < end);
55 CHECK(it.GetByteOffset() == 13);
56 CHECK(*it == '?');
57 ++it;
58 CHECK(it.GetByteOffset() == 14);
59 CHECK(begin < it);
60 CHECK(it == end);
61 --it;
62 CHECK(begin < it);
63 CHECK(it < end);
64 CHECK(it.GetByteOffset() == 13);
65 CHECK(*it == '?');
66 --it;
67 CHECK(begin < it);
68 CHECK(it < end);
69 CHECK(it.GetByteOffset() == 12);
70 CHECK(*it == 'b');
71 --it;
72 CHECK(begin < it);
73 CHECK(it < end);
74 CHECK(it.GetByteOffset() == 6);
75 CHECK(*it == '?');
76 --it;
77 CHECK(begin < it);
78 CHECK(it < end);
79 CHECK(it.GetByteOffset() == 5);
80 CHECK(*it == 'a');
81 --it;
82 CHECK(it == begin);
83 CHECK(it.GetByteOffset() == 0);
84 CHECK(*it == 0x1234);
85}
86
87TEST_CASE("Utf8View - iterate")
88{
89 Utf8View view("\u1234a\0b\U00012345"sv);
90 auto begin = view.begin();
91 auto end = view.end();
92 CHECK(begin < end);
93 auto it = begin;
94 CHECK(it == begin);
95 CHECK(it.GetByteOffset() == 0);
96 CHECK(std::distance(begin, it) == 0);
97 CHECK(std::distance(it, end) == 5);
98 CHECK(*it == 0x1234);
99 CHECK(it == view.GetIterAtByte(0));
100 CHECK(it == view.GetIterAtByte(1));
101 CHECK(it == view.GetIterAtByte(2));
102 ++it;
103 CHECK(begin < it);
104 CHECK(it < end);
105 CHECK(it.GetByteOffset() == 3);
106 CHECK(std::distance(begin, it) == 1);
107 CHECK(std::distance(it, end) == 4);
108 CHECK(*it == 'a');
109 CHECK(it == view.GetIterAtByte(3));
110 ++it;
111 CHECK(it.GetByteOffset() == 4);
112 CHECK(std::distance(begin, it) == 2);
113 CHECK(std::distance(it, end) == 3);
114 CHECK(*it == 0);
115 CHECK(it == view.GetIterAtByte(4));
116 ++it;
117 CHECK(it.GetByteOffset() == 5);
118 CHECK(std::distance(begin, it) == 3);
119 CHECK(std::distance(it, end) == 2);
120 CHECK(*it == 'b');
121 CHECK(it == view.GetIterAtByte(5));
122 ++it;
123 CHECK(begin < it);
124 CHECK(it < end);
125 CHECK(it.GetByteOffset() == 6);
126 CHECK(std::distance(begin, it) == 4);
127 CHECK(std::distance(it, end) == 1);
128 CHECK(*it == 0x00012345);
129 CHECK(it == view.GetIterAtByte(6));
130 CHECK(it == view.GetIterAtByte(7));
131 CHECK(it == view.GetIterAtByte(8));
132 CHECK(it == view.GetIterAtByte(9));
133 ++it;
134 CHECK(begin < it);
135 CHECK(it.GetByteOffset() == 10);
136 CHECK(std::distance(begin, it) == 5);
137 CHECK(std::distance(it, end) == 0);
138 CHECK(it == end);
139 CHECK(it == view.GetIterAtByte(10));
140 --it;
141 CHECK(begin < it);
142 CHECK(it < end);
143 CHECK(it.GetByteOffset() == 6);
144 CHECK(*it == 0x00012345);
145 --it;
146 CHECK(begin < it);
147 CHECK(it < end);
148 CHECK(it.GetByteOffset() == 5);
149 CHECK(*it == 'b');
150}
Constant span of UTF-8 encoded data.
Definition utf8.hpp:30
iterator GetIterAtByte(size_t offset) const
Create iterator pointing at codepoint, which occupies the byte position "offset".
Definition utf8.cpp:83