/**
 * Encode a character to UTF-8.
 * @param c Codepoint to encode.
 * @return Pair of a 4-byte buffer and a size_t; presumably the encoded bytes and
 *         the number of bytes actually used -- confirm against the definition.
 */
16[[nodiscard]] std::pair<char[4], size_t>
EncodeUtf8(
char32_t c);
/**
 * Decode a character from UTF-8.
 * @param buf Bytes to decode from; decoding starts at the beginning of the view.
 * @return Pair of a length and the decoded codepoint. The codepoint iterator's
 *         dereference treats a length of 0 as "nothing decoded"; the length is
 *         presumably the number of bytes consumed -- confirm against the definition.
 */
17[[nodiscard]] std::pair<size_t, char32_t>
DecodeUtf8(std::string_view buf);
/**
 * Test whether a byte is a UTF-8 continuation byte, i.e. not the first byte of a sequence.
 *
 * Continuation bytes have the form 10xxxxxx: the two bits starting at bit 6 equal 0b10.
 * @param c Byte to inspect.
 * @return true iff \a c has the bit pattern 10xxxxxx.
 */
inline bool IsUtf8Part(char c)
{
	/* Mask via unsigned char so the result does not depend on whether plain char is signed. */
	return (static_cast<unsigned char>(c) & 0xC0) == 0x80;
}
/**
 * Construct a view over the given string data.
 * Only the std::string_view is stored, which is non-owning: the referenced
 * characters must stay alive for as long as this object is used.
 * @param src Data to view.
 */
32 Utf8View(std::string_view src) : src(src) {}
/* Member types describing the iterator to the standard library. */
/** Dereferencing yields a codepoint. */
45 using value_type = char32_t;
46 using difference_type = std::ptrdiff_t;
/** The iterator is declared as bidirectional. */
47 using iterator_category = std::bidirectional_iterator_tag;
/** No reference type is exposed; presumably because operator* returns the codepoint by value. */
49 using reference = void;
/**
 * Construct an iterator over \a src located at a byte position.
 * @param src Data being iterated.
 * @param position Byte offset into \a src; stored as-is, no validation happens here.
 */
52 iterator(std::string_view src,
size_t position) : src(src), position(position) {}
/**
 * Get the current position of the iterator.
 * @return The stored byte offset into the viewed data (bytes, not codepoints).
 */
54 size_t GetByteOffset()
const
56 return this->position;
/**
 * Equality comparison by byte position.
 * @param rhs Iterator to compare with.
 * @return true iff both iterators are at the same byte position.
 */
59 bool operator==(
const iterator &rhs)
const
/* Comparing positions only makes sense when both iterators start at the same data pointer. */
61 assert(this->src.data() == rhs.src.data());
62 return this->position == rhs.position;
/**
 * Three-way comparison by byte position.
 * @param rhs Iterator to compare with.
 * @return Ordering of this iterator's byte position relative to that of \a rhs.
 */
65 std::strong_ordering operator<=>(
const iterator &rhs)
const
/* As with equality, both iterators are required to start at the same data pointer. */
67 assert(this->src.data() == rhs.src.data());
68 return this->position <=> rhs.position;
/**
 * Decode the codepoint at the current position.
 * @return The decoded codepoint, or '?' when DecodeUtf8 reports a length of 0.
 * @pre The iterator is not at the end of the data (asserted).
 */
71 char32_t operator*()
const
73 assert(this->position < this->src.size());
/* Decode from the current byte offset up to the end of the data. */
74 auto [len, c] =
DecodeUtf8(this->src.substr(this->position));
/* A length of 0 presumably signals an invalid or truncated sequence; substitute '?' then. */
75 return len > 0 ? c :
'?';
80 auto size = this->src.size();
81 assert(this->position < size);
84 }
while (this->position < size && IsUtf8Part(this->src[this->position]));
97 assert(this->position > 0);
100 }
while (this->position > 0 && IsUtf8Part(this->src[this->position]));
119 return iterator(this->src, this->src.size());
Functions related to bit mathematics.
static constexpr uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, starting at bit s.
Bidirectional input iterator over codepoints.
Constant span of UTF-8 encoded data.
iterator GetIterAtByte(size_t offset) const
Create an iterator pointing at the codepoint that occupies the byte position "offset".
std::pair< size_t, char32_t > DecodeUtf8(std::string_view buf)
Decode a character from UTF-8.
std::pair< char[4], size_t > EncodeUtf8(char32_t c)
Encode a character to UTF-8.