/**
 * Encode a character to UTF-8.
 * @param c Codepoint to encode.
 * @return Pair of a 4-byte buffer and a size; presumably the encoded bytes
 *         and the number of bytes used -- confirm against the definition.
 */
[[nodiscard]] std::pair<char[4], size_t> EncodeUtf8(char32_t c);
/**
 * Decode a character from UTF-8.
 * @param buf Bytes to decode from.
 * @return Pair of a size and a codepoint; callers in this file treat a size
 *         of zero as a failed decode.
 */
[[nodiscard]] std::pair<size_t, char32_t> DecodeUtf8(std::string_view buf);
22inline bool IsUtf8Part(
char c)
24 return GB(c, 6, 2) == 2;
34 Utf8View(std::string_view src) : src(src) {}
/* Iterator traits: bidirectional traversal yielding codepoints. */
using iterator_category = std::bidirectional_iterator_tag;
using value_type = char32_t;
using difference_type = std::ptrdiff_t;
/* No reference type is exposed; presumably because dereferencing returns a codepoint by value. */
using reference = void;
54 iterator(std::string_view src,
size_t position) : src(src), position(position) {}
56 size_t GetByteOffset()
const
58 return this->position;
61 bool operator==(
const iterator &rhs)
const
63 assert(this->src.data() == rhs.src.data());
64 return this->position == rhs.position;
67 std::strong_ordering operator<=>(
const iterator &rhs)
const
69 assert(this->src.data() == rhs.src.data());
70 return this->position <=> rhs.position;
73 char32_t operator*()
const
75 assert(this->position < this->src.size());
76 auto [len, c] =
DecodeUtf8(this->src.substr(this->position));
77 return len > 0 ? c :
'?';
82 auto size = this->src.size();
83 assert(this->position < size);
86 }
while (this->position < size && IsUtf8Part(this->src[this->position]));
99 assert(this->position > 0);
102 }
while (this->position > 0 && IsUtf8Part(this->src[this->position]));
121 return iterator(this->src, this->src.size());
Functions related to bit mathematics.
debug_inline static constexpr uint GB(const T x, const uint8_t s, const uint8_t n)
Fetch n bits from x, starting at bit s.
Bidirectional input iterator over codepoints.
Constant span of UTF-8 encoded data.
iterator GetIterAtByte(size_t offset) const
Create an iterator pointing at the codepoint that occupies the byte position "offset".
std::pair< size_t, char32_t > DecodeUtf8(std::string_view buf)
Decode a character from UTF-8.
std::pair< char[4], size_t > EncodeUtf8(char32_t c)
Encode a character to UTF-8.