NeBuild dev
Loading...
Searching...
No Matches
unicode.hpp
Go to the documentation of this file.
1//# This file is a part of toml++ and is subject to the terms of the MIT license.
2//# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
3//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
4// SPDX-License-Identifier: MIT
5#pragma once
6
8#include "header_start.hpp"
10
12{
// Reports whether `c` is a double quote (") or a single quote (').
constexpr bool is_string_delimiter(char32_t c) noexcept
{
    switch (c)
    {
        case U'"':
        case U'\'':
            return true;
        default:
            return false;
    }
}
18
// Reports whether `c` lies in 'a'..'z' or 'A'..'Z'.
constexpr bool is_ascii_letter(char32_t c) noexcept
{
    // Setting bit 5 maps 'A'..'Z' onto 'a'..'z' and leaves 'a'..'z' untouched,
    // so a single range test covers both cases.
    const auto folded = c | 0x20u;
    return folded >= U'a' && folded <= U'z';
}
24
// Reports whether `c` is '0' or '1'.
constexpr bool is_binary_digit(char32_t c) noexcept
{
    switch (c)
    {
        case U'0':
        case U'1':
            return true;
        default:
            return false;
    }
}
30
// Reports whether `c` lies in '0'..'7'.
constexpr bool is_octal_digit(char32_t c) noexcept
{
    constexpr char32_t lowest  = U'0';
    constexpr char32_t highest = U'7';
    return lowest <= c && c <= highest;
}
36
// Reports whether `c` lies in '0'..'9'.
constexpr bool is_decimal_digit(char32_t c) noexcept
{
    const bool out_of_range = c < U'0' || c > U'9';
    return !out_of_range;
}
42
// Reports whether `c` is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
constexpr bool is_hexadecimal_digit(char32_t c) noexcept
{
    const bool decimal   = c >= U'0' && c <= U'9';
    const bool uppercase = c >= U'A' && c <= U'F';
    const bool lowercase = c >= U'a' && c <= U'f';
    return decimal || uppercase || lowercase;
}
48
// Converts the character code of a hexadecimal digit to its numeric value.
//
// `c` is first converted to uint_least32_t. Codes at or above 'A' (0x41) are
// treated as letters: they are folded to lower case and mapped to 10 + the
// offset from 'a'. Everything else is mapped to its offset from '0'.
// The input is not validated; callers are expected to pass a hex digit.
template <typename T>
constexpr uint_least32_t hex_to_dec(const T c) noexcept
{
    const auto code = static_cast<uint_least32_t>(c);

    if (code >= 0x41u) // >= 'A'
        return 10u + (code | 0x20u) - 0x61u; // - 'a'

    return code - 0x30u; // - '0'
}
61
63 constexpr bool is_horizontal_whitespace(char32_t c) noexcept
64 {
65 return is_ascii_horizontal_whitespace(c) || is_non_ascii_horizontal_whitespace(c);
66 }
67
69 constexpr bool is_vertical_whitespace(char32_t c) noexcept
70 {
71 return is_ascii_vertical_whitespace(c) || is_non_ascii_vertical_whitespace(c);
72 }
73
75 constexpr bool is_whitespace(char32_t c) noexcept
76 {
77 return is_horizontal_whitespace(c) || is_vertical_whitespace(c);
78 }
79
81 constexpr bool is_bare_key_character(char32_t c) noexcept
82 {
83 return is_ascii_bare_key_character(c)
84#if TOML_LANG_UNRELEASED // toml/pull/891 (unicode bare keys)
85 || is_non_ascii_bare_key_character(c)
86#endif
87 ;
88 }
89
91 constexpr bool is_value_terminator(char32_t c) noexcept
92 {
93 return is_whitespace(c) || c == U']' || c == U'}' || c == U',' || c == U'#';
94 }
95
// Reports whether the byte `c` is an ASCII control character, i.e. its value
// is in 0x00..0x1F or equals 0x7F.
//
// The comparison is done on the unsigned value of the byte. Whether plain
// `char` is signed is implementation-defined; on signed-char platforms bytes
// 0x80..0xFF are negative, so comparing the raw `char` against 0x1F would
// report every non-ASCII byte as a control character.
constexpr bool is_control_character(char c) noexcept
{
    const auto byte = static_cast<unsigned char>(c);
    return byte <= 0x1Fu || byte == 0x7Fu;
}
101
// Reports whether the code point `c` is in U+0000..U+001F or equals U+007F.
constexpr bool is_control_character(char32_t c) noexcept
{
    if (c == U'\u007F')
        return true;

    return c < U'\u0020';
}
107
// Reports whether the code point `c` is in U+0000..U+001F or equals U+007F,
// excluding the tab character U+0009.
constexpr bool is_nontab_control_character(char32_t c) noexcept
{
    if (c == U'\u0009')
        return false;

    return c <= U'\u001F' || c == U'\u007F';
}
113
// Reports whether the code point `c` lies in the range 0xD800..0xDFFF.
constexpr bool is_unicode_surrogate(char32_t c) noexcept
{
    constexpr char32_t first_surrogate = 0xD800u;
    constexpr char32_t last_surrogate  = 0xDFFFu;
    return !(c < first_surrogate || c > last_surrogate);
}
119
// Incremental, table-driven UTF-8 decoder. Bytes are fed one at a time through
// operator(); after each byte the decoder is in one of three conditions:
//   - has_code_point():   `state` is 0 and `codepoint` holds the value decoded so far,
//   - error():            `state` is 12,
//   - needs_more_input(): any other state.
struct utf8_decoder
{
    // utf8_decoder based on this: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
    // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>

    // Current automaton state (0 = complete code point, 12 = error).
    uint_least32_t state{};

    // Code point accumulated from the bytes consumed so far.
    char32_t codepoint{};

    // Two tables stored back to back:
    //   [0, 256)  maps each byte value to a character class,
    //   [256, ..) maps (state + character class) to the next state.
    static constexpr uint8_t state_table[]{
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6,
        6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

        0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12,
        12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12,
        12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12,
        36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
    };

    // True when the decoder is in the error state (12).
    constexpr bool error() const noexcept
    {
        return uint_least32_t{ 12u } == state;
    }

    // True when the decoder is in state 0, i.e. `codepoint` is complete.
    constexpr bool has_code_point() const noexcept
    {
        return uint_least32_t{} == state;
    }

    // True when the decoder is neither complete nor in the error state.
    constexpr bool needs_more_input() const noexcept
    {
        return !(has_code_point() || error());
    }

    // Consumes one byte: updates `codepoint` with the byte's payload bits and
    // advances `state` through the transition table.
    constexpr void operator()(uint8_t byte) noexcept
    {
        const auto char_class = state_table[byte];

        uint_least32_t decoded{};
        if (has_code_point())
        {
            // First byte of a sequence: the class doubles as the number of
            // leading marker bits to mask off.
            decoded = (uint_least32_t{ 255u } >> char_class) & byte;
        }
        else
        {
            // Subsequent byte: append its low six bits to the accumulator.
            decoded = (byte & uint_least32_t{ 63u }) | (static_cast<uint_least32_t>(codepoint) << 6);
        }
        codepoint = static_cast<char32_t>(decoded);

        state = state_table[state + uint_least32_t{ 256u } + char_class];
    }

    // Convenience overload: reinterprets `c` as an unsigned byte and consumes it.
    constexpr void operator()(char c) noexcept
    {
        (*this)(static_cast<uint8_t>(c));
    }

    // Returns the decoder to state 0. `codepoint` is left untouched; it is
    // overwritten by the next byte consumed from state 0.
    constexpr void reset() noexcept
    {
        state = uint_least32_t{};
    }
};
189
// Declaration only; the definition is not part of this listing.
// Takes a pointer to characters and a length. Presumably reports whether every
// byte in [str, str + len) is ASCII -- confirm against the definition.
// NOTE(review): TOML_ATTR is a macro from preprocessor.hpp; `nonnull` presumably
// marks the pointer argument as never null -- confirm.
191 TOML_ATTR(nonnull)
192 bool is_ascii(const char* str, size_t len) noexcept;
193}
195
197#include "header_end.hpp"
#define TOML_ASSERT_ASSUME(expr)
Definition preprocessor.hpp:1190
@ error
throw a parse_error exception in case of a tag
#define TOML_CONST_GETTER
Definition preprocessor.hpp:485
#define TOML_PURE_GETTER
Definition preprocessor.hpp:474
#define TOML_ATTR(...)
Definition preprocessor.hpp:316
#define TOML_PURE_INLINE_GETTER
Definition preprocessor.hpp:479
#define TOML_ALWAYS_INLINE
Definition preprocessor.hpp:405
#define TOML_IMPL_NAMESPACE_END
Definition preprocessor.hpp:1334
#define TOML_IMPL_NAMESPACE_START
Definition preprocessor.hpp:1333