NeBuild dev
Loading...
Searching...
No Matches
parser.inl
Go to the documentation of this file.
1// # This file is a part of toml++ and is subject to the terms of the MIT license.
2// # Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
3// # See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
4// SPDX-License-Identifier: MIT
5#pragma once
6
7#include "preprocessor.hpp"
8// # {{
9#if !TOML_IMPLEMENTATION
10#error This is an implementation-only header.
11#endif
12// # }}
13#if TOML_ENABLE_PARSER
14
15#include "array.hpp"
16#include "date_time.hpp"
17#include "parse_error.hpp"
18#include "parser.hpp"
19#include "source_region.hpp"
20#include "std_optional.hpp"
21#include "table.hpp"
22#include "unicode.hpp"
23#include "value.hpp"
25#include <fstream>
26#include <istream>
27#if TOML_INT_CHARCONV || TOML_FLOAT_CHARCONV
28#include <charconv>
29#endif
30#if !TOML_INT_CHARCONV || !TOML_FLOAT_CHARCONV
31#include <sstream>
32#endif
33#if !TOML_INT_CHARCONV
34#include <iomanip>
35#endif
37#include "header_start.hpp"
38
39// #---------------------------------------------------------------------------------------------------------------------
40// # UTF8 STREAMS
41// #---------------------------------------------------------------------------------------------------------------------
42
44 template <typename T>
45 class utf8_byte_stream;
46
48 constexpr auto utf8_byte_order_mark = "\xEF\xBB\xBF"sv;
49
50 template <typename Char>
51 class utf8_byte_stream<std::basic_string_view<Char>> {
52 static_assert(sizeof(Char) == 1);
53
54 private:
55 std::basic_string_view<Char> source_;
56 size_t position_ = {};
57
58 public:
60 explicit constexpr utf8_byte_stream(std::basic_string_view<Char> sv) noexcept //
61 : source_{sv} {
62 // skip bom
63 if (source_.length() >= 3u && memcmp(utf8_byte_order_mark.data(), source_.data(), 3u) == 0)
64 position_ += 3u;
65 }
66
68 constexpr bool error() const noexcept { return false; }
69
71 constexpr bool eof() const noexcept { return position_ >= source_.length(); }
72
74 explicit constexpr operator bool() const noexcept { return !eof(); }
75
77 constexpr bool peek_eof() const noexcept { return eof(); }
78
80 TOML_ATTR(nonnull)
81 size_t operator()(void* dest, size_t num) noexcept {
82 TOML_ASSERT_ASSUME(!eof());
83
84 num = impl::min(position_ + num, source_.length()) - position_;
85 std::memcpy(dest, source_.data() + position_, num);
86 position_ += num;
87 return num;
88 }
89 };
90
91 template <>
92 class utf8_byte_stream<std::istream> {
93 private:
94 std::istream* source_;
95
96 public:
98 explicit utf8_byte_stream(std::istream& stream) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) //
99 : source_{&stream} {
100 if (!*this) // eof, bad
101 return;
102
103 const auto initial_pos = source_->tellg();
104 char bom[3];
105 source_->read(bom, 3);
106 if (source_->bad() ||
107 (source_->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3u) == 0))
108 return;
109
110 source_->clear();
111 source_->seekg(initial_pos, std::istream::beg);
112 }
113
115 bool error() const noexcept { return !!(source_->rdstate() & std::istream::badbit); }
116
118 bool eof() const noexcept { return !!(source_->rdstate() & std::istream::eofbit); }
119
121 explicit operator bool() const noexcept {
122 return !(source_->rdstate() & (std::istream::badbit | std::istream::eofbit));
123 }
124
126 bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
127 return eof() || source_->peek() == std::istream::traits_type::eof();
128 }
129
131 TOML_ATTR(nonnull)
132 size_t operator()(void* dest, size_t num) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
133 TOML_ASSERT(*this);
134
135 source_->read(static_cast<char*>(dest), static_cast<std::streamsize>(num));
136 return static_cast<size_t>(source_->gcount());
137 }
138 };
139
141 char32_t value;
142 char bytes[4];
143 size_t count;
145
147 constexpr operator const char32_t&() const noexcept { return value; }
148
150 constexpr const char32_t& operator*() const noexcept { return value; }
151 };
152 static_assert(std::is_trivial_v<utf8_codepoint>);
153 static_assert(std::is_standard_layout_v<utf8_codepoint>);
154
157 virtual const source_path_ptr& source_path() const noexcept = 0;
158
160 virtual const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0;
161
163 virtual bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0;
164
165#if !TOML_EXCEPTIONS
166
168 virtual optional<parse_error>&& error() noexcept = 0;
169
170#endif
171
172 virtual ~utf8_reader_interface() noexcept = default;
173 };
174
175#if TOML_EXCEPTIONS
176#define utf8_reader_error(...) throw parse_error(__VA_ARGS__)
177#define utf8_reader_return_after_error(...) static_assert(true)
178#define utf8_reader_error_check(...) static_assert(true)
179#else
180#define utf8_reader_error(...) err_.emplace(__VA_ARGS__)
181#define utf8_reader_return_after_error(...) return __VA_ARGS__
182#define utf8_reader_error_check(...) \
183 do { \
184 if TOML_UNLIKELY (err_) return __VA_ARGS__; \
185 } while (false)
186
187#endif
188
189#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__)
190#define TOML_OVERALIGNED
191#else
192#define TOML_OVERALIGNED alignas(32)
193#endif
194
195 template <typename T>
197 private:
198 static constexpr size_t block_capacity = 32;
199 utf8_byte_stream<T> stream_;
200 source_position next_pos_ = {1, 1};
201
202 impl::utf8_decoder decoder_;
204 char bytes[4];
205 size_t count;
206 } currently_decoding_;
207
209 TOML_OVERALIGNED utf8_codepoint buffer[block_capacity];
210 size_t current;
211 size_t count;
212 } codepoints_;
213
215
216#if !TOML_EXCEPTIONS
217 optional<parse_error> err_;
218#endif
219
221 TOML_ASSERT(stream_);
222
223 TOML_OVERALIGNED char raw_bytes[block_capacity];
224 size_t raw_bytes_read;
225
226 // read the next raw (encoded) block in from the stream
227 if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS) {
228 raw_bytes_read = stream_(raw_bytes, block_capacity);
229 }
230#if TOML_EXCEPTIONS
231 else {
232 try {
233 raw_bytes_read = stream_(raw_bytes, block_capacity);
234 } catch (const std::exception& exc) {
235 throw parse_error{exc.what(), next_pos_, source_path_};
236 } catch (...) {
237 throw parse_error{"An unspecified error occurred", next_pos_, source_path_};
238 }
239 }
240#endif // TOML_EXCEPTIONS
241
242 // handle a zero-byte read
243 if TOML_UNLIKELY (!raw_bytes_read) {
244 if (stream_.eof()) {
245 // EOF only sets the error state if the decoder wants more input, otherwise
246 // a zero-byte read might have just caused the underlying stream to realize it's exhausted
247 // and set the EOF flag, and that's totally fine
248 if (decoder_.needs_more_input())
249 utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence",
250 next_pos_, source_path_);
251 } else {
252 utf8_reader_error("Reading from the underlying stream failed - zero bytes read",
253 next_pos_, source_path_);
254 }
255 return false;
256 }
257
258 TOML_ASSERT_ASSUME(raw_bytes_read);
259 std::memset(&codepoints_, 0, sizeof(codepoints_));
260
261 // helper for calculating decoded codepoint line+cols
262 const auto calc_positions = [&]() noexcept {
263 for (size_t i = 0; i < codepoints_.count; i++) {
264 auto& cp = codepoints_.buffer[i];
265 cp.position = next_pos_;
266
267 if (cp == U'\n') {
268 next_pos_.line++;
269 next_pos_.column = source_index{1};
270 } else
271 next_pos_.column++;
272 }
273 };
274
275 // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain
276 // ASCII
277 const auto ascii_fast_path =
278 !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read);
279
280 // ASCII fast-path
281 if (ascii_fast_path) {
282 decoder_.reset();
283 currently_decoding_.count = {};
284
285 codepoints_.count = raw_bytes_read;
286 for (size_t i = 0; i < codepoints_.count; i++) {
287 auto& cp = codepoints_.buffer[i];
288 cp.value = static_cast<char32_t>(raw_bytes[i]);
289 cp.bytes[0] = raw_bytes[i];
290 cp.count = 1u;
291 }
292 }
293
294 // UTF-8 slow-path
295 else {
296 // helper for getting precise error location
297 const auto error_pos = [&]() noexcept -> const source_position& { //
298 return codepoints_.count ? codepoints_.buffer[codepoints_.count - 1u].position
299 : next_pos_;
300 };
301
302 for (size_t i = 0; i < raw_bytes_read; i++) {
303 decoder_(static_cast<uint8_t>(raw_bytes[i]));
304 if TOML_UNLIKELY (decoder_.error()) {
305 calc_positions();
306 utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_);
308 }
309
310 currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i];
311
312 if (decoder_.has_code_point()) {
313 auto& cp = codepoints_.buffer[codepoints_.count++];
314
315 cp.value = decoder_.codepoint;
316 cp.count = currently_decoding_.count;
317 std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count);
318 currently_decoding_.count = {};
319 } else if TOML_UNLIKELY (currently_decoding_.count == 4u) {
320 calc_positions();
321 utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_);
323 }
324 }
325 if TOML_UNLIKELY (decoder_.needs_more_input() && stream_.eof()) {
326 calc_positions();
327 utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence",
328 error_pos(), source_path_);
330 }
331 }
332
333 TOML_ASSERT_ASSUME(codepoints_.count);
334 calc_positions();
335
336 // handle general I/O errors
337 // (down here so the next_pos_ benefits from calc_positions())
338 if TOML_UNLIKELY (stream_.error()) {
339 utf8_reader_error("An I/O error occurred while reading from the underlying stream",
340 next_pos_, source_path_);
342 }
343
344 return true;
345 }
346
347 public:
348 template <typename U, typename String = std::string_view>
349 TOML_NODISCARD_CTOR explicit utf8_reader(U&& source, String&& source_path = {}) noexcept(
350 std::is_nothrow_constructible_v<utf8_byte_stream<T>, U&&>)
351 : stream_{static_cast<U&&>(source)} {
352 currently_decoding_.count = {};
353
354 codepoints_.current = {};
355 codepoints_.count = {};
356
357 if (!source_path.empty())
358 source_path_ = std::make_shared<const std::string>(static_cast<String&&>(source_path));
359 }
360
362 const source_path_ptr& source_path() const noexcept final { return source_path_; }
363
367
368 if (codepoints_.current == codepoints_.count) {
369 if TOML_UNLIKELY (!stream_ || !read_next_block()) return nullptr;
370
371 TOML_ASSERT_ASSUME(!codepoints_.current);
372 }
373 TOML_ASSERT_ASSUME(codepoints_.count);
374 TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity);
375 TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count);
376
377 return &codepoints_.buffer[codepoints_.current++];
378 }
379
381 bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final {
382 return stream_.peek_eof();
383 }
384
385#if !TOML_EXCEPTIONS
386
388 optional<parse_error>&& error() noexcept final { return std::move(err_); }
389
390#endif
391 };
392
393 template <typename Char>
394 utf8_reader(std::basic_string_view<Char>,
395 std::string_view) -> utf8_reader<std::basic_string_view<Char>>;
396 template <typename Char>
397 utf8_reader(std::basic_string_view<Char>,
399 template <typename Char>
400 utf8_reader(std::basic_istream<Char>&, std::string_view) -> utf8_reader<std::basic_istream<Char>>;
401 template <typename Char>
402 utf8_reader(std::basic_istream<Char>&, std::string&&) -> utf8_reader<std::basic_istream<Char>>;
403
404#if TOML_EXCEPTIONS
405#define utf8_buffered_reader_error_check(...) static_assert(true)
406#else
407#define utf8_buffered_reader_error_check(...) \
408 do { \
409 if TOML_UNLIKELY (reader_.error()) return __VA_ARGS__; \
410 } while (false)
411
412#endif
413
415 public:
416 static constexpr size_t max_history_length = 128;
417
418 private:
419 static constexpr size_t history_buffer_size =
420 max_history_length - 1; //'head' is stored in the reader
422 struct {
423 utf8_codepoint buffer[history_buffer_size];
424 size_t count, first;
425 } history_ = {};
426 const utf8_codepoint* head_ = {};
427 size_t negative_offset_ = {};
428
429 public:
431 explicit utf8_buffered_reader(utf8_reader_interface& reader) noexcept //
432 : reader_{reader} {}
433
435 const source_path_ptr& source_path() const noexcept { return reader_.source_path(); }
436
440
441 if (negative_offset_) {
442 negative_offset_--;
443
444 // an entry negative offset of 1 just means "replay the current head"
445 if (!negative_offset_) return head_;
446
447 // otherwise step back into the history buffer
448 else
449 return history_.buffer +
450 ((history_.first + history_.count - negative_offset_) % history_buffer_size);
451 } else {
452 // first character read from stream
453 if TOML_UNLIKELY (!history_.count && !head_) head_ = reader_.read_next();
454
455 // subsequent characters and not eof
456 else if (head_) {
457 if TOML_UNLIKELY (history_.count < history_buffer_size)
458 history_.buffer[history_.count++] = *head_;
459 else
460 history_.buffer[(history_.first++ + history_buffer_size) % history_buffer_size] =
461 *head_;
462
463 head_ = reader_.read_next();
464 }
465
466 return head_;
467 }
468 }
469
// Moves the replay cursor `count` codepoints further back and returns the codepoint
// now under it: an entry of the history ring buffer when the resulting offset is
// non-zero, otherwise head_.
471 const utf8_codepoint* step_back(size_t count) noexcept {
// NOTE(review): the listing numbers jump from 471 to 473 here, so one line of the
// original body appears to be missing from this extract - confirm against upstream.
473
// Preconditions: there is recorded history, and the request does not reach back
// further than what has been recorded.
474 TOML_ASSERT_ASSUME(history_.count);
475 TOML_ASSERT_ASSUME(negative_offset_ + count <= history_.count);
476
477 negative_offset_ += count;
478
// Index into the ring buffer: start of history plus its length, minus how far back
// we are, wrapped to the buffer size.
479 return negative_offset_
480 ? history_.buffer +
481 ((history_.first + history_.count - negative_offset_) % history_buffer_size)
482 : head_;
483 }
484
486 bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) { return reader_.peek_eof(); }
487
488#if !TOML_EXCEPTIONS
489
491 optional<parse_error>&& error() noexcept { return reader_.error(); }
492
493#endif
494 };
495}
497
498// #---------------------------------------------------------------------------------------------------------------------
499// # PARSER INTERNAL IMPLEMENTATION
500// #---------------------------------------------------------------------------------------------------------------------
501
502#if TOML_EXCEPTIONS
503#define TOML_RETURNS_BY_THROWING [[noreturn]]
504#else
505#define TOML_RETURNS_BY_THROWING
506#endif
507
509 template <typename... T>
510 TOML_CONST_GETTER TOML_INTERNAL_LINKAGE constexpr bool is_match(char32_t codepoint,
511 T... vals) noexcept {
512 static_assert((std::is_same_v<char32_t, T> && ...));
513 return ((codepoint == vals) || ...);
514 }
515
// Per-radix constants used when parsing integers. The template argument is the
// radix; only 2, 8, 10 and 16 are specialised below.
// Each specialisation provides:
//   scope_qualifier   - a human-readable name for this kind of integer
//   is_digit          - the digit classifier for the radix
//   is_signed         - true only for decimal in the specialisations below
//   max_digits        - upper bound on the number of digits accepted
//   full_prefix       - the literal prefix text ("" for decimal)
// The non-decimal radixes additionally provide `prefix_codepoint` and `prefix`,
// the letter that follows the leading '0'.
516 template <uint64_t>
517 struct parse_integer_traits;
// Binary ("0b...").
518 template <>
519 struct parse_integer_traits<2> {
520 static constexpr auto scope_qualifier = "binary integer"sv;
521 static constexpr auto is_digit = impl::is_binary_digit;
522 static constexpr auto is_signed = false;
// NOTE(review): presumably 63 because that many bits fit a non-negative int64_t - confirm.
523 static constexpr auto max_digits = 63;
524 static constexpr auto prefix_codepoint = U'b';
525 static constexpr auto prefix = "b"sv;
526 static constexpr auto full_prefix = "0b"sv;
527 };
// Octal ("0o...").
528 template <>
529 struct parse_integer_traits<8> {
530 static constexpr auto scope_qualifier = "octal integer"sv;
531 static constexpr auto is_digit = impl::is_octal_digit;
532 static constexpr auto is_signed = false;
533 static constexpr auto max_digits = 21; // strlen("777777777777777777777")
534 static constexpr auto prefix_codepoint = U'o';
535 static constexpr auto prefix = "o"sv;
536 static constexpr auto full_prefix = "0o"sv;
537 };
// Decimal: the only signed form, and the only one without prefix members.
538 template <>
539 struct parse_integer_traits<10> {
540 static constexpr auto scope_qualifier = "decimal integer"sv;
541 static constexpr auto is_digit = impl::is_decimal_digit;
542 static constexpr auto is_signed = true;
543 static constexpr auto max_digits = 19; // strlen("9223372036854775807")
544 static constexpr auto full_prefix = ""sv;
545 };
// Hexadecimal ("0x...").
546 template <>
547 struct parse_integer_traits<16> {
548 static constexpr auto scope_qualifier = "hexadecimal integer"sv;
549 static constexpr auto is_digit = impl::is_hexadecimal_digit;
550 static constexpr auto is_signed = false;
551 static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF")
552 static constexpr auto prefix_codepoint = U'x';
553 static constexpr auto prefix = "x"sv;
554 static constexpr auto full_prefix = "0x"sv;
555 };
556
559 std::string_view to_sv(node_type val) noexcept {
560 return impl::node_type_friendly_names[impl::unwrap_enum(val)];
561 }
562
// Returns a non-owning view of the full contents of `str` (no copy is made, so the
// view is only usable while `str` is alive and unmodified).
std::string_view to_sv(const std::string& str) noexcept {
  return {str.data(), str.size()};
}
568
// Spells a boolean as the text "true" or "false".
std::string_view to_sv(bool val) noexcept {
  if (val) return std::string_view{"true"};
  return std::string_view{"false"};
}
576
579 std::string_view to_sv(const utf8_codepoint& cp) noexcept {
580 if (cp.value <= U'\x1F')
581 return impl::control_char_escapes[cp.value];
582 else if (cp.value == U'\x7F')
583 return "\\u007F"sv;
584 else
585 return std::string_view{cp.bytes, cp.count};
586 }
587
590 std::string_view to_sv(const utf8_codepoint* cp) noexcept {
591 if (cp) return to_sv(*cp);
592 return ""sv;
593 }
594
597 };
598
599 template <typename T>
600 TOML_ATTR(nonnull)
601 TOML_INTERNAL_LINKAGE void concatenate(char*& write_pos, char* const buf_end,
602 const T& arg) noexcept {
603 if TOML_UNLIKELY (write_pos >= buf_end) return;
604
605 using arg_type = impl::remove_cvref<T>;
606
607 // string views
608 if constexpr (std::is_same_v<arg_type, std::string_view>) {
609 const auto max_chars = static_cast<size_t>(buf_end - write_pos);
610 const auto len = max_chars < arg.length() ? max_chars : arg.length();
611 std::memcpy(write_pos, arg.data(), len);
612 write_pos += len;
613 }
614
615 // doubles
616 else if constexpr (std::is_same_v<arg_type, double>) {
617#if TOML_FLOAT_CHARCONV
618 const auto result = std::to_chars(write_pos, buf_end, arg);
619 write_pos = result.ptr;
620#else
621 std::ostringstream ss;
622 ss.imbue(std::locale::classic());
623 ss.precision(std::numeric_limits<arg_type>::max_digits10);
624 ss << arg;
625 concatenate(write_pos, buf_end, to_sv(std::move(ss).str()));
626#endif
627 }
628
629 // 64-bit integers
630 else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) {
631#if TOML_INT_CHARCONV
632 const auto result = std::to_chars(write_pos, buf_end, arg);
633 write_pos = result.ptr;
634#else
635 std::ostringstream ss;
636 ss.imbue(std::locale::classic());
637 using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>;
638 ss << static_cast<cast_type>(arg);
639 concatenate(write_pos, buf_end, to_sv(std::move(ss).str()));
640#endif
641 }
642
643 // escaped_codepoint
644 else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) {
645 if (arg.cp.value <= U'\x7F')
646 concatenate(write_pos, buf_end, to_sv(arg.cp));
647 else {
648 auto val = static_cast<uint_least32_t>(arg.cp.value);
649 const auto digits = val > 0xFFFFu ? 8u : 4u;
650 constexpr auto mask = uint_least32_t{0xFu};
651 char buf[10] = {'\\', digits > 4 ? 'U' : 'u'};
652 for (auto i = 2u + digits; i-- > 2u;) {
653 const auto hexdig = val & mask;
654 buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig));
655 val >>= 4;
656 }
657 concatenate(write_pos, buf_end, std::string_view{buf, digits + 2u});
658 }
659 }
660
661 // all other floats (fallback - coerce to double)
662 else if constexpr (std::is_floating_point_v<arg_type>)
663 concatenate(write_pos, buf_end, static_cast<double>(arg));
664
665 // all other integers (fallback - coerce to (u)int64_t)
666 else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) {
667 using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>;
668 concatenate(write_pos, buf_end, static_cast<cast_type>(arg));
669 }
670
671 else {
672 static_assert(impl::always_false<T>,
673 "concatenate() inputs are limited to std::string_views, integers, floats, and "
674 "escaped_codepoint");
675 }
676 }
677
679 static constexpr std::size_t buf_size = 512;
681 char* write_pos = buf;
682 char* const max_write_pos = buf + (buf_size - std::size_t{1}); // allow for null terminator
683
685 error_builder(std::string_view scope) noexcept {
686 concatenate(write_pos, max_write_pos, "Error while parsing "sv);
689 }
690
691 template <typename T>
692 void append(const T& arg) noexcept {
694 }
695
697 auto finish(const source_position& pos, const source_path_ptr& source_path) const {
698 *write_pos = '\0';
699
700#if TOML_EXCEPTIONS
701 throw parse_error{buf, pos, source_path};
702#else
703 return parse_error{std::string(buf, static_cast<size_t>(write_pos - buf)), pos, source_path};
704#endif
705 }
706
708 };
709
// Scope guard for the parser's "current scope" label: construction swaps in a new
// label, destruction puts the previous one back.
struct parse_scope {
  std::string_view& storage_;  // the caller's scope variable being managed
  std::string_view parent_;    // what that variable held before this guard took over

  // Remembers the current value of `current_scope`, then overwrites it with `new_scope`.
  explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept
      : storage_(current_scope), parent_(current_scope) {
    current_scope = new_scope;
  }

  // Restores the remembered value.
  ~parse_scope() noexcept {
    storage_ = parent_;
  }
};
724#define push_parse_scope_2(scope, line) parse_scope ps_##line(current_scope, scope)
725#define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line)
726#define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__)
727
729 std::string buffer;
730 std::vector<std::pair<size_t, size_t>> segments;
731 std::vector<source_position> starts;
732 std::vector<source_position> ends;
733
734 void clear() noexcept {
735 buffer.clear();
736 segments.clear();
737 starts.clear();
738 ends.clear();
739 }
740
741 void push_back(std::string_view segment, source_position b, source_position e) {
742 segments.push_back({buffer.length(), segment.length()});
743 buffer.append(segment);
744 starts.push_back(b);
745 ends.push_back(e);
746 }
747
749 std::string_view operator[](size_t i) const noexcept {
750 return std::string_view{buffer.c_str() + segments[i].first, segments[i].second};
751 }
752
754 std::string_view back() const noexcept { return (*this)[segments.size() - 1u]; }
755
757 bool empty() const noexcept { return segments.empty(); }
758
760 size_t size() const noexcept { return segments.size(); }
761 };
762
764 size_t& depth_;
765
767 explicit depth_counter_scope(size_t& depth) noexcept //
768 : depth_{depth} {
769 depth_++;
770 }
771
772 ~depth_counter_scope() noexcept { depth_--; }
773
775 };
776
778 std::string_view value;
780 };
781
783 std::vector<table*>& tables;
784
786 explicit table_vector_scope(std::vector<table*>& tables_, table& tbl) //
787 : tables{tables_} {
788 tables.push_back(&tbl);
789 }
790
791 ~table_vector_scope() noexcept { tables.pop_back(); }
792
794 };
795}
797
798#if 1 // parser helper macros
799
800// Q: "what the fuck is this? MACROS????"
801// A: The parser needs to work in exceptionless mode (returning error objects directly)
802// and exception mode (reporting parse failures by throwing). Two totally different control
803// flows. These macros encapsulate the differences between the two modes so I can write code
804// as though I was only targeting one mode and not want to yeet myself into the sun.
805// They're all #undef'd at the bottom of the parser's implementation so they should be harmless
806// outside of toml++.
807
808#define is_eof() !cp
809#define assert_not_eof() TOML_ASSERT_ASSUME(cp != nullptr)
810#define return_if_eof(...) \
811 do { \
812 if TOML_UNLIKELY (is_eof()) return __VA_ARGS__; \
813 } while (false)
814
815#if TOML_EXCEPTIONS
816#define is_error() false
817#define return_after_error(...) TOML_UNREACHABLE
818#define assert_not_error() static_assert(true)
819#define return_if_error(...) static_assert(true)
820#define return_if_error_or_eof(...) return_if_eof(__VA_ARGS__)
821#else
822#define is_error() !!err
823#define return_after_error(...) return __VA_ARGS__
824#define assert_not_error() TOML_ASSERT(!is_error())
825#define return_if_error(...) \
826 do { \
827 if TOML_UNLIKELY (is_error()) return __VA_ARGS__; \
828 } while (false)
829#define return_if_error_or_eof(...) \
830 do { \
831 if TOML_UNLIKELY (is_eof() || is_error()) return __VA_ARGS__; \
832 } while (false)
833#endif
834
835#if defined(TOML_BREAK_AT_PARSE_ERRORS) && TOML_BREAK_AT_PARSE_ERRORS
836#if defined(__has_builtin)
837#if __has_builtin(__builtin_debugtrap)
838#define parse_error_break() __builtin_debugtrap()
839#elif __has_builtin(__debugbreak)
840#define parse_error_break() __debugbreak()
841#endif
842#endif
843#ifndef parse_error_break
844#if TOML_MSVC || TOML_ICC
845#define parse_error_break() __debugbreak()
846#else
847#define parse_error_break() TOML_ASSERT(false)
848#endif
849#endif
850#else
851#define parse_error_break() static_assert(true)
852#endif
853
854#define set_error_and_return(ret, ...) \
855 do { \
856 if (!is_error()) set_error(__VA_ARGS__); \
857 return_after_error(ret); \
858 } while (false)
859
860#define set_error_and_return_default(...) set_error_and_return({}, __VA_ARGS__)
861
862#define set_error_and_return_if_eof(...) \
863 do { \
864 if TOML_UNLIKELY (is_eof()) set_error_and_return(__VA_ARGS__, "encountered end-of-file"sv); \
865 } while (false)
866
867#define advance_and_return_if_error(...) \
868 do { \
869 assert_not_eof(); \
870 advance(); \
871 return_if_error(__VA_ARGS__); \
872 } while (false)
873
874#define advance_and_return_if_error_or_eof(...) \
875 do { \
876 assert_not_eof(); \
877 advance(); \
878 return_if_error(__VA_ARGS__); \
879 set_error_and_return_if_eof(__VA_ARGS__); \
880 } while (false)
881
882#endif // parser helper macros
883
885 TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, impl_ex, impl_noex);
886
887 class parser {
888 private:
889 static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;
890
894 const utf8_codepoint* cp = {};
895 std::vector<table*> implicit_tables;
896 std::vector<table*> dotted_key_tables;
897 std::vector<table*> open_inline_tables;
898 std::vector<array*> table_arrays;
900 std::string string_buffer;
901 std::string recording_buffer; // for diagnostics
902 bool recording = false, recording_whitespace = true;
903 std::string_view current_scope;
904 size_t nested_values = {};
905#if !TOML_EXCEPTIONS
906 mutable optional<parse_error> err;
907#endif
908
910 source_position current_position(source_index fallback_offset = 0) const noexcept {
911 if (!is_eof()) return cp->position;
912 return {prev_pos.line, static_cast<source_index>(prev_pos.column + fallback_offset)};
913 }
914
915 template <typename... T>
917 const T&... reason) const {
918 static_assert(sizeof...(T) > 0);
920
922 (builder.append(reason), ...);
923
925
926#if TOML_EXCEPTIONS
927 builder.finish(pos, reader.source_path());
928#else
929 err.emplace(builder.finish(pos, reader.source_path()));
930#endif
931 }
932
933 template <typename... T>
934 TOML_RETURNS_BY_THROWING void set_error(const T&... reason) const {
935 set_error_at(current_position(1), reason...);
936 }
937
// Rewinds the parser by `count` codepoints (default 1) by asking the buffered reader
// to step back, and makes the codepoint it returns the current one.
938 void go_back(size_t count = 1) noexcept {
// NOTE(review): listing number 939 is absent from this extract - a line may be missing
// here; confirm against upstream.
// `count` must be non-zero.
940 TOML_ASSERT_ASSUME(count);
941
942 cp = reader.step_back(count);
// NOTE(review): listing number 943 is absent from this extract - a line may be missing
// here; confirm against upstream.
944 }
945
// Moves the parser to the next codepoint from the reader and, while recording is
// active, appends that codepoint's bytes to the recording buffer.
946 void advance() {
// NOTE(review): listing numbers 947, 948 and 950 are absent from this extract - lines
// may be missing around here; confirm against upstream.
949
951 cp = reader.read_next();
952
// Without exceptions the reader reports failure through error(); adopt it and stop.
953#if !TOML_EXCEPTIONS
954 if (reader.error()) {
955 err = std::move(reader.error());
956 return;
957 }
958#endif
959
// Record the new codepoint unless we hit EOF; whitespace is skipped when
// recording_whitespace is off.
960 if (recording && !is_eof()) {
961 if (recording_whitespace || !is_whitespace(*cp))
962 recording_buffer.append(cp->bytes, cp->count);
963 }
964 }
965
// Turns recording on with a fresh (emptied) recording buffer. When `include_current`
// is true and the parser is not at EOF, the current codepoint's bytes are recorded
// immediately.
966 void start_recording(bool include_current = true) noexcept {
// NOTE(review): listing numbers 967 and 970 are absent from this extract - lines may be
// missing here; confirm against upstream.
968
969 recording = true;
971 recording_buffer.clear();
972 if (include_current && !is_eof()) recording_buffer.append(cp->bytes, cp->count);
973 }
974
// Turns recording off and optionally drops the last `pop_bytes` bytes from the
// recording buffer.
975 void stop_recording(size_t pop_bytes = 0) noexcept {
// NOTE(review): listing number 976 is absent from this extract - a line may be missing
// here; confirm against upstream.
977
978 recording = false;
979 if (pop_bytes) {
// Asked to drop at least everything that was recorded: just empty the buffer.
980 if (pop_bytes >= recording_buffer.length())
981 recording_buffer.clear();
// Single byte: pop_back is enough.
982 else if (pop_bytes == 1u)
983 recording_buffer.pop_back();
// General case: erase the trailing pop_bytes characters.
984 else
985 recording_buffer.erase(recording_buffer.begin() +
986 static_cast<ptrdiff_t>(recording_buffer.length() - pop_bytes),
987 recording_buffer.end());
988 }
989 }
990
993
994 bool consumed = false;
995 while (!is_eof() && is_horizontal_whitespace(*cp)) {
996 if TOML_UNLIKELY (!is_ascii_horizontal_whitespace(*cp))
997 set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{*cp},
998 "'"sv);
999
1000 consumed = true;
1002 }
1003 return consumed;
1004 }
1005
1008
1009 if TOML_UNLIKELY (is_match(*cp, U'\v', U'\f'))
1011 R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv);
1012
1013 if (*cp == U'\r') {
1014 advance_and_return_if_error({}); // skip \r
1015
1016 if TOML_UNLIKELY (is_eof())
1017 set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv);
1018
1019 if TOML_UNLIKELY (*cp != U'\n')
1020 set_error_and_return_default("expected '\\n' after '\\r', saw '"sv,
1021 escaped_codepoint{*cp}, "'"sv);
1022 } else if (*cp != U'\n')
1023 return false;
1024
1025 advance_and_return_if_error({}); // skip \n
1026 return true;
1027 }
1028
1031
1032 do {
1033 if (is_ascii_vertical_whitespace(*cp))
1034 return consume_line_break();
1035 else
1036 advance();
1037 return_if_error({});
1038 } while (!is_eof());
1039
1040 return true;
1041 }
1042
1045
1046 if (*cp != U'#') return false;
1047
1048 push_parse_scope("comment"sv);
1049
1050 advance_and_return_if_error({}); // skip the '#'
1051
1052 while (!is_eof()) {
1053 if (consume_line_break()) return true;
1054 return_if_error({});
1055
1056#if TOML_LANG_AT_LEAST(1, 0, 0)
1057
1058 // toml/issues/567 (disallow non-TAB control characters in comments)
1059 if TOML_UNLIKELY (is_nontab_control_character(*cp))
1061 "control characters other than TAB (U+0009) are explicitly prohibited in comments"sv);
1062
1063 // toml/pull/720 (disallow surrogates in comments)
1064 else if TOML_UNLIKELY (is_unicode_surrogate(*cp))
1066 "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited in comments"sv);
1067#endif
1068
1070 }
1071
1072 return true;
1073 }
1074
// Checks the input against `seq` one codepoint at a time; returns false at the first
// mismatch and true once every element of `seq` has matched.
1076 bool consume_expected_sequence(std::u32string_view seq) {
// Bail out if an error is already pending (expands to nothing when TOML_EXCEPTIONS is on).
1077 return_if_error({});
// `seq` must not be empty.
1078 TOML_ASSERT(!seq.empty());
1079
1080 for (auto c : seq) {
// NOTE(review): listing numbers 1081 and 1083 are absent from this extract. As shown the
// loop neither checks for EOF nor advances the input, so those statements are
// presumably the missing lines - confirm against upstream.
1082 if (*cp != c) return false;
1084 }
1085 return true;
1086 }
1087
// Reads exactly `len` decimal digits, writing each digit's numeric value (0-9) into
// `digits[i]`. Returns false as soon as a non-digit is seen, true when all `len`
// digits were stored.
1088 template <typename T>
1089 TOML_NODISCARD bool consume_digit_sequence(T* digits, size_t len) {
// Bail out if an error is already pending (expands to nothing when TOML_EXCEPTIONS is on).
1090 return_if_error({});
// Both the output pointer and the length must be non-zero.
1091 TOML_ASSERT_ASSUME(digits);
1092 TOML_ASSERT_ASSUME(len);
1093
1094 for (size_t i = 0; i < len; i++) {
// NOTE(review): listing numbers 1095 and 1099 are absent from this extract. As shown the
// loop neither checks for EOF nor advances the input, so those statements are
// presumably the missing lines - confirm against upstream.
1096 if (!is_decimal_digit(*cp)) return false;
1097
// Convert the digit character to its numeric value.
1098 digits[i] = static_cast<T>(*cp - U'0');
1100 }
1101 return true;
1102 }
1103
// Reads up to `max_len` decimal digits, writing each digit's numeric value (0-9) into
// `buffer`, and returns how many were stored. Stops early at EOF or at the first
// non-digit.
1104 template <typename T>
1105 TOML_NODISCARD size_t consume_variable_length_digit_sequence(T* buffer, size_t max_len) {
// Bail out if an error is already pending (expands to nothing when TOML_EXCEPTIONS is on).
1106 return_if_error({});
// Both the output pointer and the maximum length must be non-zero.
1107 TOML_ASSERT_ASSUME(buffer);
1108 TOML_ASSERT_ASSUME(max_len);
1109
1110 size_t i = {};
1111 for (; i < max_len; i++) {
1112 if (is_eof() || !is_decimal_digit(*cp)) break;
1113
// Convert the digit character to its numeric value.
1114 buffer[i] = static_cast<T>(*cp - U'0');
// NOTE(review): listing number 1115 is absent from this extract. As shown the loop never
// advances the input, so that statement is presumably the missing line - confirm
// against upstream.
1116 }
1117 return i;
1118 }
1119
// Parses a basic (double-quoted) string and returns a view of its decoded contents.
//
// multi_line: true for a """-delimited string, false for a single-line "..." string.
// On entry the current codepoint must be a '"' (asserted below).
//
// The decoded text is accumulated in string_buffer and the returned view refers to that
// buffer, so it presumably stays valid only until string_buffer is next modified.
// Escape sequences are decoded here, including \u / \U (and \x when TOML_LANG_UNRELEASED)
// unicode scalar sequences, which are re-encoded as UTF-8.
// Reaching end-of-file before the closing delimiter reports "encountered end-of-file".
// NOTE(review): the embedded listing numbers have gaps (e.g. 1129, 1133, 1135, 1347), so
// several statements - apparently the ones advancing the cursor - are not visible here.
1122 std::string_view parse_basic_string(bool multi_line) {
1123 return_if_error({});
1125 TOML_ASSERT_ASSUME(*cp == U'"');
1126 push_parse_scope("string"sv);
1127
1128 // skip the '"'
1130
1131 // multi-line strings ignore a single line ending right at the beginning
1132 if (multi_line) {
1134 return_if_error({});
1136 }
1137
1138 auto& str = string_buffer;
1139 str.clear();
// escaped: the previous codepoint was a backslash
// skipping_whitespace: a line-ending backslash was seen, so following whitespace is dropped
1140 bool escaped = false;
1141 bool skipping_whitespace = false;
1142 do {
1143 if (escaped) {
1144 escaped = false;
1145
1146 // handle 'line ending slashes' in multi-line mode
1147 if (multi_line && is_whitespace(*cp)) {
1149
1152 "line-ending backslashes must be the last non-whitespace character on the line"sv);
1153
1154 skipping_whitespace = true;
1155 return_if_error({});
1156 continue;
1157 }
1158
// cleared by the unicode branch, which handles its own cursor movement
1159 bool skip_escaped_codepoint = true;
1161 switch (const auto escaped_codepoint = *cp) {
1162 // 'regular' escape codes
1163 case U'b':
1164 str += '\b';
1165 break;
1166 case U'f':
1167 str += '\f';
1168 break;
1169 case U'n':
1170 str += '\n';
1171 break;
1172 case U'r':
1173 str += '\r';
1174 break;
1175 case U't':
1176 str += '\t';
1177 break;
1178 case U'"':
1179 str += '"';
1180 break;
1181 case U'\\':
1182 str += '\\';
1183 break;
1184
1185#if TOML_LANG_UNRELEASED // toml/pull/790 (\e shorthand for \x1B)
1186 case U'e':
1187 str += '\x1B';
1188 break;
1189#else
1190 case U'e':
1192 "escape sequence '\\e' is not supported in TOML 1.0.0 and earlier"sv);
1193#endif
1194
1195#if TOML_LANG_UNRELEASED // toml/pull/796 (\xHH unicode scalar sequences)
1196 case U'x':
1197 [[fallthrough]];
1198#else
1199 case U'x':
1201 "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv);
1202#endif
1203
1204 // unicode scalar sequences
1205 case U'u':
1206 [[fallthrough]];
1207 case U'U': {
1208 push_parse_scope("unicode scalar sequence"sv);
1210 skip_escaped_codepoint = false;
1211
// place value of the most significant hex digit:
// \U reads 8 digits (16^7), \u reads 4 (16^3), otherwise (\x) 2 (16^1)
1212 uint32_t place_value = escaped_codepoint == U'U'
1213 ? 0x10000000u
1214 : (escaped_codepoint == U'u' ? 0x1000u : 0x10u);
1215 uint32_t sequence_value{};
1216 while (place_value) {
1218
1219 if TOML_UNLIKELY (!is_hexadecimal_digit(*cp))
1220 set_error_and_return_default("expected hex digit, saw '"sv, to_sv(*cp), "'"sv);
1221
1222 sequence_value += place_value * hex_to_dec(*cp);
1223 place_value /= 16u;
1225 }
1226
1227 if TOML_UNLIKELY (is_unicode_surrogate(sequence_value))
1229 "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
1230 else if TOML_UNLIKELY (sequence_value > 0x10FFFFu)
1231 set_error_and_return_default("values greater than U+10FFFF are invalid"sv);
1232
// encode the scalar value as a 1-, 2-, 3- or 4-byte UTF-8 sequence
1233 if (sequence_value < 0x80) {
1234 str += static_cast<char>(sequence_value);
1235 } else if (sequence_value < 0x800u) {
1236 str += static_cast<char>((sequence_value >> 6) | 0xC0u);
1237 str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
1238 } else if (sequence_value < 0x10000u) {
1239 str += static_cast<char>((sequence_value >> 12) | 0xE0u);
1240 str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
1241 str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
1242 } else if (sequence_value < 0x110000u) {
1243 str += static_cast<char>((sequence_value >> 18) | 0xF0u);
1244 str += static_cast<char>(((sequence_value >> 12) & 0x3Fu) | 0x80u);
1245 str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
1246 str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
1247 }
1248 break;
1249 }
1250
1251 // ???
1253 default:
1254 set_error_and_return_default("unknown escape sequence '\\"sv, to_sv(*cp), "'"sv);
1255 }
1256
1257 if (skip_escaped_codepoint) advance_and_return_if_error_or_eof({});
1258 } else {
1259 // handle closing delimiters
1260 if (*cp == U'"') {
1261 if (multi_line) {
// count the run of quotes starting here; the loop bounds cap it at five
1262 size_t lookaheads = {};
1263 size_t consecutive_delimiters = 1;
1264 do {
1266 lookaheads++;
1267 if (!is_eof() && *cp == U'"')
1268 consecutive_delimiters++;
1269 else
1270 break;
1271 } while (lookaheads < 4u);
1272
1273 switch (consecutive_delimiters) {
1274 // """ " (one quote somewhere in a ML string)
1275 case 1:
1276 str += '"';
1277 skipping_whitespace = false;
1278 continue;
1279
1280 // """ "" (two quotes somewhere in a ML string)
1281 case 2:
1282 str.append("\"\""sv);
1283 skipping_whitespace = false;
1284 continue;
1285
1286 // """ """ (the end of the string)
1287 case 3:
1288 return str;
1289
1290 // """ """" (one at the end of the string)
1291 case 4:
1292 str += '"';
1293 return str;
1294
1295 // """ """"" (two quotes at the end of the string)
1296 case 5:
1297 str.append("\"\""sv);
1298 advance_and_return_if_error({}); // skip the last '"'
1299 return str;
1300
1301 default:
1303 }
1304 } else {
1305 advance_and_return_if_error({}); // skip the closing delimiter
1306 return str;
1307 }
1308 }
1309
1310 // handle escapes
1311 else if (*cp == U'\\') {
1312 advance_and_return_if_error_or_eof({}); // skip the '\'
1313 skipping_whitespace = false;
1314 escaped = true;
1315 continue;
1316 }
1317
1318 // handle line endings in multi-line mode
1319 if (multi_line && is_ascii_vertical_whitespace(*cp)) {
1321 return_if_error({});
// a line ending is stored as a single '\n' unless it follows a line-ending backslash
1322 if (!skipping_whitespace) str += '\n';
1323 continue;
1324 }
1325
1326 // handle control characters
1327 if TOML_UNLIKELY (is_nontab_control_character(*cp))
1329 "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv);
1330
1331#if TOML_LANG_AT_LEAST(1, 0, 0)
1332
1333 // handle surrogates in strings
1334 if TOML_UNLIKELY (is_unicode_surrogate(*cp))
1336 "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv);
1337#endif
1338
1339 if (multi_line) {
// while skipping after a line-ending backslash, horizontal whitespace is dropped;
// the first other codepoint ends the skipping and is appended
1340 if (!skipping_whitespace || !is_horizontal_whitespace(*cp)) {
1341 skipping_whitespace = false;
1342 str.append(cp->bytes, cp->count);
1343 }
1344 } else
1345 str.append(cp->bytes, cp->count);
1346
1348 }
1349 } while (!is_eof());
1350
// the loop only exits here when input ran out before a closing delimiter
1351 set_error_and_return_default("encountered end-of-file"sv);
1352 }
1353
// Parses a literal (single-quoted) string and returns a view of its contents.
//
// multi_line: true for a '''-delimited string, false for a single-line '...' string.
// On entry the current codepoint must be a '\'' (asserted below).
//
// No escape handling is performed: codepoints are appended verbatim to string_buffer, and
// the returned view refers to that buffer. Control characters other than TAB are rejected,
// as are unicode surrogates when TOML_LANG_AT_LEAST(1, 0, 0).
// Reaching end-of-file before the closing delimiter reports "encountered end-of-file".
// NOTE(review): the embedded listing numbers have gaps (e.g. 1363, 1367, 1383, 1448), so
// several statements - apparently the ones advancing the cursor - are not visible here.
1356 std::string_view parse_literal_string(bool multi_line) {
1357 return_if_error({});
1359 TOML_ASSERT_ASSUME(*cp == U'\'');
1360 push_parse_scope("literal string"sv);
1361
1362 // skip the delimiter
1364
1365 // multi-line strings ignore a single line ending right at the beginning
1366 if (multi_line) {
1368 return_if_error({});
1370 }
1371
1372 auto& str = string_buffer;
1373 str.clear();
1374 do {
1375 return_if_error({});
1376
1377 // handle closing delimiters
1378 if (*cp == U'\'') {
1379 if (multi_line) {
// count the run of quotes starting here; the loop bounds cap it at five
1380 size_t lookaheads = {};
1381 size_t consecutive_delimiters = 1;
1382 do {
1384 lookaheads++;
1385 if (!is_eof() && *cp == U'\'')
1386 consecutive_delimiters++;
1387 else
1388 break;
1389 } while (lookaheads < 4u);
1390
1391 switch (consecutive_delimiters) {
1392 // ''' ' (one quote somewhere in a ML string)
1393 case 1:
1394 str += '\'';
1395 continue;
1396
1397 // ''' '' (two quotes somewhere in a ML string)
1398 case 2:
1399 str.append("''"sv);
1400 continue;
1401
1402 // ''' ''' (the end of the string)
1403 case 3:
1404 return str;
1405
1406 // ''' '''' (one at the end of the string)
1407 case 4:
1408 str += '\'';
1409 return str;
1410
1411 // ''' ''''' (two quotes at the end of the string)
1412 case 5:
1413 str.append("''"sv);
1414 advance_and_return_if_error({}); // skip the last '
1415 return str;
1416
1417 default:
1419 }
1420 } else {
1421 advance_and_return_if_error({}); // skip the closing delimiter
1422 return str;
1423 }
1424 }
1425
1426 // handle line endings in multi-line mode
1427 if (multi_line && is_ascii_vertical_whitespace(*cp)) {
1429 return_if_error({});
// a line ending is stored as a single '\n'
1430 str += '\n';
1431 continue;
1432 }
1433
1434 // handle control characters
1435 if TOML_UNLIKELY (is_nontab_control_character(*cp))
1437 "control characters other than TAB (U+0009) are explicitly prohibited"sv);
1438
1439#if TOML_LANG_AT_LEAST(1, 0, 0)
1440
1441 // handle surrogates in strings
1442 if TOML_UNLIKELY (is_unicode_surrogate(*cp))
1444 "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
1445#endif
1446
// everything else is copied through as its raw encoded bytes
1447 str.append(cp->bytes, cp->count);
1449 } while (!is_eof());
1450
// the loop only exits here when input ran out before a closing delimiter
1451 set_error_and_return_default("encountered end-of-file"sv);
1452 }
1453
// Body of the string-parsing entry point.
// NOTE(review): the signature line is not present in this listing; the call site elsewhere
// in the file uses parse_string().value, so this is presumably parse_string().
//
// Looks at the first three codepoints to decide between a single-line and a multi-line
// string, then hands off to parse_literal_string() or parse_basic_string() depending on
// whether the delimiter is '\'' or '"'. The result is brace-initialised from the parsed
// text plus a flag that is true for multi-line strings.
// NOTE(review): the embedded listing numbers skip 1464 and 1466, so the statements that
// advance between reading first, second and third are not visible here.
1457 return_if_error({});
1459 TOML_ASSERT_ASSUME(is_string_delimiter(*cp));
1460 push_parse_scope("string"sv);
1461
1462 // get the first three characters to determine the string type
1463 const auto first = cp->value;
1465 const auto second = cp->value;
// cp is null-checked here, so U'\0' stands in for a missing third codepoint
1467 const auto third = cp ? cp->value : U'\0';
1468
1469 // if we were eof at the third character then first and second need to be
1470 // the same string character (otherwise it's an unterminated string)
1471 if (is_eof()) {
// two matching delimiters followed by end-of-file: return a default-constructed result
1472 if (second == first) return {};
1473
1474 set_error_and_return_default("encountered end-of-file"sv);
1475 }
1476
1477 // if the first three characters are all the same string delimiter then
1478 // it's a multi-line string.
1479 else if (first == second && first == third) {
1480 return {first == U'\'' ? parse_literal_string(true) : parse_basic_string(true), true};
1481 }
1482
1483 // otherwise it's just a regular string.
1484 else {
1485 // step back two characters so that the current
1486 // character is the string delimiter
1487 go_back(2u);
1488
1489 return {first == U'\'' ? parse_literal_string(false) : parse_basic_string(false), false};
1490 }
1491 }
1492
// Collects a run of bare-key characters into string_buffer and returns a view of it.
// On entry the current codepoint must satisfy is_bare_key_character (asserted below).
// Collection stops at end-of-file or at the first codepoint that is not a bare-key character.
// The returned view refers to string_buffer, which is cleared at the start of this call.
// NOTE(review): the embedded listing numbers skip 1506, so the statement that advances the
// cursor inside the loop is not visible here - confirm against the full file.
1495 std::string_view parse_bare_key_segment() {
1496 return_if_error({});
1498 TOML_ASSERT_ASSUME(is_bare_key_character(*cp));
1499
1500 string_buffer.clear();
1501
1502 while (!is_eof()) {
1503 if (!is_bare_key_character(*cp)) break;
1504
// append the codepoint's raw encoded bytes
1505 string_buffer.append(cp->bytes, cp->count);
1507 }
1508
1509 return string_buffer;
1510 }
1511
// Body of the boolean parser.
// NOTE(review): the signature line is not present in this listing; the call site elsewhere
// in the file uses parse_boolean(), so this is presumably that function.
//
// The first codepoint (one of t/f/T/F, asserted below) selects which literal is expected:
// 't' or 'T' expects "true", anything else expects "false". The expected sequence must then
// be consumed in full and be followed by end-of-file or a value terminator.
// Returns the selected boolean on success.
1515 return_if_error({});
1517 TOML_ASSERT_ASSUME(is_match(*cp, U't', U'f', U'T', U'F'));
1518 push_parse_scope("boolean"sv);
1519
// record what is consumed so the error message below can show what was actually seen
1520 start_recording(true);
1521 auto result = is_match(*cp, U't', U'T');
1522 if (!consume_expected_sequence(result ? U"true"sv : U"false"sv))
1523 set_error_and_return_default("expected '"sv, to_sv(result), "', saw '"sv,
1524 to_sv(recording_buffer), "'"sv);
1526
// cp is null-checked first, so end-of-file is accepted as a valid end of the value
1527 if (cp && !is_value_terminator(*cp))
1528 set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
1529
1530 return result;
1531 }
1532
// Body of the inf/nan parser.
// NOTE(review): the signature line is not present in this listing; the call site elsewhere
// in the file uses parse_inf_or_nan(), so this is presumably that function.
//
// Accepts an optional leading '+' or '-' followed by "inf" or "nan", which must then be
// followed by end-of-file or a value terminator.
// Returns +infinity or -infinity for "inf" (according to the sign) and a quiet NaN for
// "nan"; the sign has no effect on the NaN result.
1536 return_if_error({});
1538 TOML_ASSERT_ASSUME(is_match(*cp, U'i', U'n', U'I', U'N', U'+', U'-'));
1539 push_parse_scope("floating-point"sv);
1540
// record what is consumed so the error message below can show what was actually seen
1541 start_recording(true);
1542 const bool negative = *cp == U'-';
1543 if (negative || *cp == U'+') advance_and_return_if_error_or_eof({});
1544
// 'i' or 'I' selects "inf"; any other codepoint is checked against "nan"
1545 const bool inf = is_match(*cp, U'i', U'I');
1546 if (!consume_expected_sequence(inf ? U"inf"sv : U"nan"sv))
1547 set_error_and_return_default("expected '"sv, inf ? "inf"sv : "nan"sv, "', saw '"sv,
1548 to_sv(recording_buffer), "'"sv);
1550
// cp is null-checked first, so end-of-file is accepted as a valid end of the value
1551 if (cp && !is_value_terminator(*cp))
1552 set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
1553
1554 return inf ? (negative ? -std::numeric_limits<double>::infinity()
1555 : std::numeric_limits<double>::infinity())
1556 : std::numeric_limits<double>::quiet_NaN();
1557 }
1558
// Parses a decimal floating-point literal and returns it as a double.
//
// On entry the current codepoint must be '+', '-', '.' or a decimal digit (asserted below).
// The leading sign is handled separately; the remaining characters up to the next value
// terminator are validated one at a time and copied, without underscores, into chars.
// The copied text is then converted with std::from_chars (TOML_FLOAT_CHARCONV) or with a
// classic-locale std::stringstream, and the result is multiplied by the sign.
//
// Rejected inputs include: underscores not surrounded by digits, a missing integer part,
// more than one '.', a '.' or second 'e' inside the exponent, misplaced signs, leading
// zeroes in the integer part, and text longer than the chars buffer.
// NOTE(review): the embedded listing numbers have gaps (e.g. 1572, 1583, 1640), so the
// declaration of chars and the statements advancing the cursor are not visible here.
1561 double parse_float() {
1562 return_if_error({});
1564 TOML_ASSERT_ASSUME(is_match(*cp, U'+', U'-', U'.') || is_decimal_digit(*cp));
1565 push_parse_scope("floating-point"sv);
1566
1567 // sign
1568 const int sign = *cp == U'-' ? -1 : 1;
1569 if (is_match(*cp, U'+', U'-')) advance_and_return_if_error_or_eof({});
1570
1571 // consume value chars
1573 size_t length = {};
// prev: the previously examined codepoint (null until the first one has been processed)
1574 const utf8_codepoint* prev = {};
1575 bool seen_decimal = false, seen_exponent = false;
// first digit of the integer part, or '\0' while none has been seen
1576 char first_integer_part = '\0';
1577 while (!is_eof() && !is_value_terminator(*cp)) {
1578 if (*cp == U'_') {
1579 if (!prev || !is_decimal_digit(*prev))
1580 set_error_and_return_default("underscores may only follow digits"sv);
1581
// underscores are remembered in prev but never copied into chars
1582 prev = cp;
1584 continue;
1585 } else if TOML_UNLIKELY (prev && *prev == U'_' && !is_decimal_digit(*cp))
1586 set_error_and_return_default("underscores must be followed by digits"sv);
1587 else if TOML_UNLIKELY (length == sizeof(chars))
1589 "exceeds length limit of "sv, sizeof(chars), " digits"sv,
1590 (seen_exponent ? ""sv : " (consider using exponent notation)"sv));
1591 else if (*cp == U'.') {
1592 // .1
1593 // -.1
1594 // +.1 (no integer part)
1595 if (!first_integer_part)
1596 set_error_and_return_default("expected decimal digit, saw '.'"sv);
1597
1598 // 1.0e+.10 (exponent cannot have '.')
1599 else if (seen_exponent)
1600 set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv);
1601
1602 // 1.0.e+.10
1603 // 1..0
1604 // (multiple '.')
1605 else if (seen_decimal)
1606 set_error_and_return_default("expected decimal digit or exponent, saw '.'"sv);
1607
1608 seen_decimal = true;
1609 } else if (is_match(*cp, U'e', U'E')) {
1610 if (prev && !is_decimal_digit(*prev))
1611 set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1612
1613 // 1.0ee+10 (multiple 'e')
1614 else if (seen_exponent)
1615 set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1616
1617 seen_decimal = true; // implied
1618 seen_exponent = true;
1619 } else if (is_match(*cp, U'+', U'-')) {
1620 // 1.-0 (sign in mantissa)
1621 if (!seen_exponent)
1622 set_error_and_return_default("expected decimal digit or '.', saw '"sv, to_sv(*cp),
1623 "'"sv);
1624
1625 // 1.0e1-0 (misplaced exponent sign)
1626 else if (!is_match(*prev, U'e', U'E'))
1627 set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
1628 } else if (is_decimal_digit(*cp)) {
// leading-zero check applies only to the integer part (before any '.' or exponent)
1629 if (!seen_decimal) {
1630 if (!first_integer_part)
1631 first_integer_part = static_cast<char>(cp->bytes[0]);
1632 else if (first_integer_part == '0')
1633 set_error_and_return_default("leading zeroes are prohibited"sv);
1634 }
1635 } else
1636 set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1637
// every accepted character here is a single byte, so bytes[0] is the whole codepoint
1638 chars[length++] = static_cast<char>(cp->bytes[0]);
1639 prev = cp;
1641 }
1642
1643 // sanity-check ending state
1644 if (prev) {
1645 if (*prev == U'_') {
1647 set_error_and_return_default("underscores must be followed by digits"sv);
1648 } else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.')) {
1650 set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
1651 }
1652 }
1653
1654 // convert to double
1655 double result;
1656#if TOML_FLOAT_CHARCONV
1657 {
1658 auto fc_result = std::from_chars(chars, chars + length, result);
1659 switch (fc_result.ec) {
1661 case std::errc{}: // ok
1662 return result * sign;
1663
1664 case std::errc::invalid_argument:
1665 set_error_and_return_default("'"sv, std::string_view{chars, length},
1666 "' could not be interpreted as a value"sv);
1667 break;
1668
1669 case std::errc::result_out_of_range:
1670 set_error_and_return_default("'"sv, std::string_view{chars, length},
1671 "' is not representable in 64 bits"sv);
1672 break;
1673
1674 default: //??
1676 "an unspecified error occurred while trying to interpret '"sv,
1677 std::string_view{chars, length}, "' as a value"sv);
1678 }
1679 }
1680#else
1681 {
// the classic locale keeps the conversion independent of the global locale
1682 std::stringstream ss;
1683 ss.imbue(std::locale::classic());
1684 ss.write(chars, static_cast<std::streamsize>(length));
1685 if ((ss >> result))
1686 return result * sign;
1687 else
1688 set_error_and_return_default("'"sv, std::string_view{chars, length},
1689 "' could not be interpreted as a value"sv);
1690 }
1691#endif
1692 }
1693
// Body of the hexadecimal floating-point parser.
// NOTE(review): the signature line is not present in this listing; the name parse_hex_float
// is inferred from the parse scope and should be confirmed against the full file.
//
// When TOML_LANG_UNRELEASED is set, accepts
//   [+-] 0 [xX] <HEX DIGITS> ([.]<HEX DIGITS>)? [pP] [+-]? <DEC DIGITS>
// The text is split into three fragments (mantissa whole part, mantissa fractional part,
// exponent); each is converted to a number and the result is
//   (whole + fraction) * 2^(exponent * exponent_sign) * sign.
// Otherwise the function only reports that hexadecimal floats are unsupported.
//
// Changes in this revision: removed a stray leading space from the "expected '0'" error
// message and corrected the spelling "exceeeds" -> "exceeds" in the fragment-length message.
// NOTE(review): the embedded listing numbers have gaps (e.g. 1710, 1715, 1739, 1791), so
// the statements advancing the cursor are not visible here.
1697 return_if_error({});
1699 TOML_ASSERT_ASSUME(is_match(*cp, U'0', U'+', U'-'));
1700 push_parse_scope("hexadecimal floating-point"sv);
1701
1702#if TOML_LANG_UNRELEASED // toml/issues/562 (hexfloats)
1703
1704 // sign
1705 const int sign = *cp == U'-' ? -1 : 1;
1706 if (is_match(*cp, U'+', U'-')) advance_and_return_if_error_or_eof({});
1707
1708 // '0'
1709 if (*cp != U'0') set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv);
1711
1712 // 'x' or 'X'
1713 if (!is_match(*cp, U'x', U'X'))
1714 set_error_and_return_default("expected 'x' or 'X', saw '"sv, to_sv(*cp), "'"sv);
1716
1717 // <HEX DIGITS> ([.]<HEX DIGITS>)? [pP] [+-]? <DEC DIGITS>
1718
1719 // consume value fragments
1720 struct fragment {
1721 char chars[24];
1722 size_t length;
1723 double value;
1724 };
1725 fragment fragments[] = {
1726 {}, // mantissa, whole part
1727 {}, // mantissa, fractional part
1728 {} // exponent
1729 };
// current_fragment moves forward through fragments[] as '.' and 'p' are encountered
1730 fragment* current_fragment = fragments;
1731 const utf8_codepoint* prev = {};
1732 int exponent_sign = 1;
1733 while (!is_eof() && !is_value_terminator(*cp)) {
1734 if (*cp == U'_') {
1735 if (!prev || !is_hexadecimal_digit(*prev))
1736 set_error_and_return_default("underscores may only follow digits"sv);
1737
1738 prev = cp;
1740 continue;
1741 } else if (prev && *prev == U'_' && !is_hexadecimal_digit(*cp))
1742 set_error_and_return_default("underscores must be followed by digits"sv);
1743 else if (*cp == U'.') {
1744 // 0x10.0p-.0 (exponent cannot have '.')
1745 if (current_fragment == fragments + 2)
1746 set_error_and_return_default("expected exponent digit or sign, saw '.'"sv);
1747
1748 // 0x10.0.p-0 (multiple '.')
1749 else if (current_fragment == fragments + 1)
1750 set_error_and_return_default("expected hexadecimal digit or exponent, saw '.'"sv);
1751
1752 else
1753 current_fragment++;
1754 } else if (is_match(*cp, U'p', U'P')) {
1755 // 0x10.0pp-0 (multiple 'p')
1756 if (current_fragment == fragments + 2)
1757 set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp),
1758 "'"sv);
1759
1760 // 0x.p-0 (mantissa is just '.')
1761 else if (fragments[0].length == 0u && fragments[1].length == 0u)
1762 set_error_and_return_default("expected hexadecimal digit, saw '"sv, to_sv(*cp), "'"sv);
1763
1764 else
1765 current_fragment = fragments + 2;
1766 } else if (is_match(*cp, U'+', U'-')) {
1767 // 0x-10.0p-0 (sign in mantissa)
1768 if (current_fragment != fragments + 2)
1769 set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp),
1770 "'"sv);
1771
1772 // 0x10.0p0- (misplaced exponent sign)
1773 else if (!is_match(*prev, U'p', U'P'))
1774 set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
1775
1776 else
1777 exponent_sign = *cp == U'-' ? -1 : 1;
1778 } else if (current_fragment < fragments + 2 && !is_hexadecimal_digit(*cp))
1779 set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp),
1780 "'"sv);
1781 else if (current_fragment == fragments + 2 && !is_decimal_digit(*cp))
1782 set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp),
1783 "'"sv);
1784 else if (current_fragment->length == sizeof(fragment::chars))
1785 set_error_and_return_default("fragment exceeds maximum length of "sv,
1786 sizeof(fragment::chars), " characters"sv);
1787 else
1788 current_fragment->chars[current_fragment->length++] = static_cast<char>(cp->bytes[0]);
1789
1790 prev = cp;
1792 }
1793
1794 // sanity-check ending state
1795 if (current_fragment != fragments + 2 || current_fragment->length == 0u) {
1797 set_error_and_return_default("missing exponent"sv);
1798 } else if (prev && *prev == U'_') {
1800 set_error_and_return_default("underscores must be followed by digits"sv);
1801 }
1802
1803 // calculate values for the three fragments
// NOTE(review): place and val are 32-bit while a fragment may hold up to 24 digits, so
// long fragments look like they could wrap - confirm whether inputs are bounded elsewhere.
1804 for (int fragment_idx = 0; fragment_idx < 3; fragment_idx++) {
1805 auto& f = fragments[fragment_idx];
// the exponent fragment is decimal; both mantissa fragments are hexadecimal
1806 const uint32_t base = fragment_idx == 2 ? 10u : 16u;
1807
1808 // left-trim zeroes
1809 const char* c = f.chars;
// sig counts the digits consumed (trimmed zeroes, plus value digits for the fractional
// fragment); it is used below as the number of places to shift the fraction
1810 size_t sig = {};
1811 while (f.length && *c == '0') {
1812 f.length--;
1813 c++;
1814 sig++;
1815 }
1816 if (!f.length) continue;
1817
1818 // calculate value
1819 auto place = 1u;
1820 for (size_t i = 0; i < f.length - 1u; i++) place *= base;
1821 uint32_t val{};
1822 while (place) {
1823 if (base == 16)
1824 val += place * hex_to_dec(*c);
1825 else
1826 val += place * static_cast<uint32_t>(*c - '0');
1827 if (fragment_idx == 1) sig++;
1828 c++;
1829 place /= base;
1830 }
1831 f.value = static_cast<double>(val);
1832
1833 // shift the fractional part
1834 if (fragment_idx == 1) {
1835 while (sig--) f.value /= base;
1836 }
1837 }
1838
1839 return (fragments[0].value + fragments[1].value) *
1840 pow(2.0, fragments[2].value * exponent_sign) * sign;
1841
1842#else // !TOML_LANG_UNRELEASED
1843
1845 "hexadecimal floating-point values are not supported "
1846 "in TOML 1.0.0 and earlier"sv);
1847
1848#endif // !TOML_LANG_UNRELEASED
1849 }
1850
// Integer parser, templated on the numeric base.
// NOTE(review): the signature line is not present in this listing; the name parse_integer
// is inferred from parse_integer_traits<base> and should be confirmed against the full file.
//
// Behaviour is driven by parse_integer_traits<base>: whether a sign is accepted
// (traits::is_signed), the digit predicate, the prefix for non-decimal bases and the
// maximum digit count. Base 10 must start with a digit (after an optional sign); other
// bases must start with '0', the base's prefix codepoint, and then a digit.
// Digits are collected without underscores; underscores must sit between digits.
// Returns the value as int64_t, or reports an error if it does not fit in 64 bits.
//
// Change in this revision: the base-10 error message previously read
// "expected expected digit or sign"; the duplicated word has been removed.
// NOTE(review): the embedded listing numbers have gaps (e.g. 1871, 1877, 1884, 1893, 1905),
// so the declaration of digits and the statements advancing the cursor are not visible here.
1851 template <uint64_t base>
1853 return_if_error({});
1855 using traits = parse_integer_traits<base>;
1856 push_parse_scope(traits::scope_qualifier);
1857
1858 [[maybe_unused]] int64_t sign = 1;
1859 if constexpr (traits::is_signed) {
1860 sign = *cp == U'-' ? -1 : 1;
1861 if (is_match(*cp, U'+', U'-')) advance_and_return_if_error_or_eof({});
1862 }
1863
1864 if constexpr (base == 10) {
1865 if (!traits::is_digit(*cp))
1866 set_error_and_return_default("expected digit or sign, saw '"sv, to_sv(*cp),
1867 "'"sv);
1868 } else {
1869 // '0'
1870 if (*cp != U'0') set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv);
1872
1873 // 'b', 'o', 'x'
1874 if (*cp != traits::prefix_codepoint)
1875 set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp),
1876 "'"sv);
1878
1879 if (!traits::is_digit(*cp))
1880 set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
1881 }
1882
1883 // consume digits
1885 size_t length = {};
// prev: the previously examined codepoint (null until the first one has been processed)
1886 const utf8_codepoint* prev = {};
1887 while (!is_eof() && !is_value_terminator(*cp)) {
1888 if (*cp == U'_') {
1889 if (!prev || !traits::is_digit(*prev))
1890 set_error_and_return_default("underscores may only follow digits"sv);
1891
// underscores are remembered in prev but never copied into digits
1892 prev = cp;
1894 continue;
1895 } else if TOML_UNLIKELY (prev && *prev == U'_' && !traits::is_digit(*cp))
1896 set_error_and_return_default("underscores must be followed by digits"sv);
1897 else if TOML_UNLIKELY (!traits::is_digit(*cp))
1898 set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
1899 else if TOML_UNLIKELY (length == sizeof(digits))
1900 set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv);
1901 else
1902 digits[length++] = static_cast<char>(cp->bytes[0]);
1903
1904 prev = cp;
1906 }
1907
1908 // sanity check ending state
1909 if (prev && *prev == U'_') {
1911 set_error_and_return_default("underscores must be followed by digits"sv);
1912 }
1913
1914 // single digits can be converted trivially
1915 if (length == 1u) {
1916 int64_t result;
1917
1918 if constexpr (base == 16)
1919 result = static_cast<int64_t>(hex_to_dec(digits[0]));
1920 else
1921 result = static_cast<int64_t>(digits[0] - '0');
1922
1923 if constexpr (traits::is_signed) result *= sign;
1924
1925 return result;
1926 }
1927
1928 // bin, oct and hex allow leading zeroes so trim them first
// msd: pointer to the most significant digit that takes part in the conversion
1929 const char* end = digits + length;
1930 const char* msd = digits;
1931 if constexpr (base != 10) {
1932 while (msd < end && *msd == '0') msd++;
1933 if (msd == end) return 0ll;
1934 }
1935
1936 // decimal integers do not allow leading zeroes
1937 else {
1938 if TOML_UNLIKELY (digits[0] == '0')
1939 set_error_and_return_default("leading zeroes are prohibited"sv);
1940 }
1941
1942 // range check
1943 if TOML_UNLIKELY (static_cast<size_t>(end - msd) > traits::max_digits)
1944 set_error_and_return_default("'"sv, traits::full_prefix, std::string_view{digits, length},
1945 "' is not representable in 64 bits"sv);
1946
1947 // do the thing
1948 {
// accumulate in unsigned 64-bit, walking from the least significant digit upwards
1949 uint64_t result = {};
1950 {
1951 uint64_t power = 1;
1952 while (--end >= msd) {
1953 if constexpr (base == 16)
1954 result += power * hex_to_dec(*end);
1955 else
1956 result += power * static_cast<uint64_t>(*end - '0');
1957
1958 power *= base;
1959 }
1960 }
1961
1962 // range check
// a negative value may have a magnitude one larger than INT64_MAX
1963 static constexpr auto i64_max =
1964 static_cast<uint64_t>((std::numeric_limits<int64_t>::max)());
1965 if TOML_UNLIKELY (result > i64_max + (sign < 0 ? 1u : 0u))
1966 set_error_and_return_default("'"sv, traits::full_prefix, std::string_view{digits, length},
1967 "' is not representable in 64 bits"sv);
1968
1969 if constexpr (traits::is_signed) {
1970 // avoid signed multiply UB when parsing INT64_MIN
1971 if TOML_UNLIKELY (sign < 0 && result == i64_max + 1u)
1972 return (std::numeric_limits<int64_t>::min)();
1973
1974 return static_cast<int64_t>(result) * sign;
1975 } else
1976 return static_cast<int64_t>(result);
1977 }
1978 }
1979
// Parses a date of the form YYYY-MM-DD and returns it as a date{year, month, day}.
//
// part_of_datetime: when false, the date must be followed by end-of-file or a value
//   terminator; when true that check is skipped so the caller can continue parsing.
//
// On entry the current codepoint must be a decimal digit (asserted below).
// The month must be 1-12 and the day must be between 1 and the number of days in that
// month, with February having 29 days in leap years.
// NOTE(review): the embedded listing numbers have gaps (e.g. 1994, 1998, 2009, 2013), so
// the statements handling end-of-file and skipping the '-' separators are not visible here.
1982 date parse_date(bool part_of_datetime = false) {
1983 return_if_error({});
1985 TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
1986 push_parse_scope("date"sv);
1987
1988 // "YYYY"
1989 uint32_t digits[4];
1990 if (!consume_digit_sequence(digits, 4u))
1991 set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
1992 const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u + digits[0] * 1000u;
// divisible by 4, except century years unless they are also divisible by 400
1993 const auto is_leap_year = (year % 4u == 0u) && ((year % 100u != 0u) || (year % 400u == 0u));
1995
1996 // '-'
1997 if (*cp != U'-') set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv);
1999
2000 // "MM"
2001 if (!consume_digit_sequence(digits, 2u))
2002 set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
2003 const auto month = digits[1] + digits[0] * 10u;
2004 if (month == 0u || month > 12u)
2005 set_error_and_return_default("expected month between 1 and 12 (inclusive), saw "sv, month);
// February depends on the leap year; April, June, September and November have 30 days
2006 const auto max_days_in_month =
2007 month == 2u ? (is_leap_year ? 29u : 28u)
2008 : (month == 4u || month == 6u || month == 9u || month == 11u ? 30u : 31u);
2010
2011 // '-'
2012 if (*cp != U'-') set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv);
2014
2015 // "DD"
2016 if (!consume_digit_sequence(digits, 2u))
2017 set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
2018 const auto day = digits[1] + digits[0] * 10u;
2019 if (day == 0u || day > max_days_in_month)
2020 set_error_and_return_default("expected day between 1 and "sv, max_days_in_month,
2021 " (inclusive), saw "sv, day);
2022
2023 if (!part_of_datetime && !is_eof() && !is_value_terminator(*cp))
2024 set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
2025
2026 return {year, month, day};
2027 }
2028
// Parses a time of the form HH:MM:SS with an optional fractional part (.FFFFFFFFF) and
// returns it as a toml::time.
//
// part_of_datetime: when true, a following '+', '-', 'Z' or 'z' (the start of an offset)
//   is accepted as the end of the time, and the trailing value-terminator check is skipped.
//
// On entry the current codepoint must be a decimal digit (asserted below).
// Hours must be 0-23, minutes and seconds 0-59. When TOML_LANG_UNRELEASED is set, the
// seconds may be omitted. Up to max_digits fractional digits are accepted, but only the
// first nine contribute to the nanosecond value; the rest are truncated.
//
// Change in this revision: the out-of-range hour message previously said
// "between 0 to 59" although the check is hour > 23; it now states the real range.
// NOTE(review): the embedded listing numbers have gaps (e.g. 2047, 2051, 2069, 2087), so
// the statements handling end-of-file and skipping the separators are not visible here.
2031 time parse_time(bool part_of_datetime = false) {
2032 return_if_error({});
2034 TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
2035 push_parse_scope("time"sv);
2036
2037 static constexpr size_t max_digits = 64; // far more than necessary but needed to allow
2038 // fractional millisecond truncation per the spec
2039 uint32_t digits[max_digits];
2040
2041 // "HH"
2042 if (!consume_digit_sequence(digits, 2u))
2043 set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
2044 const auto hour = digits[1] + digits[0] * 10u;
2045 if (hour > 23u)
2046 set_error_and_return_default("expected hour between 0 and 23 (inclusive), saw "sv, hour);
2048
2049 // ':'
2050 if (*cp != U':') set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
2052
2053 // "MM"
2054 if (!consume_digit_sequence(digits, 2u))
2055 set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
2056 const auto minute = digits[1] + digits[0] * 10u;
2057 if (minute > 59u)
2058 set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv,
2059 minute);
2060 auto time = toml::time{hour, minute};
2061
2062 // ':'
2063 if constexpr (TOML_LANG_UNRELEASED) // toml/issues/671 (allow omission of seconds)
2064 {
2065 if (is_eof() || is_value_terminator(*cp) ||
2066 (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z')))
2067 return time;
2068 } else
2070 if (*cp != U':') set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
2072
2073 // "SS"
2074 if (!consume_digit_sequence(digits, 2u))
2075 set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
2076 const auto second = digits[1] + digits[0] * 10u;
2077 if (second > 59u)
2078 set_error_and_return_default("expected second between 0 and 59 (inclusive), saw "sv,
2079 second);
2080 time.second = static_cast<decltype(time.second)>(second);
2081
2082 // '.' (early-exiting is allowed; fractional is optional)
2083 if (is_eof() || is_value_terminator(*cp) ||
2084 (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z')))
2085 return time;
2086 if (*cp != U'.') set_error_and_return_default("expected '.', saw '"sv, to_sv(*cp), "'"sv);
2088
2089 // "FFFFFFFFF"
2090 size_t digit_count = consume_variable_length_digit_sequence(digits, max_digits);
2091 if (!digit_count) {
2093 set_error_and_return_default("expected fractional digits, saw '"sv, to_sv(*cp), "'"sv);
2094 } else if (!is_eof()) {
2095 if (digit_count == max_digits && is_decimal_digit(*cp))
2096 set_error_and_return_default("fractional component exceeds maximum precision of "sv,
2097 max_digits);
2098 else if (!part_of_datetime && !is_value_terminator(*cp))
2099 set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
2100 }
// build the value from at most the first nine digits, least significant first
2101 uint32_t value = 0u;
2102 uint32_t place = 1u;
2103 for (auto i = impl::min<size_t>(digit_count, 9u); i-- > 0u;) {
2104 value += digits[i] * place;
2105 place *= 10u;
2106 }
// scale up when fewer than nine digits were given so the result is in nanoseconds
2107 for (auto i = digit_count; i < 9u; i++) // implicit zeros
2108 value *= 10u;
2109 time.nanosecond = value;
2110 return time;
2111 }
2112
// Body of the date-time parser.
// NOTE(review): the signature line is not present in this listing; the name parse_date_time
// is inferred from the parse scope and should be confirmed against the full file.
//
// Parses a date (via parse_date(true)), a separator (' ', 'T' or 't'), a time (via
// parse_time(true)) and then an optional offset: either 'Z'/'z' for a zero offset or an
// explicit "+HH:MM" / "-HH:MM". The offset is stored in minutes, negated for '-'.
// Returns {date, time} when there is no offset and {date, time, offset} otherwise.
// NOTE(review): the embedded listing numbers have gaps (e.g. 2123, 2128, 2147, 2156, 2160),
// so the statements handling end-of-file and skipping separators are not visible here.
2116 return_if_error({});
2118 TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
2119 push_parse_scope("date-time"sv);
2120
2121 // "YYYY-MM-DD"
2122 auto date = parse_date(true);
2124
2125 // ' ', 'T' or 't'
2126 if (!is_match(*cp, U' ', U'T', U't'))
2127 set_error_and_return_default("expected space, 'T' or 't', saw '"sv, to_sv(*cp), "'"sv);
2129
2130 // "HH:MM:SS.FFFFFFFFF"
2131 auto time = parse_time(true);
2132 return_if_error({});
2133
2134 // no offset
2135 if (is_eof() || is_value_terminator(*cp)) return {date, time};
2136
2137 // zero offset ('Z' or 'z')
// offset stays value-initialised in this case
2138 time_offset offset{};
2139 if (is_match(*cp, U'Z', U'z')) advance_and_return_if_error({});
2140
2141 // explicit offset ("+/-HH:MM")
2142 else if (is_match(*cp, U'+', U'-')) {
2143 push_parse_scope("date-time offset"sv);
2144
2145 // sign
2146 int sign = *cp == U'-' ? -1 : 1;
2148
2149 // "HH"
2150 int digits[2];
2151 if (!consume_digit_sequence(digits, 2u))
2152 set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
2153 const auto hour = digits[1] + digits[0] * 10;
2154 if (hour > 23)
2155 set_error_and_return_default("expected hour between 0 and 23 (inclusive), saw "sv, hour);
2157
2158 // ':'
2159 if (*cp != U':') set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
2161
2162 // "MM"
2163 if (!consume_digit_sequence(digits, 2u))
2164 set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
2165 const auto minute = digits[1] + digits[0] * 10;
2166 if (minute > 59)
2167 set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv,
2168 minute);
// total offset in minutes, signed
2169 offset.minutes = static_cast<decltype(offset.minutes)>((hour * 60 + minute) * sign);
2170 }
2171
2172 if (!is_eof() && !is_value_terminator(*cp))
2173 set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
2174
2175 return {date, time, offset};
2176 }
2177
// Forward declaration; the definition is not in this part of the file.
// Called by the value dispatcher when the current codepoint is '['.
2179 node_ptr parse_array();
2180
// Forward declaration; the definition is not in this part of the file.
// Called by the value dispatcher when the current codepoint is '{'.
2182 node_ptr parse_inline_table();
2183
// Body of a helper that dispatches on the first codepoint of a value.
// NOTE(review): the signature line is not present in this listing, so the function's name
// and declared return type cannot be confirmed from here.
//
// Handles the value kinds that can be recognised from a single character:
//   '['          -> parse_array()
//   '{'          -> parse_inline_table()
//   '.'          -> parse_float()
//   '"' or '\''  -> parse_string()
//   f/F/t/T      -> parse_boolean()
//   i/I/n/N      -> parse_inf_or_nan()
// Scalar results are wrapped in a newly allocated value held by a node_ptr.
// Returns nullptr when the first codepoint matches none of the above.
2186 return_if_error({});
2188 TOML_ASSERT_ASSUME(!is_control_character(*cp));
2189 TOML_ASSERT_ASSUME(*cp != U'_');
2190
2191 switch (cp->value) {
2192 // arrays
2193 case U'[':
2194 return parse_array();
2195
2196 // inline tables
2197 case U'{':
2198 return parse_inline_table();
2199
2200 // floats beginning with '.'
2201 case U'.':
2202 return node_ptr{new value{parse_float()}};
2203
2204 // strings
2205 case U'"':
2206 [[fallthrough]];
2207 case U'\'':
2208 return node_ptr{new value{parse_string().value}};
2209
2210 default: {
// clearing bit 0x20 maps ASCII lowercase letters onto their uppercase codes,
// so each comparison below matches both cases of the letter
2211 const auto cp_upper = static_cast<uint_least32_t>(cp->value) & ~0x20u;
2212
2213 // bools
2214 if (cp_upper == 70u || cp_upper == 84u) // F or T
2215 return node_ptr{new value{parse_boolean()}};
2216
2217 // inf/nan
2218 else if (cp_upper == 73u || cp_upper == 78u) // I or N
2219 return node_ptr{new value{parse_inf_or_nan()}};
2220
2221 else
2222 return nullptr;
2223 }
2224 }
2226 }
2227
2229 node_ptr parse_value() {
     // Parses one TOML value beginning at the current code point and returns it as a newly
     // allocated node whose source region spans the value.
     //
     // Strategy, as visible in the body:
     //   1. reject leading control characters and underscores;
     //   2. scan ahead, collecting a bitmask of 'traits' (digits, signs, dots, marker letters),
     //      then rewind the reader to the start of the value;
     //   3. pick the typed parse function from the traits, first by 'fuzzy' checks and then by
     //      an exact switch over the trait mask.
     // If no node was produced, "could not determine value type" is reported at the value's
     // starting position.
2230 return_if_error({});
2232 TOML_ASSERT_ASSUME(!is_value_terminator(*cp));
2233 push_parse_scope("value"sv);
2234
     // tracks nesting depth for the duration of this call
     // NOTE(review): the condition guarding the error below is not present in this listing;
     // presumably it compares nested_values against max_nested_values - confirm upstream.
2235 const depth_counter_scope depth_counter{nested_values};
2237 set_error_and_return_default("exceeded maximum nested value depth of "sv, max_nested_values,
2238 " (TOML_MAX_NESTED_VALUES)"sv);
2239
2240 // check if it begins with some control character
2241 // (note that this will also fail for whitespace but we're assuming we've
2242 // called consume_leading_whitespace() before calling parse_value())
2243 if TOML_UNLIKELY (is_control_character(*cp))
2244 set_error_and_return_default("unexpected control character"sv);
2245
2246 // underscores at the beginning
2247 else if (*cp == U'_')
2248 set_error_and_return_default("values may not begin with underscores"sv);
2249
     // remembered so the final source region and the fallback error can point at the value start
2250 const auto begin_pos = cp->position;
2251 node_ptr val;
2252
     // single-pass do/while(false): 'break' is used as a structured exit to the epilogue
2253 do {
2254 TOML_ASSERT_ASSUME(!is_control_character(*cp));
2255 TOML_ASSERT_ASSUME(*cp != U'_');
2256
2257 // detect the value type and parse accordingly,
2258 // starting with value types that can be detected
2259 // unambiguously from just one character.
2260
     // NOTE(review): the statement that assigns 'val' from the single-character dispatch is
     // not present in this listing at this point - confirm against the upstream file.
2262 return_if_error({});
2263 if (val) break;
2264
2265 // value types from here down require more than one character to unambiguously identify
2266 // so scan ahead and collect a set of value 'traits'.
2267 enum TOML_CLOSED_FLAGS_ENUM value_traits : int {
2268 has_nothing = 0,
2269 has_digits = 1,
2270 has_b = 1 << 1, // as second char only (0b)
2271 has_e = 1 << 2, // only float exponents
2272 has_o = 1 << 3, // as second char only (0o)
2273 has_p = 1 << 4, // only hexfloat exponents
2274 has_t = 1 << 5,
2275 has_x = 1 << 6, // as second or third char only (0x, -0x, +0x)
2276 has_z = 1 << 7,
2277 has_colon = 1 << 8,
2278 has_plus = 1 << 9,
2279 has_minus = 1 << 10,
2280 has_dot = 1 << 11,
2281 begins_sign = 1 << 12,
2282 begins_digit = 1 << 13,
2283 begins_zero = 1 << 14,
2284
     // composite masks used by the classification switch further down
2285 signs_msk = has_plus | has_minus,
2286 bdigit_msk = has_digits | begins_digit,
2287 bzero_msk = bdigit_msk | begins_zero,
2288 };
2289 value_traits traits = has_nothing;
     // small helpers over 'traits': test for any bit, test for no bit, and set bits
2290 const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; };
2291 const auto has_none = [&](auto t) noexcept { return (traits & t) == has_nothing; };
2292 const auto add_trait = [&](auto t) noexcept {
2293 traits = static_cast<value_traits>(traits | t);
2294 };
2295
2296 // examine the first character to get the 'begins with' traits
2297 // (good fail-fast opportunity; all the remaining types begin with numeric digits or signs)
2298 if (is_decimal_digit(*cp)) {
2299 add_trait(begins_digit);
2300 if (*cp == U'0') add_trait(begins_zero);
2301 } else if (is_match(*cp, U'+', U'-'))
2302 add_trait(begins_sign);
2303 else
2304 break;
2305
2306 // scan the rest of the value to determine the remaining traits
     // NOTE(review): the declaration of the 'chars' buffer used below is not present in this
     // listing - confirm its type and size against the upstream file.
     // char_count counts stored (non-underscore) characters; advance_count counts how far the
     // reader has moved, so it can be rewound with go_back() afterwards.
2308 size_t char_count = {}, advance_count = {};
2309 bool eof_while_scanning = false;
2310 const auto scan = [&]() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
2311 if (is_eof()) return;
2312 TOML_ASSERT_ASSUME(!is_value_terminator(*cp));
2313
2314 do {
     // underscores are neither stored in 'chars' nor counted in char_count
2315 if (const auto c = **cp; c != U'_') {
2316 chars[char_count++] = c;
2317
2318 if (is_decimal_digit(c))
2319 add_trait(has_digits);
2320 else if (is_ascii_letter(c)) {
2321 TOML_ASSERT_ASSUME((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z'));
     // OR-ing with 32 lowercases an ASCII letter so each case label handles both cases
2322 switch (static_cast<char32_t>(c | 32u)) {
2323 case U'b':
2324 if (char_count == 2u && has_any(begins_zero)) add_trait(has_b);
2325 break;
2326
2327 case U'e':
     // only counts as an exponent marker when no radix/date-time marker has been seen and
     // any sign seen so far can be explained by a leading sign
2328 if (char_count > 1u &&
2329 has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) &&
2330 (has_none(has_plus | has_minus) || has_any(begins_sign)))
2331 add_trait(has_e);
2332 break;
2333
2334 case U'o':
2335 if (char_count == 2u && has_any(begins_zero)) add_trait(has_o);
2336 break;
2337
2338 case U'p':
2339 if (has_any(has_x)) add_trait(has_p);
2340 break;
2341
2342 case U'x':
2343 if ((char_count == 2u && has_any(begins_zero)) ||
2344 (char_count == 3u && has_any(begins_sign) && chars[1] == U'0'))
2345 add_trait(has_x);
2346 break;
2347
2348 case U't':
2349 add_trait(has_t);
2350 break;
2351 case U'z':
2352 add_trait(has_z);
2353 break;
2354 }
2355 } else if (c <= U':') {
2356 TOML_ASSERT_ASSUME(c < U'0' || c > U'9');
2357 switch (c) {
2358 case U'+':
2359 add_trait(has_plus);
2360 break;
2361 case U'-':
2362 add_trait(has_minus);
2363 break;
2364 case U'.':
2365 add_trait(has_dot);
2366 break;
2367 case U':':
2368 add_trait(has_colon);
2369 break;
2370 }
2371 }
2372 }
2373
     // NOTE(review): the statement that advances the reader is not present in this listing
     // here - confirm against the upstream file.
2375 advance_count++;
2376 eof_while_scanning = is_eof();
     // stop at end-of-file, at a value terminator, or once the advance budget derived from
     // utf8_buffered_reader::max_history_length is used up
2377 } while (advance_count < (utf8_buffered_reader::max_history_length - 1u) && !is_eof() &&
2378 !is_value_terminator(*cp));
2379 };
2380 scan();
2381 return_if_error({});
2382
2383 // force further scanning if this could have been a date-time with a space instead of a T
     // (OR-ing begins_zero into 'traits' makes the comparison indifferent to whether the
     // first digit was a zero; chars[4] and chars[7] are the separators of YYYY-MM-DD)
2384 if (char_count == 10u //
2385 && (traits | begins_zero) == (bzero_msk | has_minus) //
2386 && chars[4] == U'-' //
2387 && chars[7] == U'-' //
2388 && !is_eof() //
2389 && *cp == U' ') {
     // snapshot the state so the speculative scan can be undone
2390 const auto pre_advance_count = advance_count;
2391 const auto pre_scan_traits = traits;
     // the space is recorded as a character and treated as if it were the 'T' separator
2392 chars[char_count++] = *cp;
2393 add_trait(has_t);
2394
     // undoes the speculation: rewinds the reader by the extra distance and restores the
     // counters and traits of the plain 10-character date
2395 const auto backpedal = [&]() noexcept {
2396 go_back(advance_count - pre_advance_count);
2397 advance_count = pre_advance_count;
2398 traits = pre_scan_traits;
2399 char_count = 10u;
2400 };
2401
2403 advance_count++;
2404
     // a date-time needs a digit right after the space; otherwise this was just a date
2405 if (is_eof() || !is_decimal_digit(*cp))
2406 backpedal();
2407 else {
2408 chars[char_count++] = *cp;
2409
2411 advance_count++;
2412
2413 scan();
2414 return_if_error({});
2415
     // still 12 characters means the second scan stored nothing after that digit
2416 if (char_count == 12u) backpedal();
2417 }
2418 }
2419
2420 // set the reader back to where we started
2421 go_back(advance_count);
2422
2423 // if after scanning ahead we still only have one value character,
2424 // the only valid value type is an integer.
2425 if (char_count == 1u) {
2426 if (has_any(begins_digit)) {
2427 val.reset(new value{static_cast<int64_t>(chars[0] - U'0')});
2428 advance(); // skip the digit
2429 break;
2430 }
2431
2432 // anything else would be ambiguous.
2433 else
2434 set_error_and_return_default(eof_while_scanning ? "encountered end-of-file"sv
2435 : "could not determine value type"sv);
2436 }
2437
2438 // now things that can be identified from two or more characters
2439 return_if_error({});
2440 TOML_ASSERT_ASSUME(char_count >= 2u);
2441
2442 // do some 'fuzzy matching' where there's no ambiguity, since that allows the specific
2443 // typed parse functions to take over and show better diagnostics if there's an issue
2444 // (as opposed to the fallback "could not determine type" message)
2445 if (has_any(has_p))
2446 val.reset(new value{parse_hex_float()});
2447 else if (has_any(has_x | has_o | has_b)) {
2448 int64_t i;
2449 value_flags flags;
2450 if (has_any(has_x)) {
2451 i = parse_integer<16>();
2452 flags = value_flags::format_as_hexadecimal;
2453 } else if (has_any(has_o)) {
2454 i = parse_integer<8>();
2455 flags = value_flags::format_as_octal;
2456 } else // has_b
2457 {
2458 i = parse_integer<2>();
2459 flags = value_flags::format_as_binary;
2460 }
2461 return_if_error({});
2462
     // keep the radix the integer was written in on the value's flags
2463 val.reset(new value{i});
2464 val->ref_cast<int64_t>().flags(flags);
2465 } else if (has_any(has_e) || (has_any(begins_digit) && chars[1] == U'.'))
2466 val.reset(new value{parse_float()});
2467 else if (has_any(begins_sign)) {
2468 // single-digit signed integers
2469 if (char_count == 2u && has_any(has_digits)) {
2470 val.reset(
2471 new value{static_cast<int64_t>(chars[1] - U'0') * (chars[0] == U'-' ? -1LL : 1LL)});
2472 advance(); // skip the sign
2473 advance(); // skip the digit
2474 break;
2475 }
2476
2477 // simple signed floats (e.g. +1.0)
2478 if (is_decimal_digit(chars[1]) && chars[2] == U'.') val.reset(new value{parse_float()});
2479
2480 // signed infinity or nan
2481 else if (is_match(chars[1], U'i', U'n', U'I', U'N'))
2482 val.reset(new value{parse_inf_or_nan()});
2483 }
2484
2485 return_if_error({});
2486 if (val) break;
2487
2488 // match trait masks against what they can match exclusively.
2489 // all correct value parses will come out of this list, so doing this as a switch is likely
2490 // to be a better friend to the optimizer on the success path (failure path can be slow but
2491 // that doesn't matter much).
2492 switch (unwrap_enum(traits)) {
2493 // binary integers
2494 // 0b10
2495 case bzero_msk | has_b:
2496 val.reset(new value{parse_integer<2>()});
2497 val->ref_cast<int64_t>().flags(value_flags::format_as_binary);
2498 break;
2499
2500 // octal integers
2501 // 0o10
2502 case bzero_msk | has_o:
2503 val.reset(new value{parse_integer<8>()});
2504 val->ref_cast<int64_t>().flags(value_flags::format_as_octal);
2505 break;
2506
2507 // decimal integers
2508 // 00
2509 // 10
2510 // +10
2511 // -10
2512 case bzero_msk:
2513 [[fallthrough]];
2514 case bdigit_msk:
2515 [[fallthrough]];
2516 case begins_sign | has_digits | has_minus:
2517 [[fallthrough]];
2518 case begins_sign | has_digits | has_plus: {
2519 // if the value was so long we exhausted the history buffer it's reasonable to assume
2520 // there was more and the value's actual type is impossible to identify without making
2521 // the buffer bigger (since it could have actually been a float), so emit an error.
2522 //
2523 // (this will likely only come up during fuzzing and similar scenarios)
2524 static constexpr size_t max_numeric_value_length =
2526 if TOML_UNLIKELY (!eof_while_scanning && advance_count > max_numeric_value_length)
2528 "numeric value too long to identify type - cannot exceed "sv,
2529 max_numeric_value_length, " characters"sv);
2530
2531 val.reset(new value{parse_integer<10>()});
2532 break;
2533 }
2534
2535 // hexadecimal integers
2536 // 0x10
2537 case bzero_msk | has_x:
2538 val.reset(new value{parse_integer<16>()});
2539 val->ref_cast<int64_t>().flags(value_flags::format_as_hexadecimal);
2540 break;
2541
2542 // decimal floats
2543 // 0e1
2544 // 0e-1
2545 // 0e+1
2546 // 0.0
2547 // 0.0e1
2548 // 0.0e-1
2549 // 0.0e+1
2550 case bzero_msk | has_e:
2551 [[fallthrough]];
2552 case bzero_msk | has_e | has_minus:
2553 [[fallthrough]];
2554 case bzero_msk | has_e | has_plus:
2555 [[fallthrough]];
2556 case bzero_msk | has_dot:
2557 [[fallthrough]];
2558 case bzero_msk | has_dot | has_e:
2559 [[fallthrough]];
2560 case bzero_msk | has_dot | has_e | has_minus:
2561 [[fallthrough]];
2562 case bzero_msk | has_dot | has_e | has_plus:
2563 [[fallthrough]];
2564 // 1e1
2565 // 1e-1
2566 // 1e+1
2567 // 1.0
2568 // 1.0e1
2569 // 1.0e-1
2570 // 1.0e+1
2571 case bdigit_msk | has_e:
2572 [[fallthrough]];
2573 case bdigit_msk | has_e | has_minus:
2574 [[fallthrough]];
2575 case bdigit_msk | has_e | has_plus:
2576 [[fallthrough]];
2577 case bdigit_msk | has_dot:
2578 [[fallthrough]];
2579 case bdigit_msk | has_dot | has_e:
2580 [[fallthrough]];
2581 case bdigit_msk | has_dot | has_e | has_minus:
2582 [[fallthrough]];
2583 case bdigit_msk | has_dot | has_e | has_plus:
2584 [[fallthrough]];
2585 // +1e1
2586 // +1.0
2587 // +1.0e1
2588 // +1.0e+1
2589 // +1.0e-1
2590 // -1.0e+1
2591 case begins_sign | has_digits | has_e | has_plus:
2592 [[fallthrough]];
2593 case begins_sign | has_digits | has_dot | has_plus:
2594 [[fallthrough]];
2595 case begins_sign | has_digits | has_dot | has_e | has_plus:
2596 [[fallthrough]];
2597 case begins_sign | has_digits | has_dot | has_e | signs_msk:
2598 [[fallthrough]];
2599 // -1e1
2600 // -1e+1
2601 // +1e-1
2602 // -1.0
2603 // -1.0e1
2604 // -1.0e-1
2605 case begins_sign | has_digits | has_e | has_minus:
2606 [[fallthrough]];
2607 case begins_sign | has_digits | has_e | signs_msk:
2608 [[fallthrough]];
2609 case begins_sign | has_digits | has_dot | has_minus:
2610 [[fallthrough]];
2611 case begins_sign | has_digits | has_dot | has_e | has_minus:
2612 val.reset(new value{parse_float()});
2613 break;
2614
2615 // hexadecimal floats
2616 // 0x10p0
2617 // 0x10p-0
2618 // 0x10p+0
2619 case bzero_msk | has_x | has_p:
2620 [[fallthrough]];
2621 case bzero_msk | has_x | has_p | has_minus:
2622 [[fallthrough]];
2623 case bzero_msk | has_x | has_p | has_plus:
2624 [[fallthrough]];
2625 // -0x10p0
2626 // -0x10p-0
2627 // +0x10p0
2628 // +0x10p+0
2629 // -0x10p+0
2630 // +0x10p-0
2631 case begins_sign | has_digits | has_x | has_p | has_minus:
2632 [[fallthrough]];
2633 case begins_sign | has_digits | has_x | has_p | has_plus:
2634 [[fallthrough]];
2635 case begins_sign | has_digits | has_x | has_p | signs_msk:
2636 [[fallthrough]];
2637 // 0x10.1p0
2638 // 0x10.1p-0
2639 // 0x10.1p+0
2640 case bzero_msk | has_x | has_dot | has_p:
2641 [[fallthrough]];
2642 case bzero_msk | has_x | has_dot | has_p | has_minus:
2643 [[fallthrough]];
2644 case bzero_msk | has_x | has_dot | has_p | has_plus:
2645 [[fallthrough]];
2646 // -0x10.1p0
2647 // -0x10.1p-0
2648 // +0x10.1p0
2649 // +0x10.1p+0
2650 // -0x10.1p+0
2651 // +0x10.1p-0
2652 case begins_sign | has_digits | has_x | has_dot | has_p | has_minus:
2653 [[fallthrough]];
2654 case begins_sign | has_digits | has_x | has_dot | has_p | has_plus:
2655 [[fallthrough]];
2656 case begins_sign | has_digits | has_x | has_dot | has_p | signs_msk:
2657 val.reset(new value{parse_hex_float()});
2658 break;
2659
2660 // times
2661 // HH:MM
2662 // HH:MM:SS
2663 // HH:MM:SS.FFFFFF
2664 case bzero_msk | has_colon:
2665 [[fallthrough]];
2666 case bzero_msk | has_colon | has_dot:
2667 [[fallthrough]];
2668 case bdigit_msk | has_colon:
2669 [[fallthrough]];
2670 case bdigit_msk | has_colon | has_dot:
2671 val.reset(new value{parse_time()});
2672 break;
2673
2674 // local dates
2675 // YYYY-MM-DD
2676 case bzero_msk | has_minus:
2677 [[fallthrough]];
2678 case bdigit_msk | has_minus:
2679 val.reset(new value{parse_date()});
2680 break;
2681
2682 // date-times
2683 // YYYY-MM-DDTHH:MM
2684 // YYYY-MM-DDTHH:MM-HH:MM
2685 // YYYY-MM-DDTHH:MM+HH:MM
2686 // YYYY-MM-DD HH:MM
2687 // YYYY-MM-DD HH:MM-HH:MM
2688 // YYYY-MM-DD HH:MM+HH:MM
2689 // YYYY-MM-DDTHH:MM:SS
2690 // YYYY-MM-DDTHH:MM:SS-HH:MM
2691 // YYYY-MM-DDTHH:MM:SS+HH:MM
2692 // YYYY-MM-DD HH:MM:SS
2693 // YYYY-MM-DD HH:MM:SS-HH:MM
2694 // YYYY-MM-DD HH:MM:SS+HH:MM
2695 case bzero_msk | has_minus | has_colon | has_t:
2696 [[fallthrough]];
2697 case bzero_msk | signs_msk | has_colon | has_t:
2698 [[fallthrough]];
2699 case bdigit_msk | has_minus | has_colon | has_t:
2700 [[fallthrough]];
2701 case bdigit_msk | signs_msk | has_colon | has_t:
2702 [[fallthrough]];
2703 // YYYY-MM-DDTHH:MM:SS.FFFFFF
2704 // YYYY-MM-DDTHH:MM:SS.FFFFFF-HH:MM
2705 // YYYY-MM-DDTHH:MM:SS.FFFFFF+HH:MM
2706 // YYYY-MM-DD HH:MM:SS.FFFFFF
2707 // YYYY-MM-DD HH:MM:SS.FFFFFF-HH:MM
2708 // YYYY-MM-DD HH:MM:SS.FFFFFF+HH:MM
2709 case bzero_msk | has_minus | has_colon | has_dot | has_t:
2710 [[fallthrough]];
2711 case bzero_msk | signs_msk | has_colon | has_dot | has_t:
2712 [[fallthrough]];
2713 case bdigit_msk | has_minus | has_colon | has_dot | has_t:
2714 [[fallthrough]];
2715 case bdigit_msk | signs_msk | has_colon | has_dot | has_t:
2716 [[fallthrough]];
2717 // YYYY-MM-DDTHH:MMZ
2718 // YYYY-MM-DD HH:MMZ
2719 // YYYY-MM-DDTHH:MM:SSZ
2720 // YYYY-MM-DD HH:MM:SSZ
2721 // YYYY-MM-DDTHH:MM:SS.FFFFFFZ
2722 // YYYY-MM-DD HH:MM:SS.FFFFFFZ
2723 case bzero_msk | has_minus | has_colon | has_z | has_t:
2724 [[fallthrough]];
2725 case bzero_msk | has_minus | has_colon | has_dot | has_z | has_t:
2726 [[fallthrough]];
2727 case bdigit_msk | has_minus | has_colon | has_z | has_t:
2728 [[fallthrough]];
2729 case bdigit_msk | has_minus | has_colon | has_dot | has_z | has_t:
2730 val.reset(new value{parse_date_time()});
2731 break;
2732 }
2733 } while (false);
2734
     // nothing above produced a node: report the failure at the start of the value
     // NOTE(review): the statement that leaves the function after this error is not present
     // in this listing - confirm against the upstream file.
2735 if (!val) {
2736 set_error_at(begin_pos, "could not determine value type"sv);
2738 }
2739
     // record where the value came from: start of the value up to current_position(1)
2740 val->source_ = {begin_pos, current_position(1), reader.source_path()};
2741 return val;
2742 }
2743
2745 bool parse_key() {
     // Parses a (possibly dotted) key into key_buffer, one segment per loop iteration.
     // A segment is either a bare key or a single-line quoted string; whitespace after a
     // segment and after a separating '.' is consumed. Multi-line strings are rejected.
     // Returns true when the loop finishes without an error having been recorded.
2746 return_if_error({});
2748 TOML_ASSERT_ASSUME(is_bare_key_character(*cp) || is_string_delimiter(*cp));
2749 push_parse_scope("key"sv);
2750
     // start from an empty buffer; any previously parsed key is discarded
2751 key_buffer.clear();
2752 recording_whitespace = false;
2753
2754 while (!is_error()) {
2755 std::string_view key_segment;
2756 const auto key_begin = current_position();
2757
2758 // bare_key_segment
2759 if (is_bare_key_character(*cp)) key_segment = parse_bare_key_segment();
2760
2761 // "quoted key segment"
2762 else if (is_string_delimiter(*cp)) {
2763 const auto begin_pos = cp->position;
2764
     // whitespace recording is switched on only while the quoted segment is being parsed
2765 recording_whitespace = true;
2766 parsed_string str = parse_string();
2767 recording_whitespace = false;
2768 return_if_error({});
2769
     // the message says "dotted keys" when at least one segment has already been stored
2770 if (str.was_multi_line) {
2771 set_error_at(begin_pos, "multi-line strings are prohibited in "sv,
2772 key_buffer.empty() ? ""sv : "dotted "sv, "keys"sv);
2774 } else
2775 key_segment = str.value;
2776 }
2777
2778 // ???
2779 else
2781 "expected bare key starting character or string delimiter, saw '"sv, to_sv(*cp),
2782 "'"sv);
2783
2784 const auto key_end = current_position();
2785
2786 // whitespace following the key segment
2787 consume_leading_whitespace();
2788
2789 // store segment
2790 key_buffer.push_back(key_segment, key_begin, key_end);
2791
2792 // eof or no more key to come
2793 if (is_eof() || *cp != U'.') break;
2794
2795 // was a dotted key - go around again
     // NOTE(review): the statement that steps over the '.' is not present in this listing -
     // confirm against the upstream file.
2797 consume_leading_whitespace();
2799 }
2800 return_if_error({});
2801
2802 return true;
2803 }
2804
2806 key make_key(size_t segment_index) const {
     // Builds a key object for one segment of the most recently parsed key.
     // segment_index: index into key_buffer; must be less than key_buffer.size() (asserted).
     // The key's source region uses the segment's recorded start/end positions and the
     // root table's source path.
2807 TOML_ASSERT(key_buffer.size() > segment_index);
2808
2809 return key{key_buffer[segment_index],
2810 source_region{key_buffer.starts[segment_index], key_buffer.ends[segment_index],
2811 root.source().path}};
2812 }
2813
2817 return_if_error({});
     // Parses a "[table]" or "[[array-of-tables]]" header line and returns a pointer to the
     // table that subsequent key-value pairs should be inserted into.
     // Steps visible in the body:
     //   1. read the brackets and the dotted key, then the rest of the header line;
     //   2. walk every key segment except the last, descending into existing tables (or the
     //      last element of an existing table array) and creating implicit tables as needed;
     //   3. resolve the last segment: append to an existing table array, promote an implicit
     //      table, create a new table / table array, or report a redefinition error.
     // NOTE(review): the signature line is not present in this listing; the name is taken
     // from the call site in the document loop.
2819 TOML_ASSERT_ASSUME(*cp == U'[');
2820 push_parse_scope("table header"sv);
2821
2822 const source_position header_begin_pos = cp->position;
2823 source_position header_end_pos;
     // becomes true when the header uses double brackets
2824 bool is_arr = false;
2825
2826 // parse header
2827 {
2828 // skip first '['
2830
2831 // skip past any whitespace that followed the '['
2832 const bool had_leading_whitespace = consume_leading_whitespace();
2834
2835 // skip second '[' (if present)
2836 if (*cp == U'[') {
2837 if (had_leading_whitespace)
2839 "[[array-of-table]] brackets must be contiguous (i.e. [ [ this ] ] is prohibited)"sv);
2840
2841 is_arr = true;
2843
2844 // skip past any whitespace that followed the '['
2845 consume_leading_whitespace();
2847 }
2848
2849 // check for a premature closing ']'
2850 if (*cp == U']')
2851 set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv);
2852
2853 // get the actual key
     // (the key text is captured into recording_buffer, which the error messages below quote)
2854 start_recording();
2855 parse_key();
2856 stop_recording(1u);
2857 return_if_error({});
2858
2859 // skip past any whitespace that followed the key
2860 consume_leading_whitespace();
2861 return_if_error({});
2863
2864 // consume the closing ']'
2865 if (*cp != U']') set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv);
2866 if (is_arr) {
2868 if (*cp != U']') set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv);
2869 }
2871 header_end_pos = current_position(1);
2872
2873 // handle the rest of the line after the header
2874 consume_leading_whitespace();
2875 if (!is_eof() && !consume_comment() && !consume_line_break())
2876 set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp),
2877 "'"sv);
2878 }
2879 TOML_ASSERT(!key_buffer.empty());
2880
2881 // check if each parent is a table/table array, or can be created implicitly as a table.
     // (the loop deliberately stops before the last segment, which is handled separately below)
2882 table* parent = &root;
2883 for (size_t i = 0, e = key_buffer.size() - 1u; i < e; i++) {
2884 const std::string_view segment = key_buffer[i];
     // lower_bound doubles as the insertion hint when the segment does not exist yet
2885 auto pit = parent->lower_bound(segment);
2886
2887 // parent already existed
2888 if (pit != parent->end() && pit->first == segment) {
2889 node& p = pit->second;
2890
2891 if (auto tbl = p.as_table()) {
2892 // adding to closed inline tables is illegal
2893 if (tbl->is_inline() &&
2894 !impl::find(open_inline_tables.begin(), open_inline_tables.end(), tbl))
2895 set_error_and_return_default("cannot insert '"sv, to_sv(recording_buffer),
2896 "' into existing inline table"sv);
2897
2898 parent = tbl;
2899 } else if (auto arr = p.as_array();
2900 arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) {
2901 // table arrays are a special case;
2902 // the spec dictates we select the most recently declared element in the array.
2903 TOML_ASSERT(!arr->empty());
2904 TOML_ASSERT(arr->back().is_table());
2905 parent = &arr->back().ref_cast<table>();
2906 } else {
2907 if (!is_arr && p.type() == node_type::table)
2908 set_error_and_return_default("cannot redefine existing table '"sv,
2909 to_sv(recording_buffer), "'"sv);
2910 else
2911 set_error_and_return_default("cannot redefine existing "sv, to_sv(p.type()), " '"sv,
2912 to_sv(recording_buffer), "' as "sv,
2913 is_arr ? "array-of-tables"sv : "table"sv);
2914 }
2915 }
2916
2917 // need to create a new implicit table
2918 else {
2919 pit = parent->emplace_hint<table>(pit, make_key(i));
2920 table& p = pit->second.ref_cast<table>();
2921 p.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
2922
     // remembered so a later explicit header for this table can promote it (see below)
2923 implicit_tables.push_back(&p);
2924 parent = &p;
2925 }
2926 }
2927
2928 const auto last_segment = key_buffer.back();
2929 auto it = parent->lower_bound(last_segment);
2930
2931 // if there was already a matching node some sanity checking is necessary;
2932 // this is ok if we're making an array and the existing element is already an array (new
2933 // element) or if we're making a table and the existing element is an implicitly-created table
2934 // (promote it), otherwise this is a redefinition error.
2935 if (it != parent->end() && it->first == last_segment) {
2936 node& matching_node = it->second;
     // case 1: another [[header]] for a known table array -> append a fresh table element
2937 if (auto arr = matching_node.as_array();
2938 is_arr && arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) {
2939 table& tbl = arr->emplace_back<table>();
2940 tbl.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
2941 return &tbl;
2942 }
2943
     // case 2: a [header] for a table that so far only exists implicitly
2944 else if (auto tbl = matching_node.as_table(); !is_arr && tbl && !implicit_tables.empty()) {
2945 if (auto found = impl::find(implicit_tables.begin(), implicit_tables.end(), tbl); found) {
     // promotion is allowed only if every existing child is a table or an array of tables
2946 bool ok = true;
2947 if (!tbl->empty()) {
2948 for (auto& [_, child] : *tbl) {
2949 if (!child.is_table() && !child.is_array_of_tables()) {
2950 ok = false;
2951 break;
2952 }
2953 }
2954 }
2955
2956 if (ok) {
     // no longer implicit: drop it from the list and point its region at this header
2957 implicit_tables.erase(implicit_tables.cbegin() + (found - implicit_tables.data()));
2958 tbl->source_.begin = header_begin_pos;
2959 tbl->source_.end = header_end_pos;
2960 return tbl;
2961 }
2962 }
2963 }
2964
2965 // if we get here it's a redefinition error.
2966 if (!is_arr && matching_node.type() == node_type::table) {
2967 set_error_at(header_begin_pos, "cannot redefine existing table '"sv,
2968 to_sv(recording_buffer), "'"sv);
2970 } else {
2971 set_error_at(header_begin_pos, "cannot redefine existing "sv, to_sv(matching_node.type()),
2972 " '"sv, to_sv(recording_buffer), "' as "sv,
2973 is_arr ? "array-of-tables"sv : "table"sv);
2975 }
2976 }
2977
2978 // there was no matching node, sweet - we can freely instantiate a new table/table array.
2979 else {
2980 auto last_key = make_key(key_buffer.size() - 1u);
2981
2982 // if it's an array we need to make the array and its first table element,
2983 // set the starting regions, and return the table element
2984 if (is_arr) {
2985 it = parent->emplace_hint<array>(it, std::move(last_key));
2986 array& tbl_arr = it->second.ref_cast<array>();
     // registered so later [[headers]] with the same key append to this array
2987 table_arrays.push_back(&tbl_arr);
2988 tbl_arr.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
2989
2990 table& tbl = tbl_arr.emplace_back<table>();
2991 tbl.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
2992 return &tbl;
2993 }
2994
2995 // otherwise we're just making a table
2996 else {
2997 it = parent->emplace_hint<table>(it, std::move(last_key));
2998 table& tbl = it->second.ref_cast<table>();
2999 tbl.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
3000 return &tbl;
3001 }
3002 }
3003 }
3004
3007 return_if_error({});
     // Parses "key = value" at the current position and inserts the result into the table
     // pointed to by 'tbl'. For a dotted key, the intermediate tables are looked up or created
     // first and 'tbl' is re-pointed at the innermost one. Redefining an existing entry is an
     // error. Returns true after a successful insertion.
     // NOTE(review): the signature line is not present in this listing; the name is taken
     // from the call site in the document loop, and 'tbl' is used below as a table pointer.
3009 TOML_ASSERT_ASSUME(is_string_delimiter(*cp) || is_bare_key_character(*cp));
3010 push_parse_scope("key-value pair"sv);
3011
3012 // read the key into the key buffer
3013 start_recording();
3014 parse_key();
3015 stop_recording(1u);
3016 return_if_error({});
3017 TOML_ASSERT(key_buffer.size() >= 1u);
3018
3019 // skip past any whitespace that followed the key
3020 consume_leading_whitespace();
3022
3023 // '='
3024 if (*cp != U'=') set_error_and_return_default("expected '=', saw '"sv, to_sv(*cp), "'"sv);
3026
3027 // skip past any whitespace that followed the '='
3028 consume_leading_whitespace();
3029 return_if_error({});
3031
3032 // check that the next character could actually be a value
3033 if (is_value_terminator(*cp))
3034 set_error_and_return_default("expected value, saw '"sv, to_sv(*cp), "'"sv);
3035
3036 // if it's a dotted kvp we need to spawn the parent sub-tables if necessary,
3037 // and set the target table to the second-to-last one in the chain
3038 if (key_buffer.size() > 1u) {
3039 for (size_t i = 0; i < key_buffer.size() - 1u; i++) {
3040 const std::string_view segment = key_buffer[i];
     // lower_bound doubles as the insertion hint when the segment does not exist yet
3041 auto pit = tbl->lower_bound(segment);
3042
3043 // parent already existed
3044 if (pit != tbl->end() && pit->first == segment) {
3045 table* p = pit->second.as_table();
3046
3047 // redefinition
     // an existing node may only be extended if it is a table that was itself created by a
     // dotted key or implicitly by a table header
3048 if TOML_UNLIKELY (!p ||
3049 !(impl::find(dotted_key_tables.begin(), dotted_key_tables.end(), p) ||
3050 impl::find(implicit_tables.begin(), implicit_tables.end(), p))) {
3051 set_error_at(key_buffer.starts[i], "cannot redefine existing "sv,
3052 to_sv(pit->second.type()), " as dotted key-value pair"sv);
3054 }
3055
3056 tbl = p;
3057 }
3058
3059 // need to create a new implicit table
3060 else {
3061 pit = tbl->emplace_hint<table>(pit, make_key(i));
3062 table& p = pit->second.ref_cast<table>();
     // the new table's region is the region of the key segment that created it
3063 p.source_ = pit->first.source();
3064
3065 dotted_key_tables.push_back(&p);
3066 tbl = &p;
3067 }
3068 }
3069 }
3070
3071 // ensure this isn't a redefinition
3072 const std::string_view last_segment = key_buffer.back();
3073 auto it = tbl->lower_bound(last_segment);
3074 if (it != tbl->end() && it->first == last_segment) {
3075 set_error("cannot redefine existing "sv, to_sv(it->second.type()), " '"sv,
3076 to_sv(recording_buffer), "'"sv);
3078 }
3079
3080 // create the key first since the key buffer will likely get overwritten during value parsing
3081 // (inline tables)
3082 auto last_key = make_key(key_buffer.size() - 1u);
3083
3084 // now we can actually parse the value
3085 node_ptr val = parse_value();
3086 return_if_error({});
3087
     // 'it' from the redefinition check above is reused as the insertion hint
3088 tbl->emplace_hint<node_ptr>(it, std::move(last_key), std::move(val));
3089 return true;
3090 }
3091
3095 push_parse_scope("root table"sv);
     // Top-level loop of the parser: repeatedly consumes whitespace, line breaks and comments,
     // table headers, or key-value pairs until end-of-file, inserting key-value pairs into the
     // table selected by the most recent header (initially the root table). Afterwards the
     // end of the root table's source region is set to the end-of-file position.
     // NOTE(review): the signature line is not present in this listing; the name is taken
     // from the call in the constructor.
3096
3097 table* current_table = &root;
3098
3099 do {
3101
3102 // leading whitespace, line endings, comments
3103 if (consume_leading_whitespace() || consume_line_break() || consume_comment()) continue;
3105
3106 // [tables]
3107 // [[table array]]
3108 if (*cp == U'[') current_table = parse_table_header();
3109
3110 // bare_keys
3111 // dotted.keys
3112 // "quoted keys"
3113 else if (is_bare_key_character(*cp) || is_string_delimiter(*cp)) {
3114 push_parse_scope("key-value pair"sv);
3115
3116 parse_key_value_pair_and_insert(current_table);
3117
3118 // handle the rest of the line after the kvp
3119 // (this is not done in parse_key_value_pair() because that is also used for inline
3120 // tables)
3121 consume_leading_whitespace();
3123 if (!is_eof() && !consume_comment() && !consume_line_break())
3124 set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
3125 }
3126
3127 else // ??
3128 set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
3129 } while (!is_eof());
3130
3131 auto eof_pos = current_position(1);
3132 root.source_.end = eof_pos;
     // the last non-root table also ends at EOF, but only if its recorded end does not
     // already lie after its begin
3133 if (current_table && current_table != &root &&
3134 current_table->source_.end <= current_table->source_.begin)
3135 current_table->source_.end = eof_pos;
3136 }
3137
3138 static void update_region_ends(node& nde) noexcept {
     // Recursively walks the node tree so that a container's source region extends to the
     // furthest end position found among its children. Inline tables are left untouched.
     // nde: the node to process; nodes whose type compares greater than node_type::array
     //      are ignored.
3139 const auto type = nde.type();
     // presumably table and array are the only container kinds ordered at or below
     // node_type::array - TODO confirm against the node_type enum
3140 if (type > node_type::array) return;
3141
3142 if (type == node_type::table) {
3143 auto& tbl = nde.ref_cast<table>();
3144 if (tbl.is_inline()) // inline tables (and all their inline descendants) are already
3145 // correctly terminated
3146 return;
3147
     // NOTE(review): unlike the array branch below, 'end' is computed here but never written
     // back to nde.source_.end; only the recursive calls have an effect. Confirm whether that
     // is intentional before relying on a table's region covering its children.
3148 auto end = nde.source_.end;
3149 for (auto&& [k, v] : tbl) {
3150 TOML_UNUSED(k);
3151 update_region_ends(v);
3152 if (end < v.source_.end) end = v.source_.end;
3153 }
3154 } else // arrays
3155 {
3156 auto& arr = nde.ref_cast<array>();
3157 auto end = nde.source_.end;
3158 for (auto&& v : arr) {
3159 update_region_ends(v);
3160 if (end < v.source_.end) end = v.source_.end;
3161 }
     // widen the array's region to the furthest end seen among its elements
3162 nde.source_.end = end;
3163 }
3164 }
3165
3166 public:
3168 : reader{reader_} {
     // Constructor body: runs the whole parse. The root table's region starts as an empty
     // range at prev_pos; if the reader has input, the first code point is read and the
     // document is parsed; finally container source regions are extended to cover children.
     // NOTE(review): the constructor's signature line is not present in this listing.
3169 root.source_ = {prev_pos, prev_pos, reader.source_path()};
3170
3171 if (!reader.peek_eof()) {
3172 cp = reader.read_next();
3173
     // without exceptions, a reader failure is moved into 'err' and parsing is abandoned
3174#if !TOML_EXCEPTIONS
3175 if (reader.error()) {
3176 err = std::move(reader.error());
3177 return;
3178 }
3179#endif
3180
3181 if (cp) parse_document();
3182 }
3183
3184 update_region_ends(root);
3185 }
3186
3188 operator parse_result() && noexcept {
     // Converts a finished (rvalue) parser into its result. With exceptions enabled the root
     // table is moved out directly; otherwise the result holds the stored error if one was
     // recorded, and the root table if not.
3189#if TOML_EXCEPTIONS
3190
3191 return {std::move(root)};
3192
3193#else
3194
3195 if (err)
3196 return parse_result{*std::move(err)};
3197 else
3198 return parse_result{std::move(root)};
3199
3200#endif
3201 }
3202 };
3203
3206 return_if_error({});
     // Parses an array literal starting at '[' and returns it as a newly allocated array node.
     // 'prev' tracks what was parsed last so that a comma is only accepted after a value and
     // two values must be separated by a comma.
     // NOTE(review): the signature line and several statements (skipping '[', ',' and ']',
     // and the whitespace handling at the top of the loop) are not present in this listing -
     // confirm against the upstream file.
3208 TOML_ASSERT_ASSUME(*cp == U'[');
3209 push_parse_scope("array"sv);
3210
3211 // skip opening '['
3213
3214 node_ptr arr_ptr{new array{}};
3215 array& arr = arr_ptr->ref_cast<array>();
3216 enum class TOML_CLOSED_ENUM parse_type : int { none, comma, val };
3217 parse_type prev = parse_type::none;
3218
3219 while (!is_error()) {
3222
3223 // commas - only legal after a value
3224 if (*cp == U',') {
3225 if (prev == parse_type::val) {
3226 prev = parse_type::comma;
3228 continue;
3229 }
3230 set_error_and_return_default("expected value or closing ']', saw comma"sv);
3231 }
3232
3233 // closing ']'
3234 else if (*cp == U']') {
3236 break;
3237 }
3238
3239 // must be a value
3240 else {
3241 if (prev == parse_type::val) {
3242 set_error_and_return_default("expected comma or closing ']', saw '"sv, to_sv(*cp), "'"sv);
3243 continue;
3244 }
3245 prev = parse_type::val;
3246
3247 auto val = parse_value();
3248 return_if_error({});
3249
     // give the array a small initial capacity on the first insertion
3250 if (!arr.capacity()) arr.reserve(4u);
3251 arr.emplace_back<node_ptr>(std::move(val));
3252 }
3253 }
3254
3255 return_if_error({});
3256 return arr_ptr;
3257 }
3258
3261 return_if_error({});
     // Parses an inline table literal starting at '{' and returns it as a newly allocated
     // table node marked as inline. 'prev' tracks what was parsed last so that commas and
     // key-value pairs must alternate; unless TOML_LANG_UNRELEASED is set, a comma directly
     // before the closing '}' is an error.
     // NOTE(review): the signature line and several statements (skipping '{', ',' and '}',
     // and the call that parses and inserts each key-value pair) are not present in this
     // listing - confirm against the upstream file.
3263 TOML_ASSERT_ASSUME(*cp == U'{');
3264 push_parse_scope("inline table"sv);
3265
3266 // skip opening '{'
3268
3269 node_ptr tbl_ptr{new table{}};
3270 table& tbl = tbl_ptr->ref_cast<table>();
3271 tbl.is_inline(true);
     // scope object tied to open_inline_tables and this table; presumably it registers the
     // table as 'open' for the duration of this call - TODO confirm in table_vector_scope
3272 table_vector_scope table_scope{open_inline_tables, tbl};
3273
3274 enum class TOML_CLOSED_ENUM parse_type : int { none, comma, kvp };
3275 parse_type prev = parse_type::none;
3276 while (!is_error()) {
3277 if constexpr (TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline
3278 // tables)
3279 {
3281 } else {
3282 while (consume_leading_whitespace()) continue;
3283 }
3284 return_if_error({});
3286
3287 // commas - only legal after a key-value pair
3288 if (*cp == U',') {
3289 if (prev == parse_type::kvp) {
3290 prev = parse_type::comma;
3292 } else
3293 set_error_and_return_default("expected key-value pair or closing '}', saw comma"sv);
3294 }
3295
3296 // closing '}'
3297 else if (*cp == U'}') {
3298 if constexpr (!TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline
3299 // tables)
3300 {
3301 if (prev == parse_type::comma) {
3303 "expected key-value pair, saw closing '}' (dangling comma)"sv);
3304 continue;
3305 }
3306 }
3308 break;
3309 }
3310
3311 // key-value pair
3312 else if (is_string_delimiter(*cp) || is_bare_key_character(*cp)) {
3313 if (prev == parse_type::kvp)
3314 set_error_and_return_default("expected comma or closing '}', saw '"sv, to_sv(*cp), "'"sv);
3315 else {
3316 prev = parse_type::kvp;
3318 }
3319 }
3320
     // anything else cannot start a key
3322 else
3323 set_error_and_return_default("expected key or closing '}', saw '"sv, to_sv(*cp), "'"sv);
3324 }
3325
3326 return_if_error({});
3327 return tbl_ptr;
3328 }
3329
3330 TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS
3331}
3333
// Undefine the helper macros used by the parser implementation so that they
// are no longer defined for any code that follows this point.
3334#undef TOML_RETURNS_BY_THROWING
3335#undef advance_and_return_if_error
3336#undef advance_and_return_if_error_or_eof
3337#undef assert_not_eof
3338#undef assert_not_error
3339#undef is_eof
3340#undef is_error
3341#undef parse_error_break
3342#undef push_parse_scope
3343#undef push_parse_scope_1
3344#undef push_parse_scope_2
3345#undef return_after_error
3346#undef return_if_eof
3347#undef return_if_error
3348#undef return_if_error_or_eof
3349#undef set_error_and_return
3350#undef set_error_and_return_default
3351#undef set_error_and_return_if_eof
3352#undef utf8_buffered_reader_error_check
3353#undef utf8_reader_error
3354#undef utf8_reader_error_check
3355#undef utf8_reader_return_after_error
3356
3357// #---------------------------------------------------------------------------------------------------------------------
3358// # PARSER PUBLIC IMPLEMENTATION
3359// #---------------------------------------------------------------------------------------------------------------------
3360
// Shared entry point for every public parse() overload: constructs an
// impl::parser from `reader` (taking it by rvalue) and returns that parser
// as the function's parse_result.
3364 parse_result do_parse(utf8_reader_interface && reader) {
3365 return impl::parser{std::move(reader)};
3366 }
3367
3370 parse_result do_parse_file(std::string_view file_path) {
3371#if TOML_EXCEPTIONS
3372#define TOML_PARSE_FILE_ERROR(msg, path) \
3373 throw parse_error { \
3374 msg, source_position{}, std::make_shared<const std::string>(std::move(path)) \
3375 }
3376#else
3377#define TOML_PARSE_FILE_ERROR(msg, path) \
3378 return parse_result { \
3379 parse_error { \
3380 msg, source_position{}, std::make_shared<const std::string>(std::move(path)) \
3381 } \
3382 }
3383#endif
3384
3385 std::string file_path_str(file_path);
3386
3387 // open file with a custom-sized stack buffer
3388 std::ifstream file;
3389 TOML_OVERALIGNED char file_buffer[sizeof(void*) * 1024u];
3390 file.rdbuf()->pubsetbuf(file_buffer, sizeof(file_buffer));
3391#if TOML_WINDOWS
3392 file.open(impl::widen(file_path_str).c_str(),
3393 std::ifstream::in | std::ifstream::binary | std::ifstream::ate);
3394#else
3395 file.open(file_path_str, std::ifstream::in | std::ifstream::binary | std::ifstream::ate);
3396#endif
3397 if (!file.is_open())
3398 TOML_PARSE_FILE_ERROR("File could not be opened for reading", file_path_str);
3399
3400 // get size
3401 const auto file_size = file.tellg();
3402 if (file_size == -1) TOML_PARSE_FILE_ERROR("Could not determine file size", file_path_str);
3403 file.seekg(0, std::ifstream::beg);
3404
3405 // read the whole file into memory first if the file isn't too large
3406 constexpr auto large_file_threshold = 1024 * 1024 * 2; // 2 MB
3407 if (file_size <= large_file_threshold) {
3408 std::vector<char> file_data;
3409 file_data.resize(static_cast<size_t>(file_size));
3410 file.read(file_data.data(), static_cast<std::streamsize>(file_size));
3411 return parse(std::string_view{file_data.data(), file_data.size()}, std::move(file_path_str));
3412 }
3413
3414 // otherwise parse it using the streams
3415 else
3416 return parse(file, std::move(file_path_str));
3417
3418#undef TOML_PARSE_FILE_ERROR
3419 }
3420}
3422
3425
3427 parse_result TOML_CALLCONV parse(std::string_view doc, std::string_view source_path) {
3428 return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{doc, source_path});
3429 }
3430
3432 parse_result TOML_CALLCONV parse(std::string_view doc, std::string && source_path) {
3433 return TOML_ANON_NAMESPACE::do_parse(
3434 TOML_ANON_NAMESPACE::utf8_reader{doc, std::move(source_path)});
3435 }
3436
3438 parse_result TOML_CALLCONV parse(std::istream & doc, std::string_view source_path) {
3439 return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{doc, source_path});
3440 }
3441
3443 parse_result TOML_CALLCONV parse(std::istream & doc, std::string && source_path) {
3444 return TOML_ANON_NAMESPACE::do_parse(
3445 TOML_ANON_NAMESPACE::utf8_reader{doc, std::move(source_path)});
3446 }
3447
// Parses a TOML document from the file at `file_path`; all of the work is
// delegated to do_parse_file().
3449 parse_result TOML_CALLCONV parse_file(std::string_view file_path) {
3450 return TOML_ANON_NAMESPACE::do_parse_file(file_path);
3451 }
3452
3453#if TOML_HAS_CHAR8
3454
3456 parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string_view source_path) {
3457 return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{doc, source_path});
3458 }
3459
3461 parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string && source_path) {
3462 return TOML_ANON_NAMESPACE::do_parse(
3463 TOML_ANON_NAMESPACE::utf8_reader{doc, std::move(source_path)});
3464 }
3465
3467 parse_result TOML_CALLCONV parse_file(std::u8string_view file_path) {
3468 std::string file_path_str;
3469 file_path_str.resize(file_path.length());
3470 memcpy(file_path_str.data(), file_path.data(), file_path.length());
3471 return TOML_ANON_NAMESPACE::do_parse_file(file_path_str);
3472 }
3473
3474#endif // TOML_HAS_CHAR8
3475
3476#if TOML_ENABLE_WINDOWS_COMPAT
3477
3479 parse_result TOML_CALLCONV parse(std::string_view doc, std::wstring_view source_path) {
3480 return TOML_ANON_NAMESPACE::do_parse(
3481 TOML_ANON_NAMESPACE::utf8_reader{doc, impl::narrow(source_path)});
3482 }
3483
3485 parse_result TOML_CALLCONV parse(std::istream & doc, std::wstring_view source_path) {
3486 return TOML_ANON_NAMESPACE::do_parse(
3487 TOML_ANON_NAMESPACE::utf8_reader{doc, impl::narrow(source_path)});
3488 }
3489
3491 parse_result TOML_CALLCONV parse_file(std::wstring_view file_path) {
3492 return TOML_ANON_NAMESPACE::do_parse_file(impl::narrow(file_path));
3493 }
3494
3495#endif // TOML_ENABLE_WINDOWS_COMPAT
3496
3497#if TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT
3498
3500 parse_result TOML_CALLCONV parse(std::u8string_view doc, std::wstring_view source_path) {
3501 return TOML_ANON_NAMESPACE::do_parse(
3502 TOML_ANON_NAMESPACE::utf8_reader{doc, impl::narrow(source_path)});
3503 }
3504
3505#endif // TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT
3506
3507 TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS
3508}
3510
3511#undef TOML_OVERALIGNED
3512#include "header_end.hpp"
3513#endif // TOML_ENABLE_PARSER
A TOML array.
Definition array.hpp:285
TOML_EXPORTED_MEMBER_FUNCTION void reserve(size_t new_capacity)
Reserves internal storage capacity up to a pre-determined number of elements.
TOML_NODISCARD iterator end() noexcept
Returns an iterator to one-past-the-last element.
Definition array.hpp:831
TOML_NODISCARD size_t capacity() const noexcept
Returns the current max number of elements that may be held in the array's internal storage.
Definition array.hpp:1150
decltype(auto) emplace_back(Args &&... args)
Emplaces a new element at the end of the array.
Definition array.hpp:1659
void push_back(ElemType &&val, value_flags flags=preserve_source_value_flags)
Appends a new element to the end of the array.
Definition array.hpp:1633
The result of a parsing operation.
Definition parse_result.hpp:53
Definition parser.inl:887
std::vector< table * > implicit_tables
Definition parser.inl:895
TOML_NODISCARD key make_key(size_t segment_index) const
Definition parser.inl:2806
optional< parse_error > err
Definition parser.inl:906
TOML_NODISCARD node_ptr parse_value_known_prefixes()
Definition parser.inl:2185
TOML_NODISCARD TOML_NEVER_INLINE date parse_date(bool part_of_datetime=false)
Definition parser.inl:1982
utf8_buffered_reader reader
Definition parser.inl:891
table root
Definition parser.inl:892
parser(utf8_reader_interface &&reader_)
Definition parser.inl:3167
TOML_NODISCARD TOML_NEVER_INLINE bool parse_boolean()
Definition parser.inl:1514
TOML_NODISCARD TOML_NEVER_INLINE date_time parse_date_time()
Definition parser.inl:2115
TOML_RETURNS_BY_THROWING void set_error(const T &... reason) const
Definition parser.inl:934
size_t nested_values
Definition parser.inl:904
TOML_NODISCARD node_ptr parse_value()
Definition parser.inl:2229
TOML_NODISCARD node_ptr parse_inline_table()
Definition parser.inl:3260
parse_key_buffer key_buffer
Definition parser.inl:899
std::string_view current_scope
Definition parser.inl:903
std::string string_buffer
Definition parser.inl:900
bool consume_leading_whitespace()
Definition parser.inl:991
bool consume_rest_of_line()
Definition parser.inl:1029
std::string recording_buffer
Definition parser.inl:901
void advance()
Definition parser.inl:946
TOML_NODISCARD node_ptr parse_array()
Definition parser.inl:3205
TOML_NODISCARD TOML_NEVER_INLINE double parse_float()
Definition parser.inl:1561
TOML_NODISCARD TOML_NEVER_INLINE int64_t parse_integer()
Definition parser.inl:1852
TOML_NODISCARD size_t consume_variable_length_digit_sequence(T *buffer, size_t max_len)
Definition parser.inl:1105
void go_back(size_t count=1) noexcept
Definition parser.inl:938
TOML_NODISCARD bool consume_digit_sequence(T *digits, size_t len)
Definition parser.inl:1089
TOML_NODISCARD bool consume_expected_sequence(std::u32string_view seq)
Definition parser.inl:1076
bool recording_whitespace
Definition parser.inl:902
source_position prev_pos
Definition parser.inl:893
TOML_NEVER_INLINE bool parse_key_value_pair_and_insert(table *tbl)
Definition parser.inl:3006
void parse_document()
Definition parser.inl:3092
TOML_NODISCARD TOML_NEVER_INLINE std::string_view parse_basic_string(bool multi_line)
Definition parser.inl:1122
std::vector< array * > table_arrays
Definition parser.inl:898
bool recording
Definition parser.inl:902
std::vector< table * > open_inline_tables
Definition parser.inl:897
TOML_NODISCARD source_position current_position(source_index fallback_offset=0) const noexcept
Definition parser.inl:910
TOML_NODISCARD TOML_NEVER_INLINE time parse_time(bool part_of_datetime=false)
Definition parser.inl:2031
TOML_NODISCARD TOML_NEVER_INLINE table * parse_table_header()
Definition parser.inl:2816
static constexpr size_t max_nested_values
Definition parser.inl:889
const utf8_codepoint * cp
Definition parser.inl:894
TOML_NODISCARD TOML_NEVER_INLINE double parse_inf_or_nan()
Definition parser.inl:1535
static void update_region_ends(node &nde) noexcept
Definition parser.inl:3138
TOML_NEVER_INLINE bool parse_key()
Definition parser.inl:2745
void stop_recording(size_t pop_bytes=0) noexcept
Definition parser.inl:975
TOML_NODISCARD TOML_NEVER_INLINE std::string_view parse_literal_string(bool multi_line)
Definition parser.inl:1356
TOML_RETURNS_BY_THROWING TOML_NEVER_INLINE void set_error_at(source_position pos, const T &... reason) const
Definition parser.inl:916
TOML_NODISCARD TOML_NEVER_INLINE parsed_string parse_string()
Definition parser.inl:1456
bool consume_comment()
Definition parser.inl:1043
TOML_NODISCARD TOML_NEVER_INLINE std::string_view parse_bare_key_segment()
Definition parser.inl:1495
TOML_NODISCARD TOML_NEVER_INLINE double parse_hex_float()
Definition parser.inl:1696
bool consume_line_break()
Definition parser.inl:1006
void start_recording(bool include_current=true) noexcept
Definition parser.inl:966
std::vector< table * > dotted_key_tables
Definition parser.inl:896
A TOML table.
Definition table.hpp:220
TOML_CONST_INLINE_GETTER table * as_table() noexcept final
Returns a pointer to the table.
Definition table.hpp:414
TOML_PURE_INLINE_GETTER iterator begin() noexcept
Returns an iterator to the first key-value pair.
Definition table.hpp:797
TOML_PURE_INLINE_GETTER iterator end() noexcept
Returns an iterator to one-past-the-last key-value pair.
Definition table.hpp:818
TOML_PURE_INLINE_GETTER bool is_inline() const noexcept
Returns true if this table is an inline table.
Definition table.hpp:548
TOML_PURE_GETTER iterator lower_bound(std::string_view key) noexcept
Returns an iterator to the first key-value pair with key that is not less than the given key.
Definition table.hpp:1143
iterator erase(iterator pos) noexcept
Removes the specified key-value pair from the table.
Definition table.hpp:1295
Definition parser.inl:414
utf8_reader_interface & reader_
Definition parser.inl:421
TOML_PURE_INLINE_GETTER const source_path_ptr & source_path() const noexcept
Definition parser.inl:435
TOML_NODISCARD const utf8_codepoint * read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
Definition parser.inl:438
size_t count
Definition parser.inl:424
TOML_NODISCARD bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
Definition parser.inl:486
TOML_NODISCARD const utf8_codepoint * step_back(size_t count) noexcept
Definition parser.inl:471
TOML_NODISCARD optional< parse_error > && error() noexcept
Definition parser.inl:491
static constexpr size_t max_history_length
Definition parser.inl:416
TOML_NODISCARD_CTOR utf8_buffered_reader(utf8_reader_interface &reader) noexcept
Definition parser.inl:431
TOML_PURE_INLINE_GETTER constexpr bool peek_eof() const noexcept
Definition parser.inl:77
TOML_NODISCARD_CTOR constexpr utf8_byte_stream(std::basic_string_view< Char > sv) noexcept
Definition parser.inl:60
TOML_CONST_INLINE_GETTER constexpr bool error() const noexcept
Definition parser.inl:68
TOML_PURE_INLINE_GETTER constexpr bool eof() const noexcept
Definition parser.inl:71
std::basic_string_view< Char > source_
Definition parser.inl:55
TOML_PURE_INLINE_GETTER bool eof() const noexcept
Definition parser.inl:118
TOML_NODISCARD_CTOR utf8_byte_stream(std::istream &stream) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
Definition parser.inl:98
TOML_NODISCARD bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
Definition parser.inl:126
TOML_PURE_INLINE_GETTER bool error() const noexcept
Definition parser.inl:115
std::istream * source_
Definition parser.inl:94
Definition parser.inl:196
TOML_NODISCARD_CTOR utf8_reader(U &&source, String &&source_path={}) noexcept(std::is_nothrow_constructible_v< utf8_byte_stream< T >, U && >)
Definition parser.inl:349
utf8_byte_stream< T > stream_
Definition parser.inl:199
source_path_ptr source_path_
Definition parser.inl:214
optional< parse_error > err_
Definition parser.inl:217
impl::utf8_decoder decoder_
Definition parser.inl:202
TOML_NODISCARD optional< parse_error > && error() noexcept final
Definition parser.inl:388
TOML_PURE_INLINE_GETTER const source_path_ptr & source_path() const noexcept final
Definition parser.inl:362
bool read_next_block() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
Definition parser.inl:220
TOML_NODISCARD bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final
Definition parser.inl:381
TOML_NODISCARD const utf8_codepoint * read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final
Definition parser.inl:365
enum TOML_OPEN_FLAGS_ENUM value_flags
Metadata associated with TOML values.
Definition forward_declarations.hpp:272
#define TOML_CALLCONV
Calling convention to apply to exported free/static functions. \detail Not defined by default (let th...
Definition preprocessor.hpp:1134
#define TOML_ASSERT(expr)
Sets the assert function used by the library. Defaults to the standard C assert().
Definition preprocessor.hpp:1185
#define TOML_EXCEPTIONS
Sets whether the library uses exceptions to report parsing failures. \detail Defaults to 1 or 0 accor...
Definition preprocessor.hpp:1126
#define TOML_ASSERT_ASSUME(expr)
Definition preprocessor.hpp:1190
#define TOML_MAX_NESTED_VALUES
Definition preprocessor.hpp:1154
Definition json.h:5363
#define TOML_RETURNS_BY_THROWING
Definition parser.inl:505
TOML_NAMESPACE_START
Definition parser.inl:3423
TOML_EXTERNAL_LINKAGE parse_result TOML_CALLCONV parse(std::string_view doc, std::string_view source_path)
Parses a TOML document from a string view.
Definition parser.inl:3427
#define set_error_and_return_if_eof(...)
Definition parser.inl:862
TOML_ANON_NAMESPACE_START
Definition parser.inl:43
#define set_error_and_return_default(...)
Definition parser.inl:860
TOML_EXTERNAL_LINKAGE parse_result TOML_CALLCONV parse_file(std::string_view file_path)
Parses a TOML document from a file.
Definition parser.inl:3449
TOML_ENABLE_WARNINGS
Definition parser.inl:36
#define return_if_error_or_eof(...)
Definition parser.inl:829
#define is_error()
Definition parser.inl:822
TOML_IMPL_NAMESPACE_END
Definition parser.inl:3332
TOML_DISABLE_WARNINGS
Definition parser.inl:24
#define TOML_OVERALIGNED
Definition parser.inl:192
#define utf8_reader_error(...)
Definition parser.inl:180
TOML_PURE_GETTER TOML_INTERNAL_LINKAGE std::string_view to_sv(node_type val) noexcept
Definition parser.inl:559
#define TOML_PARSE_FILE_ERROR(msg, path)
#define return_if_error(...)
Definition parser.inl:825
#define return_after_error(...)
Definition parser.inl:823
#define push_parse_scope(scope)
Definition parser.inl:726
#define utf8_reader_error_check(...)
Definition parser.inl:182
#define assert_not_error()
Definition parser.inl:824
TOML_ABI_NAMESPACE_END
Definition parser.inl:3330
#define advance_and_return_if_error_or_eof(...)
Definition parser.inl:874
#define parse_error_break()
Definition parser.inl:851
#define utf8_buffered_reader_error_check(...)
Definition parser.inl:407
#define is_eof()
Definition parser.inl:808
#define advance_and_return_if_error(...)
Definition parser.inl:867
#define assert_not_eof()
Definition parser.inl:809
TOML_INTERNAL_LINKAGE constexpr auto utf8_byte_order_mark
Definition parser.inl:48
TOML_ANON_NAMESPACE_END
Definition parser.inl:496
TOML_INTERNAL_LINKAGE void concatenate(char *&write_pos, char *const buf_end, const T &arg) noexcept
Definition parser.inl:601
TOML_IMPL_NAMESPACE_START
Definition parser.inl:884
#define utf8_reader_return_after_error(...)
Definition parser.inl:181
TOML_NAMESPACE_END
Definition parser.inl:3509
#define TOML_UNUSED(...)
Definition preprocessor.hpp:603
#define TOML_NODISCARD_CTOR
Definition preprocessor.hpp:446
#define TOML_UNLIKELY(...)
Definition preprocessor.hpp:538
#define TOML_ABI_NAMESPACE_BOOL(cond, T, F)
Definition preprocessor.hpp:1323
#define TOML_EMPTY_BASES
Definition preprocessor.hpp:424
#define TOML_NEVER_INLINE
Definition preprocessor.hpp:419
#define TOML_INTERNAL_LINKAGE
Definition preprocessor.hpp:1340
#define TOML_CLOSED_ENUM
Definition preprocessor.hpp:557
#define TOML_UNLIKELY_CASE
Definition preprocessor.hpp:541
#define TOML_EXTERNAL_LINKAGE
Definition preprocessor.hpp:1339
#define TOML_UNREACHABLE
Definition preprocessor.hpp:515
#define TOML_NODISCARD
Definition preprocessor.hpp:439
#define TOML_CONST_GETTER
Definition preprocessor.hpp:485
#define TOML_LANG_UNRELEASED
Definition preprocessor.hpp:1308
#define TOML_COMPILER_HAS_EXCEPTIONS
Definition preprocessor.hpp:334
#define TOML_PURE_GETTER
Definition preprocessor.hpp:474
#define TOML_LIKELY_CASE
Definition preprocessor.hpp:528
#define TOML_CLOSED_FLAGS_ENUM
Definition preprocessor.hpp:562
#define TOML_ATTR(...)
Definition preprocessor.hpp:316
#define TOML_PURE_INLINE_GETTER
Definition preprocessor.hpp:479
#define TOML_CONST_INLINE_GETTER
Definition preprocessor.hpp:490
#define TOML_ABSTRACT_INTERFACE
Definition preprocessor.hpp:423
std::shared_ptr< const std::string > source_path_ptr
A pointer to a shared string resource containing a source path.
Definition source_region.hpp:19
A date-time.
Definition date_time.hpp:327
Definition parser.inl:763
~depth_counter_scope() noexcept
Definition parser.inl:772
size_t & depth_
Definition parser.inl:764
TOML_DELETE_DEFAULTS(depth_counter_scope)
TOML_NODISCARD_CTOR depth_counter_scope(size_t &depth) noexcept
Definition parser.inl:767
Definition parser.inl:678
void append(const T &arg) noexcept
Definition parser.inl:692
char buf[buf_size]
Definition parser.inl:680
static constexpr std::size_t buf_size
Definition parser.inl:679
TOML_NODISCARD_CTOR error_builder(std::string_view scope) noexcept
Definition parser.inl:685
TOML_DELETE_DEFAULTS(error_builder)
TOML_RETURNS_BY_THROWING auto finish(const source_position &pos, const source_path_ptr &source_path) const
Definition parser.inl:697
char *const max_write_pos
Definition parser.inl:682
char * write_pos
Definition parser.inl:681
Definition parser.inl:595
const utf8_codepoint & cp
Definition parser.inl:596
Definition parser.inl:728
TOML_PURE_INLINE_GETTER size_t size() const noexcept
Definition parser.inl:760
std::vector< std::pair< size_t, size_t > > segments
Definition parser.inl:730
std::vector< source_position > starts
Definition parser.inl:731
std::vector< source_position > ends
Definition parser.inl:732
TOML_PURE_INLINE_GETTER std::string_view back() const noexcept
Definition parser.inl:754
void clear() noexcept
Definition parser.inl:734
void push_back(std::string_view segment, source_position b, source_position e)
Definition parser.inl:741
TOML_PURE_INLINE_GETTER bool empty() const noexcept
Definition parser.inl:757
std::string buffer
Definition parser.inl:729
TOML_PURE_INLINE_GETTER std::string_view operator[](size_t i) const noexcept
Definition parser.inl:749
Definition parser.inl:710
TOML_DELETE_DEFAULTS(parse_scope)
std::string_view parent_
Definition parser.inl:712
~parse_scope() noexcept
Definition parser.inl:720
TOML_NODISCARD_CTOR parse_scope(std::string_view &current_scope, std::string_view new_scope) noexcept
Definition parser.inl:715
std::string_view & storage_
Definition parser.inl:711
Definition parser.inl:777
std::string_view value
Definition parser.inl:778
bool was_multi_line
Definition parser.inl:779
A source document line-and-column pair.
Definition source_region.hpp:43
source_index line
The line number.
Definition source_region.hpp:46
source_index column
The column number.
Definition source_region.hpp:50
A source document region.
Definition source_region.hpp:167
Definition parser.inl:782
std::vector< table * > & tables
Definition parser.inl:783
~table_vector_scope() noexcept
Definition parser.inl:791
TOML_DELETE_DEFAULTS(table_vector_scope)
TOML_NODISCARD_CTOR table_vector_scope(std::vector< table * > &tables_, table &tbl)
Definition parser.inl:786
A timezone offset.
Definition date_time.hpp:221
A local time-of-day.
Definition date_time.hpp:113
uint32_t nanosecond
The fractional nanoseconds component, from 0 - 999999999.
Definition date_time.hpp:124
uint8_t second
The second component, from 0 - 59.
Definition date_time.hpp:121
Definition parser.inl:140
TOML_PURE_INLINE_GETTER constexpr const char32_t & operator*() const noexcept
Definition parser.inl:150
source_position position
Definition parser.inl:144
char32_t value
Definition parser.inl:141
size_t count
Definition parser.inl:143
char bytes[4]
Definition parser.inl:142
Definition parser.inl:208
size_t current
Definition parser.inl:210
size_t count
Definition parser.inl:211
Definition parser.inl:203
size_t count
Definition parser.inl:205
Definition parser.inl:155
virtual TOML_NODISCARD optional< parse_error > && error() noexcept=0
virtual TOML_NODISCARD const source_path_ptr & source_path() const noexcept=0
virtual ~utf8_reader_interface() noexcept=default
virtual TOML_NODISCARD bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)=0
virtual TOML_NODISCARD const utf8_codepoint * read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)=0