NeBuild dev
Loading...
Searching...
No Matches
formatter.inl
Go to the documentation of this file.
1// # This file is a part of toml++ and is subject to the terms of the MIT license.
2// # Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
3// # See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
4// SPDX-License-Identifier: MIT
5#pragma once
6
7#include "preprocessor.hpp"
8// # {{
9#if !TOML_IMPLEMENTATION
10#error This is an implementation-only header.
11#endif
12// # }}
13#if TOML_ENABLE_FORMATTERS
14
15#include "array.hpp"
16#include "formatter.hpp"
17#include "header_start.hpp"
18#include "parse_result.hpp"
19#include "print_to_stream.hpp"
20#include "table.hpp"
21#include "unicode.hpp"
22#include "value.hpp"
23
25 enum class TOML_CLOSED_FLAGS_ENUM formatted_string_traits : unsigned {
26 none,
27 line_breaks = 1u << 0, // \n
28 tabs = 1u << 1, // \t
29 control_chars = 1u << 2, // also includes non-ascii vertical whitespace
30 single_quotes = 1u << 3,
31 non_bare = 1u << 4, // anything not satisfying "is bare key character"
32 non_ascii = 1u << 5, // any codepoint >= 128
33
34 all = (non_ascii << 1u) - 1u
35 };
36 TOML_MAKE_FLAGS(formatted_string_traits);
37
39 formatter::formatter(const node* source_node, const parse_result* source_pr,
40 const formatter_constants& constants,
41 const formatter_config& config) noexcept //
42#if TOML_ENABLE_PARSER && !TOML_EXCEPTIONS
43 : source_{source_pr && *source_pr ? &source_pr->table() : source_node},
44 result_{source_pr},
45#else
46 : source_{source_pr ? source_pr : source_node},
47#endif
48 constants_{&constants},
49 config_{config} {
50 TOML_ASSERT_ASSUME(source_);
51
52 config_.flags = (config_.flags | constants_->mandatory_flags) & ~constants_->ignored_flags;
53
54 indent_columns_ = {};
55 for (auto c : config_.indent) indent_columns_ += c == '\t' ? 4u : 1u;
56
57 int_format_mask_ =
58 config_.flags & (format_flags::allow_binary_integers | format_flags::allow_octal_integers |
59 format_flags::allow_hexadecimal_integers);
60 }
61
63 void formatter::attach(std::ostream & stream) noexcept {
64 indent_ = {};
65 naked_newline_ = true;
66 stream_ = &stream;
67 }
68
70 void formatter::detach() noexcept {
71 stream_ = nullptr;
72 }
73
75 void formatter::print_newline(bool force) {
76 if (!naked_newline_ || force) {
77 print_to_stream(*stream_, '\n');
78 naked_newline_ = true;
79 }
80 }
81
83 void formatter::print_indent() {
84 for (int i = 0; i < indent_; i++) {
85 print_to_stream(*stream_, config_.indent);
86 naked_newline_ = false;
87 }
88 }
89
91 void formatter::print_unformatted(char c) {
92 print_to_stream(*stream_, c);
93 naked_newline_ = false;
94 }
95
97 void formatter::print_unformatted(std::string_view str) {
98 print_to_stream(*stream_, str);
99 naked_newline_ = false;
100 }
101
103 void formatter::print_string(std::string_view str, bool allow_multi_line, bool allow_bare,
104 bool allow_literal_whitespace) {
105 if (str.empty()) {
106 print_unformatted(literal_strings_allowed() ? "''"sv : "\"\""sv);
107 return;
108 }
109
110 // pre-scan the string to determine how we should output it
111 formatted_string_traits traits = {};
112
113 if (!allow_bare) traits |= formatted_string_traits::non_bare;
114 bool unicode_allowed = unicode_strings_allowed();
115
116 // ascii fast path
117 if (is_ascii(str.data(), str.length())) {
118 for (auto c : str) {
119 switch (c) {
120 case '\n':
121 traits |= formatted_string_traits::line_breaks;
122 break;
123 case '\t':
124 traits |= formatted_string_traits::tabs;
125 break;
126 case '\'':
127 traits |= formatted_string_traits::single_quotes;
128 break;
129 default: {
130 if TOML_UNLIKELY (is_control_character(c))
131 traits |= formatted_string_traits::control_chars;
132
133 if (!is_ascii_bare_key_character(static_cast<char32_t>(c)))
134 traits |= formatted_string_traits::non_bare;
135 break;
136 }
137 }
138
139 static constexpr auto all_ascii_traits =
140 formatted_string_traits::all & ~formatted_string_traits::non_ascii;
141 if (traits == all_ascii_traits) break;
142 }
143 }
144
145 // unicode slow path
146 else {
147 traits |= formatted_string_traits::non_ascii;
148 utf8_decoder decoder;
149
150 // if the unicode is malformed just treat the string as a single-line non-literal and
151 // escape all non-ascii characters (to ensure round-tripping and help with diagnostics)
152 const auto bad_unicode = [&]() noexcept {
153 traits &= ~formatted_string_traits::line_breaks;
154 traits |= formatted_string_traits::control_chars | formatted_string_traits::non_bare;
155 unicode_allowed = false;
156 };
157
158 for (auto c : str) {
159 decoder(c);
160
161 if TOML_UNLIKELY (decoder.error()) {
162 bad_unicode();
163 break;
164 }
165
166 if (!decoder.has_code_point()) continue;
167
168 switch (decoder.codepoint) {
169 case U'\n':
170 traits |= formatted_string_traits::line_breaks;
171 break;
172 case U'\t':
173 traits |= formatted_string_traits::tabs;
174 break;
175 case U'\'':
176 traits |= formatted_string_traits::single_quotes;
177 break;
178 default: {
179 if TOML_UNLIKELY (is_control_character(decoder.codepoint) ||
180 is_non_ascii_vertical_whitespace(decoder.codepoint))
181 traits |= formatted_string_traits::control_chars;
182
183 if (!is_bare_key_character(decoder.codepoint))
184 traits |= formatted_string_traits::non_bare;
185 break;
186 }
187 }
188 }
189
190 if (decoder.needs_more_input()) bad_unicode();
191 }
192
193 // strings with line breaks, tabs, and single-quotes can't be bare
194 if (!!(traits & (formatted_string_traits::line_breaks | formatted_string_traits::tabs |
195 formatted_string_traits::single_quotes)))
196 traits |= formatted_string_traits::non_bare;
197
198 // if the string meets the requirements of being 'bare' we can emit a bare string
199 // (bare strings are composed of letters and numbers; no whitespace, control chars, quotes, etc)
200 if (!(traits & formatted_string_traits::non_bare) &&
201 (!(traits & formatted_string_traits::non_ascii) || unicode_allowed)) {
202 print_unformatted(str);
203 return;
204 }
205 const auto real_tabs_allowed = allow_literal_whitespace && real_tabs_in_strings_allowed();
206
207 // determine if this should be a multi-line string (triple-quotes)
208 const auto multi_line = allow_literal_whitespace //
209 && allow_multi_line //
210 && multi_line_strings_allowed() //
211 && !!(traits & formatted_string_traits::line_breaks);
212
213 // determine if this should be a literal string (single-quotes with no escaping)
214 const auto literal = literal_strings_allowed() //
215 && !(traits & formatted_string_traits::control_chars) //
216 && (!(traits & formatted_string_traits::single_quotes) || multi_line) //
217 && (!(traits & formatted_string_traits::tabs) || real_tabs_allowed) //
218 && (!(traits & formatted_string_traits::line_breaks) || multi_line) //
219 && (!(traits & formatted_string_traits::non_ascii) || unicode_allowed);
220
221 // literal strings (single quotes, no escape codes)
222 if (literal) {
223 const auto quot = multi_line ? R"(''')"sv : R"(')"sv;
224 print_unformatted(quot);
225 print_unformatted(str);
226 print_unformatted(quot);
227 return;
228 }
229
230 // anything from here down is a non-literal string, so requires iteration and escaping.
231 print_unformatted(multi_line ? R"(""")"sv : R"(")"sv);
232
233 // ascii fast path
234 if (!(traits & formatted_string_traits::non_ascii)) {
235 for (auto c : str) {
236 switch (c) {
237 case '"':
238 print_to_stream(*stream_, R"(\")"sv);
239 break;
240 case '\\':
241 print_to_stream(*stream_, R"(\\)"sv);
242 break;
243 case '\x7F':
244 print_to_stream(*stream_, R"(\u007F)"sv);
245 break;
246 case '\t':
247 print_to_stream(*stream_, real_tabs_allowed ? "\t"sv : R"(\t)"sv);
248 break;
249 case '\n':
250 print_to_stream(*stream_, multi_line ? "\n"sv : R"(\n)"sv);
251 break;
252 default: {
253 // control characters from lookup table
254 if TOML_UNLIKELY (c >= '\x00' && c <= '\x1F')
255 print_to_stream(*stream_, control_char_escapes[c]);
256
257 // regular characters
258 else
259 print_to_stream(*stream_, c);
260 }
261 }
262 }
263 }
264
265 // unicode slow path
266 else {
267 utf8_decoder decoder;
268 const char* cp_start = str.data();
269 const char* cp_end = cp_start;
270 for (auto c : str) {
271 decoder(c);
272 cp_end++;
273
274 // if the decoder encounters malformed unicode just emit raw bytes and
275 if (decoder.error()) {
276 while (cp_start != cp_end) {
277 print_to_stream(*stream_, R"(\u00)"sv);
278 print_to_stream(*stream_, static_cast<uint8_t>(*cp_start),
279 value_flags::format_as_hexadecimal, 2);
280 cp_start++;
281 }
282 decoder.reset();
283 continue;
284 }
285
286 if (!decoder.has_code_point()) continue;
287
288 switch (decoder.codepoint) {
289 case U'"':
290 print_to_stream(*stream_, R"(\")"sv);
291 break;
292 case U'\\':
293 print_to_stream(*stream_, R"(\\)"sv);
294 break;
295 case U'\x7F':
296 print_to_stream(*stream_, R"(\u007F)"sv);
297 break;
298 case U'\t':
299 print_to_stream(*stream_, real_tabs_allowed ? "\t"sv : R"(\t)"sv);
300 break;
301 case U'\n':
302 print_to_stream(*stream_, multi_line ? "\n"sv : R"(\n)"sv);
303 break;
304 default: {
305 // control characters from lookup table
306 if TOML_UNLIKELY (decoder.codepoint <= U'\x1F')
307 print_to_stream(*stream_,
308 control_char_escapes[static_cast<uint_least32_t>(decoder.codepoint)]);
309
310 // escaped unicode characters
311 else if (decoder.codepoint > U'\x7F' &&
312 (!unicode_allowed || is_non_ascii_vertical_whitespace(decoder.codepoint))) {
313 if (static_cast<uint_least32_t>(decoder.codepoint) > 0xFFFFu) {
314 print_to_stream(*stream_, R"(\U)"sv);
315 print_to_stream(*stream_, static_cast<uint_least32_t>(decoder.codepoint),
316 value_flags::format_as_hexadecimal, 8);
317 } else {
318 print_to_stream(*stream_, R"(\u)"sv);
319 print_to_stream(*stream_, static_cast<uint_least32_t>(decoder.codepoint),
320 value_flags::format_as_hexadecimal, 4);
321 }
322 }
323
324 // regular characters
325 else
326 print_to_stream(*stream_, cp_start, static_cast<size_t>(cp_end - cp_start));
327 }
328 }
329
330 cp_start = cp_end;
331 }
332 }
333
334 print_unformatted(multi_line ? R"(""")"sv : R"(")"sv);
335 }
336
338 void formatter::print(const value<std::string>& val) {
339 print_string(val.get());
340 }
341
343 void formatter::print(const value<int64_t>& val) {
344 naked_newline_ = false;
345
346 if (*val >= 0 && !!int_format_mask_) {
347 static constexpr auto value_flags_mask = value_flags::format_as_binary |
348 value_flags::format_as_octal |
349 value_flags::format_as_hexadecimal;
350
351 const auto fmt = val.flags() & value_flags_mask;
352 switch (fmt) {
353 case value_flags::format_as_binary:
354 if (!!(int_format_mask_ & format_flags::allow_binary_integers)) {
355 print_to_stream(*stream_, "0b"sv);
356 print_to_stream(*stream_, *val, fmt);
357 return;
358 }
359 break;
360
361 case value_flags::format_as_octal:
362 if (!!(int_format_mask_ & format_flags::allow_octal_integers)) {
363 print_to_stream(*stream_, "0o"sv);
364 print_to_stream(*stream_, *val, fmt);
365 return;
366 }
367 break;
368
369 case value_flags::format_as_hexadecimal:
370 if (!!(int_format_mask_ & format_flags::allow_hexadecimal_integers)) {
371 print_to_stream(*stream_, "0x"sv);
372 print_to_stream(*stream_, *val, fmt);
373 return;
374 }
375 break;
376
377 default:
378 break;
379 }
380 }
381
382 // fallback to decimal
383 print_to_stream(*stream_, *val);
384 }
385
387 void formatter::print(const value<double>& val) {
388 const std::string_view* inf_nan = nullptr;
389 switch (fpclassify(*val)) {
390 case fp_class::neg_inf:
391 inf_nan = &constants_->float_neg_inf;
392 break;
393 case fp_class::pos_inf:
394 inf_nan = &constants_->float_pos_inf;
395 break;
396 case fp_class::nan:
397 inf_nan = &constants_->float_nan;
398 break;
399 case fp_class::ok:
400 print_to_stream(*stream_, *val, value_flags::none,
401 !!(config_.flags & format_flags::relaxed_float_precision));
402 break;
403 default:
405 }
406
407 if (inf_nan) {
408 if (!!(config_.flags & format_flags::quote_infinities_and_nans))
409 print_to_stream_bookended(*stream_, *inf_nan, '"');
410 else
411 print_to_stream(*stream_, *inf_nan);
412 }
413
414 naked_newline_ = false;
415 }
416
418 void formatter::print(const value<bool>& val) {
419 print_unformatted(*val ? constants_->bool_true : constants_->bool_false);
420 }
421
423 void formatter::print(const value<date>& val) {
424 if (!!(config_.flags & format_flags::quote_dates_and_times))
425 print_to_stream_bookended(*stream_, *val, literal_strings_allowed() ? '\'' : '"');
426 else
427 print_to_stream(*stream_, *val);
428 naked_newline_ = false;
429 }
430
432 void formatter::print(const value<time>& val) {
433 if (!!(config_.flags & format_flags::quote_dates_and_times))
434 print_to_stream_bookended(*stream_, *val, literal_strings_allowed() ? '\'' : '"');
435 else
436 print_to_stream(*stream_, *val);
437 naked_newline_ = false;
438 }
439
441 void formatter::print(const value<date_time>& val) {
442 if (!!(config_.flags & format_flags::quote_dates_and_times))
443 print_to_stream_bookended(*stream_, *val, literal_strings_allowed() ? '\'' : '"');
444 else
445 print_to_stream(*stream_, *val);
446 naked_newline_ = false;
447 }
448
450 void formatter::print_value(const node& val_node, node_type type) {
451 TOML_ASSUME(type > node_type::array);
452 switch (type) {
453 case node_type::string:
454 print(*reinterpret_cast<const value<std::string>*>(&val_node));
455 break;
456 case node_type::integer:
457 print(*reinterpret_cast<const value<int64_t>*>(&val_node));
458 break;
459 case node_type::floating_point:
460 print(*reinterpret_cast<const value<double>*>(&val_node));
461 break;
462 case node_type::boolean:
463 print(*reinterpret_cast<const value<bool>*>(&val_node));
464 break;
465 case node_type::date:
466 print(*reinterpret_cast<const value<date>*>(&val_node));
467 break;
468 case node_type::time:
469 print(*reinterpret_cast<const value<time>*>(&val_node));
470 break;
471 case node_type::date_time:
472 print(*reinterpret_cast<const value<date_time>*>(&val_node));
473 break;
474 default:
476 }
477 }
478
479#if TOML_ENABLE_PARSER && !TOML_EXCEPTIONS
480
482 bool formatter::dump_failed_parse_result() {
483 if (result_ && !(*result_)) {
484 stream() << result_->error();
485 return true;
486 }
487 return false;
488 }
489
490#else
491
493 TOML_ATTR(const)
494 bool formatter::dump_failed_parse_result() {
495 return false;
496 }
497
498#endif
499}
501
502#include "header_end.hpp"
503#endif // TOML_ENABLE_FORMATTERS
The result of a parsing operation.
Definition parse_result.hpp:53
TOML_IMPL_NAMESPACE_END
Definition formatter.inl:500
TOML_IMPL_NAMESPACE_START
Definition formatter.inl:24
#define TOML_ASSERT_ASSUME(expr)
Definition preprocessor.hpp:1190
#define TOML_UNLIKELY(...)
Definition preprocessor.hpp:538
#define TOML_EXTERNAL_LINKAGE
Definition preprocessor.hpp:1339
#define TOML_UNREACHABLE
Definition preprocessor.hpp:515
#define TOML_CLOSED_FLAGS_ENUM
Definition preprocessor.hpp:562
#define TOML_ATTR(...)
Definition preprocessor.hpp:316
#define TOML_MAKE_FLAGS(T)
Definition preprocessor.hpp:601
#define TOML_ASSUME(expr)
Definition preprocessor.hpp:506
TOML_EXPORTED_FREE_FUNCTION void TOML_CALLCONV print_to_stream(std::ostream &, std::string_view)
void print_to_stream_bookended(std::ostream &stream, const T &val, const U &bookend)
Definition print_to_stream.hpp:120