1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <folly/json.h>
18
19#include <algorithm>
20#include <functional>
21#include <iterator>
22#include <type_traits>
23
24#include <boost/algorithm/string.hpp>
25#include <glog/logging.h>
26
27#include <folly/Conv.h>
28#include <folly/Portability.h>
29#include <folly/Range.h>
30#include <folly/String.h>
31#include <folly/Unicode.h>
32#include <folly/lang/Bits.h>
33#include <folly/portability/Constexpr.h>
34
35namespace folly {
36
37//////////////////////////////////////////////////////////////////////
38
39namespace json {
40
41namespace {
42
43parse_error make_parse_error(
44 unsigned int line,
45 std::string const& context,
46 std::string const& expected) {
47 return parse_error(to<std::string>(
48 "json parse error on line ",
49 line,
50 !context.empty() ? to<std::string>(" near `", context, '\'') : "",
51 ": ",
52 expected));
53}
54
55struct Printer {
56 explicit Printer(
57 std::string& out,
58 unsigned* indentLevel,
59 serialization_opts const* opts)
60 : out_(out), indentLevel_(indentLevel), opts_(*opts) {}
61
62 void operator()(dynamic const& v) const {
63 switch (v.type()) {
64 case dynamic::DOUBLE:
65 if (!opts_.allow_nan_inf &&
66 (std::isnan(v.asDouble()) || std::isinf(v.asDouble()))) {
67 throw json::parse_error(
68 "folly::toJson: JSON object value was a "
69 "NaN or INF");
70 }
71 toAppend(
72 v.asDouble(), &out_, opts_.double_mode, opts_.double_num_digits);
73 break;
74 case dynamic::INT64: {
75 auto intval = v.asInt();
76 if (opts_.javascript_safe) {
77 // Use folly::to to check that this integer can be represented
78 // as a double without loss of precision.
79 intval = int64_t(to<double>(intval));
80 }
81 toAppend(intval, &out_);
82 break;
83 }
84 case dynamic::BOOL:
85 out_ += v.asBool() ? "true" : "false";
86 break;
87 case dynamic::NULLT:
88 out_ += "null";
89 break;
90 case dynamic::STRING:
91 escapeString(v.asString(), out_, opts_);
92 break;
93 case dynamic::OBJECT:
94 printObject(v);
95 break;
96 case dynamic::ARRAY:
97 printArray(v);
98 break;
99 default:
100 CHECK(0) << "Bad type " << v.type();
101 }
102 }
103
104 private:
105 void printKV(const std::pair<const dynamic, dynamic>& p) const {
106 if (!opts_.allow_non_string_keys && !p.first.isString()) {
107 throw json::parse_error(
108 "folly::toJson: JSON object key was not a "
109 "string");
110 }
111 (*this)(p.first);
112 mapColon();
113 (*this)(p.second);
114 }
115
116 template <typename Iterator>
117 void printKVPairs(Iterator begin, Iterator end) const {
118 printKV(*begin);
119 for (++begin; begin != end; ++begin) {
120 out_ += ',';
121 newline();
122 printKV(*begin);
123 }
124 }
125
126 void printObject(dynamic const& o) const {
127 if (o.empty()) {
128 out_ += "{}";
129 return;
130 }
131
132 out_ += '{';
133 indent();
134 newline();
135 if (opts_.sort_keys || opts_.sort_keys_by) {
136 using ref = std::reference_wrapper<decltype(o.items())::value_type const>;
137 auto sort_keys_by = [&](auto begin, auto end, const auto& comp) {
138 std::sort(begin, end, [&](ref a, ref b) {
139 // Only compare keys. No ordering among identical keys.
140 return comp(a.get().first, b.get().first);
141 });
142 };
143 std::vector<ref> refs(o.items().begin(), o.items().end());
144 if (opts_.sort_keys_by) {
145 sort_keys_by(refs.begin(), refs.end(), opts_.sort_keys_by);
146 } else {
147 sort_keys_by(refs.begin(), refs.end(), std::less<>());
148 }
149 printKVPairs(refs.cbegin(), refs.cend());
150 } else {
151 printKVPairs(o.items().begin(), o.items().end());
152 }
153 outdent();
154 newline();
155 out_ += '}';
156 }
157
158 void printArray(dynamic const& a) const {
159 if (a.empty()) {
160 out_ += "[]";
161 return;
162 }
163
164 out_ += '[';
165 indent();
166 newline();
167 (*this)(a[0]);
168 for (auto& val : range(std::next(a.begin()), a.end())) {
169 out_ += ',';
170 newline();
171 (*this)(val);
172 }
173 outdent();
174 newline();
175 out_ += ']';
176 }
177
178 private:
179 void outdent() const {
180 if (indentLevel_) {
181 --*indentLevel_;
182 }
183 }
184
185 void indent() const {
186 if (indentLevel_) {
187 ++*indentLevel_;
188 }
189 }
190
191 void newline() const {
192 if (indentLevel_) {
193 out_ += to<std::string>('\n', std::string(*indentLevel_ * 2, ' '));
194 }
195 }
196
197 void mapColon() const {
198 out_ += indentLevel_ ? ": " : ":";
199 }
200
201 private:
202 std::string& out_;
203 unsigned* const indentLevel_;
204 serialization_opts const& opts_;
205};
206
207//////////////////////////////////////////////////////////////////////
208
209// Wraps our input buffer with some helper functions.
210struct Input {
211 explicit Input(StringPiece range, json::serialization_opts const* opts)
212 : range_(range), opts_(*opts), lineNum_(0) {
213 storeCurrent();
214 }
215
216 Input(Input const&) = delete;
217 Input& operator=(Input const&) = delete;
218
219 char const* begin() const {
220 return range_.begin();
221 }
222
223 unsigned getLineNum() const {
224 return lineNum_;
225 }
226
227 // Parse ahead for as long as the supplied predicate is satisfied,
228 // returning a range of what was skipped.
229 template <class Predicate>
230 StringPiece skipWhile(const Predicate& p) {
231 std::size_t skipped = 0;
232 for (; skipped < range_.size(); ++skipped) {
233 if (!p(range_[skipped])) {
234 break;
235 }
236 if (range_[skipped] == '\n') {
237 ++lineNum_;
238 }
239 }
240 auto ret = range_.subpiece(0, skipped);
241 range_.advance(skipped);
242 storeCurrent();
243 return ret;
244 }
245
246 StringPiece skipDigits() {
247 return skipWhile([](char c) { return c >= '0' && c <= '9'; });
248 }
249
250 StringPiece skipMinusAndDigits() {
251 bool firstChar = true;
252 return skipWhile([&firstChar](char c) {
253 bool result = (c >= '0' && c <= '9') || (firstChar && c == '-');
254 firstChar = false;
255 return result;
256 });
257 }
258
259 void skipWhitespace() {
260 unsigned index = 0;
261 while (true) {
262 while (index < range_.size() && range_[index] == ' ') {
263 index++;
264 }
265 if (index < range_.size()) {
266 if (range_[index] == '\n') {
267 index++;
268 ++lineNum_;
269 continue;
270 }
271 if (range_[index] == '\t' || range_[index] == '\r') {
272 index++;
273 continue;
274 }
275 }
276 break;
277 }
278 range_.advance(index);
279 storeCurrent();
280 }
281
282 void expect(char c) {
283 if (**this != c) {
284 throw json::make_parse_error(
285 lineNum_, context(), to<std::string>("expected '", c, '\''));
286 }
287 ++*this;
288 }
289
290 std::size_t size() const {
291 return range_.size();
292 }
293
294 int operator*() const {
295 return current_;
296 }
297
298 void operator++() {
299 range_.pop_front();
300 storeCurrent();
301 }
302
303 template <class T>
304 T extract() {
305 try {
306 return to<T>(&range_);
307 } catch (std::exception const& e) {
308 error(e.what());
309 }
310 }
311
312 bool consume(StringPiece str) {
313 if (boost::starts_with(range_, str)) {
314 range_.advance(str.size());
315 storeCurrent();
316 return true;
317 }
318 return false;
319 }
320
321 std::string context() const {
322 return range_.subpiece(0, 16 /* arbitrary */).toString();
323 }
324
325 dynamic error(char const* what) const {
326 throw json::make_parse_error(lineNum_, context(), what);
327 }
328
329 json::serialization_opts const& getOpts() {
330 return opts_;
331 }
332
333 void incrementRecursionLevel() {
334 if (currentRecursionLevel_ > opts_.recursion_limit) {
335 error("recursion limit exceeded");
336 }
337 currentRecursionLevel_++;
338 }
339
340 void decrementRecursionLevel() {
341 currentRecursionLevel_--;
342 }
343
344 private:
345 void storeCurrent() {
346 current_ = range_.empty() ? EOF : range_.front();
347 }
348
349 private:
350 StringPiece range_;
351 json::serialization_opts const& opts_;
352 unsigned lineNum_;
353 int current_;
354 unsigned int currentRecursionLevel_{0};
355};
356
357class RecursionGuard {
358 public:
359 explicit RecursionGuard(Input& in) : in_(in) {
360 in_.incrementRecursionLevel();
361 }
362
363 ~RecursionGuard() {
364 in_.decrementRecursionLevel();
365 }
366
367 private:
368 Input& in_;
369};
370
// Forward declarations: the value parsers below call each other recursively
// (objects and arrays contain values, and object keys are strings or values).
dynamic parseValue(Input& in, json::metadata_map* map);
std::string parseString(Input& in);
dynamic parseNumber(Input& in);
374
375template <class K>
376void parseObjectKeyValue(
377 Input& in,
378 dynamic& ret,
379 K&& key,
380 json::metadata_map* map) {
381 auto keyLineNumber = in.getLineNum();
382 in.skipWhitespace();
383 in.expect(':');
384 in.skipWhitespace();
385 K tmp;
386 if (map) {
387 tmp = K(key);
388 }
389 auto valueLineNumber = in.getLineNum();
390 ret.insert(std::forward<K>(key), parseValue(in, map));
391 if (map) {
392 auto val = ret.get_ptr(tmp);
393 // We just inserted it, so it should be there!
394 DCHECK(val != nullptr);
395 map->emplace(
396 val, json::parse_metadata{{{keyLineNumber}}, {{valueLineNumber}}});
397 }
398}
399
400dynamic parseObject(Input& in, json::metadata_map* map) {
401 DCHECK_EQ(*in, '{');
402 ++in;
403
404 dynamic ret = dynamic::object;
405
406 in.skipWhitespace();
407 if (*in == '}') {
408 ++in;
409 return ret;
410 }
411
412 for (;;) {
413 if (in.getOpts().allow_trailing_comma && *in == '}') {
414 break;
415 }
416 if (*in == '\"') { // string
417 auto key = parseString(in);
418 parseObjectKeyValue(in, ret, std::move(key), map);
419 } else if (!in.getOpts().allow_non_string_keys) {
420 in.error("expected string for object key name");
421 } else {
422 auto key = parseValue(in, map);
423 parseObjectKeyValue(in, ret, std::move(key), map);
424 }
425
426 in.skipWhitespace();
427 if (*in != ',') {
428 break;
429 }
430 ++in;
431 in.skipWhitespace();
432 }
433 in.expect('}');
434
435 return ret;
436}
437
438dynamic parseArray(Input& in, json::metadata_map* map) {
439 DCHECK_EQ(*in, '[');
440 ++in;
441
442 dynamic ret = dynamic::array;
443
444 in.skipWhitespace();
445 if (*in == ']') {
446 ++in;
447 return ret;
448 }
449
450 std::vector<uint32_t> lineNumbers;
451 for (;;) {
452 if (in.getOpts().allow_trailing_comma && *in == ']') {
453 break;
454 }
455 ret.push_back(parseValue(in, map));
456 if (map) {
457 lineNumbers.push_back(in.getLineNum());
458 }
459 in.skipWhitespace();
460 if (*in != ',') {
461 break;
462 }
463 ++in;
464 in.skipWhitespace();
465 }
466 if (map) {
467 for (size_t i = 0; i < ret.size(); i++) {
468 map->emplace(&ret[i], json::parse_metadata{{{0}}, {{lineNumbers[i]}}});
469 }
470 }
471 in.expect(']');
472
473 return ret;
474}
475
// Parses a JSON number (or the literal "-Infinity"); the input is positioned
// on a '-' or a digit.
//
// Returns an integer when the text has no fraction or exponent, a double
// otherwise. With parse_numbers_as_strings set, the matched text is returned
// instead of a numeric value.
dynamic parseNumber(Input& in) {
  bool const negative = (*in == '-');
  if (negative && in.consume("-Infinity")) {
    if (in.getOpts().parse_numbers_as_strings) {
      return "-Infinity";
    } else {
      return -std::numeric_limits<double>::infinity();
    }
  }

  // Optional leading '-' followed by the integral digits.
  auto integral = in.skipMinusAndDigits();
  if (negative && integral.size() < 2) {
    // Only the '-' itself was consumed.
    in.error("expected digits after `-'");
  }

  // True when the integral part is immediately followed by an exponent.
  auto const wasE = *in == 'e' || *in == 'E';

  // Textual int64 bounds, used below to decide whether the digits fit.
  constexpr const char* maxInt = "9223372036854775807";
  constexpr const char* minInt = "-9223372036854775808";
  constexpr auto maxIntLen = constexpr_strlen(maxInt);
  constexpr auto minIntLen = constexpr_strlen(minInt);

  // Plain integer requested as text: hand back exactly what was matched.
  if (*in != '.' && !wasE && in.getOpts().parse_numbers_as_strings) {
    return integral;
  }

  // Plain integer (no fraction, no exponent).
  if (*in != '.' && !wasE) {
    // Convert to int64 when double_fallback is off, or when the text is
    // shorter than the bound, or when it has the same length as the bound and
    // compares <= to it (NOTE(review): relies on StringPiece comparison being
    // lexicographic, which matches magnitude order for equal-length digit
    // strings -- confirm). Otherwise fall back to a double.
    if (LIKELY(!in.getOpts().double_fallback || integral.size() < maxIntLen) ||
        (!negative && integral.size() == maxIntLen && integral <= maxInt) ||
        (negative && integral.size() == minIntLen && integral <= minInt)) {
      auto val = to<int64_t>(integral);
      in.skipWhitespace();
      return val;
    } else {
      auto val = to<double>(integral);
      in.skipWhitespace();
      return val;
    }
  }

  // Fraction and/or exponent. If we are on '.', step over it and consume the
  // fraction digits; if we are already on the exponent marker there is no
  // fraction, so the number currently ends right here.
  auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
  if (*in == 'e' || *in == 'E') {
    ++in;
    if (*in == '+' || *in == '-') {
      ++in;
    }
    auto expPart = in.skipDigits();
    end = expPart.end();
  }
  // Everything from the first integral character up to the last consumed
  // fraction/exponent digit.
  auto fullNum = range(integral.begin(), end);
  if (in.getOpts().parse_numbers_as_strings) {
    return fullNum;
  }
  auto val = to<double>(fullNum);
  return val;
}
532
533std::string decodeUnicodeEscape(Input& in) {
534 auto hexVal = [&](int c) -> uint16_t {
535 // clang-format off
536 return uint16_t(
537 c >= '0' && c <= '9' ? c - '0' :
538 c >= 'a' && c <= 'f' ? c - 'a' + 10 :
539 c >= 'A' && c <= 'F' ? c - 'A' + 10 :
540 (in.error("invalid hex digit"), 0));
541 // clang-format on
542 };
543
544 auto readHex = [&]() -> uint16_t {
545 if (in.size() < 4) {
546 in.error("expected 4 hex digits");
547 }
548
549 auto ret = uint16_t(hexVal(*in) * 4096);
550 ++in;
551 ret += hexVal(*in) * 256;
552 ++in;
553 ret += hexVal(*in) * 16;
554 ++in;
555 ret += hexVal(*in);
556 ++in;
557 return ret;
558 };
559
560 /*
561 * If the value encoded is in the surrogate pair range, we need to
562 * make sure there is another escape that we can use also.
563 */
564 uint32_t codePoint = readHex();
565 if (codePoint >= 0xd800 && codePoint <= 0xdbff) {
566 if (!in.consume("\\u")) {
567 in.error(
568 "expected another unicode escape for second half of "
569 "surrogate pair");
570 }
571 uint16_t second = readHex();
572 if (second >= 0xdc00 && second <= 0xdfff) {
573 codePoint = 0x10000 + ((codePoint & 0x3ff) << 10) + (second & 0x3ff);
574 } else {
575 in.error("second character in surrogate pair is invalid");
576 }
577 } else if (codePoint >= 0xdc00 && codePoint <= 0xdfff) {
578 in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
579 }
580
581 return codePointToUtf8(codePoint);
582}
583
584std::string parseString(Input& in) {
585 DCHECK_EQ(*in, '\"');
586 ++in;
587
588 std::string ret;
589 for (;;) {
590 auto range = in.skipWhile([](char c) { return c != '\"' && c != '\\'; });
591 ret.append(range.begin(), range.end());
592
593 if (*in == '\"') {
594 ++in;
595 break;
596 }
597 if (*in == '\\') {
598 ++in;
599 switch (*in) {
600 // clang-format off
601 case '\"': ret.push_back('\"'); ++in; break;
602 case '\\': ret.push_back('\\'); ++in; break;
603 case '/': ret.push_back('/'); ++in; break;
604 case 'b': ret.push_back('\b'); ++in; break;
605 case 'f': ret.push_back('\f'); ++in; break;
606 case 'n': ret.push_back('\n'); ++in; break;
607 case 'r': ret.push_back('\r'); ++in; break;
608 case 't': ret.push_back('\t'); ++in; break;
609 case 'u': ++in; ret += decodeUnicodeEscape(in); break;
610 // clang-format on
611 default:
612 in.error(
613 to<std::string>("unknown escape ", *in, " in string").c_str());
614 }
615 continue;
616 }
617 if (*in == EOF) {
618 in.error("unterminated string");
619 }
620 if (!*in) {
621 /*
622 * Apparently we're actually supposed to ban all control
623 * characters from strings. This seems unnecessarily
624 * restrictive, so we're only banning zero bytes. (Since the
625 * string is presumed to be UTF-8 encoded it's fine to just
626 * check this way.)
627 */
628 in.error("null byte in string");
629 }
630
631 ret.push_back(char(*in));
632 ++in;
633 }
634
635 return ret;
636}
637
638dynamic parseValue(Input& in, json::metadata_map* map) {
639 RecursionGuard guard(in);
640
641 in.skipWhitespace();
642 // clang-format off
643 return
644 *in == '[' ? parseArray(in, map) :
645 *in == '{' ? parseObject(in, map) :
646 *in == '\"' ? parseString(in) :
647 (*in == '-' || (*in >= '0' && *in <= '9')) ? parseNumber(in) :
648 in.consume("true") ? true :
649 in.consume("false") ? false :
650 in.consume("null") ? nullptr :
651 in.consume("Infinity") ?
652 (in.getOpts().parse_numbers_as_strings ? (dynamic)"Infinity" :
653 (dynamic)std::numeric_limits<double>::infinity()) :
654 in.consume("NaN") ?
655 (in.getOpts().parse_numbers_as_strings ? (dynamic)"NaN" :
656 (dynamic)std::numeric_limits<double>::quiet_NaN()) :
657 in.error("expected json value");
658 // clang-format on
659}
660
661} // namespace
662
663//////////////////////////////////////////////////////////////////////
664
665std::array<uint64_t, 2> buildExtraAsciiToEscapeBitmap(StringPiece chars) {
666 std::array<uint64_t, 2> escapes{{0, 0}};
667 for (auto b : ByteRange(chars)) {
668 if (b >= 0x20 && b < 0x80) {
669 escapes[b / 64] |= uint64_t(1) << (b % 64);
670 }
671 }
672 return escapes;
673}
674
675std::string serialize(dynamic const& dyn, serialization_opts const& opts) {
676 std::string ret;
677 unsigned indentLevel = 0;
678 Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
679 p(dyn);
680 return ret;
681}
682
683// Fast path to determine the longest prefix that can be left
684// unescaped in a string of sizeof(T) bytes packed in an integer of
685// type T.
686template <bool EnableExtraAsciiEscapes, class T>
687size_t firstEscapableInWord(T s, const serialization_opts& opts) {
688 static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
689 static constexpr T kOnes = ~T() / 255; // 0x...0101
690 static constexpr T kMsbs = kOnes * 0x80; // 0x...8080
691
692 // Sets the MSB of bytes < b. Precondition: b < 128.
693 auto isLess = [](T w, uint8_t b) {
694 // A byte is < b iff subtracting b underflows, so we check that
695 // the MSB wasn't set before and it's set after the subtraction.
696 return (w - kOnes * b) & ~w & kMsbs;
697 };
698
699 auto isChar = [&](uint8_t c) {
700 // A byte is == c iff it is 0 if xored with c.
701 return isLess(s ^ (kOnes * c), 1);
702 };
703
704 // The following masks have the MSB set for each byte of the word
705 // that satisfies the corresponding condition.
706 auto isHigh = s & kMsbs; // >= 128
707 auto isLow = isLess(s, 0x20); // <= 0x1f
708 auto needsEscape = isHigh | isLow | isChar('\\') | isChar('"');
709
710 if /* constexpr */ (EnableExtraAsciiEscapes) {
711 // Deal with optional bitmap for unicode escapes. Escapes can optionally be
712 // set for ascii characters 32 - 127, so the inner loop may run up to 96
713 // times. However, for the case where 0 or a handful of bits are set,
714 // looping will be minimal through use of findFirstSet.
715 for (size_t i = 0; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
716 const auto offset = i * 64;
717 // Clear first 32 characters if this is the first index, since those are
718 // always escaped.
719 auto bitmap = opts.extra_ascii_to_escape_bitmap[i] &
720 (i == 0 ? uint64_t(-1) << 32 : ~0UL);
721 while (bitmap) {
722 auto bit = folly::findFirstSet(bitmap);
723 needsEscape |= isChar(static_cast<uint8_t>(offset + bit - 1));
724 bitmap &= bitmap - 1;
725 }
726 }
727 }
728
729 if (!needsEscape) {
730 return sizeof(T);
731 }
732
733 if (folly::kIsLittleEndian) {
734 return folly::findFirstSet(needsEscape) / 8 - 1;
735 } else {
736 return sizeof(T) - folly::findLastSet(needsEscape) / 8;
737 }
738}
739
// Escape a string so that it is legal to print it in JSON text.
//
// Appends `input`, surrounded by double quotes, to `out`. Backslash, double
// quote and bytes <= 0x1f are always escaped. Depending on `opts`, UTF-8 is
// validated while escaping, invalid sequences are replaced by U+FFFD,
// non-ASCII characters are written as \uXXXX escapes, and (when
// EnableExtraAsciiEscapes is true) the ASCII characters selected in
// opts.extra_ascii_to_escape_bitmap are written as \uXXXX escapes too.
template <bool EnableExtraAsciiEscapes>
void escapeStringImpl(
    StringPiece input,
    std::string& out,
    const serialization_opts& opts) {
  // Maps a value in [0, 15] to its lowercase hex digit.
  auto hexDigit = [](uint8_t c) -> char {
    return c < 10 ? c + '0' : c - 10 + 'a';
  };

  out.push_back('\"');

  // p: next byte to emit. q: how far UTF-8 validation has progressed (only
  // used when validation is enabled). e: end of input.
  auto* p = reinterpret_cast<const unsigned char*>(input.begin());
  auto* q = reinterpret_cast<const unsigned char*>(input.begin());
  auto* e = reinterpret_cast<const unsigned char*>(input.end());

  while (p < e) {
    // Find the longest prefix that does not need escaping, and copy
    // it literally into the output string.
    auto firstEsc = p;
    while (firstEsc < e) {
      // Examine up to 8 bytes at a time; the tail of the input is loaded
      // partially.
      auto avail = e - firstEsc;
      uint64_t word = 0;
      if (avail >= 8) {
        word = folly::loadUnaligned<uint64_t>(firstEsc);
      } else {
        word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
      }
      auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
      DCHECK_LE(prefix, avail);
      firstEsc += prefix;
      // Fewer than 8 clean bytes: an escapable byte (or the end) was found.
      if (prefix < 8) {
        break;
      }
    }
    if (firstEsc > p) {
      out.append(reinterpret_cast<const char*>(p), firstEsc - p);
      p = firstEsc;
      // We can't be in the middle of a multibyte sequence, so we can reset q.
      q = p;
      if (p == e) {
        break;
      }
    }

    // Handle the next byte that may need escaping.

    // Since non-ascii encoding inherently does utf8 validation
    // we explicitly validate utf8 only if non-ascii encoding is disabled.
    if ((opts.validate_utf8 || opts.skip_invalid_utf8) &&
        !opts.encode_non_ascii) {
      // To achieve better spatial and temporal coherence
      // we do utf8 validation progressively along with the
      // string-escaping instead of two separate passes.

      // As the encoding progresses, q will stay at or ahead of p.
      CHECK_GE(q, p);

      // As p catches up with q, move q forward.
      if (q == p) {
        // calling utf8_decode has the side effect of
        // checking that utf8 encodings are valid
        // NOTE(review): utf8ToCodePoint presumably advances q past the
        // decoded sequence -- confirm against its definition.
        char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
        if (opts.skip_invalid_utf8 && v == U'\ufffd') {
          // Emit U+FFFD as UTF-8 and resume after the bytes q stepped over.
          out.append(reinterpret_cast<const char*>(u8"\ufffd"));
          p = q;
          continue;
        }
      }
    }

    // Decide whether the current character is written as a \uXXXX escape.
    auto encodeUnicode = opts.encode_non_ascii && (*p & 0x80);
    if /* constexpr */ (EnableExtraAsciiEscapes) {
      encodeUnicode = encodeUnicode ||
          (*p >= 0x20 && *p < 0x80 &&
           (opts.extra_ascii_to_escape_bitmap[*p / 64] &
            (uint64_t(1) << (*p % 64))));
    }

    if (encodeUnicode) {
      // note that this if condition captures utf8 chars
      // with value > 127, so size > 1 byte (or they are whitelisted for
      // Unicode encoding).
      // NOTE: char32_t / char16_t are both unsigned.
      // NOTE(review): utf8ToCodePoint presumably advances p past the decoded
      // character; nothing else in this branch moves p -- confirm.
      char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
      // Appends one "\uXXXX" sequence for a 16-bit value.
      auto writeHex = [&](char16_t v) {
        char buf[] = "\\u\0\0\0\0";
        buf[2] = hexDigit((v >> 12) & 0x0f);
        buf[3] = hexDigit((v >> 8) & 0x0f);
        buf[4] = hexDigit((v >> 4) & 0x0f);
        buf[5] = hexDigit(v & 0x0f);
        out.append(buf, 6);
      };
      // From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
      if (cp < 0x10000u) {
        // If the code point is in the Basic Multilingual Plane (U+0000 through
        // U+FFFF), then it may be represented as a six-character sequence:
        // a reverse solidus, followed by the lowercase letter u, followed by
        // four hexadecimal digits that encode the code point.
        writeHex(static_cast<char16_t>(cp));
      } else {
        // To escape a code point that is not in the Basic Multilingual Plane,
        // the character may be represented as a twelve-character sequence,
        // encoding the UTF-16 surrogate pair corresponding to the code point.
        writeHex(static_cast<char16_t>(
            0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu)));
        writeHex(static_cast<char16_t>(0xdc00u + ((cp - 0x10000u) & 0x3ffu)));
      }
    } else if (*p == '\\' || *p == '\"') {
      // Backslash-escape the character itself.
      char buf[] = "\\\0";
      buf[1] = char(*p++);
      out.append(buf, 2);
    } else if (*p <= 0x1f) {
      switch (*p) {
        // clang-format off
        case '\b': out.append("\\b"); p++; break;
        case '\f': out.append("\\f"); p++; break;
        case '\n': out.append("\\n"); p++; break;
        case '\r': out.append("\\r"); p++; break;
        case '\t': out.append("\\t"); p++; break;
        // clang-format on
        default:
          // Note that this if condition captures non readable chars
          // with value < 32, so size = 1 byte (e.g control chars).
          char buf[] = "\\u00\0\0";
          buf[4] = hexDigit(uint8_t((*p & 0xf0) >> 4));
          buf[5] = hexDigit(uint8_t(*p & 0xf));
          out.append(buf, 6);
          p++;
      }
    } else {
      // Nothing to escape: copy the byte through as is.
      out.push_back(char(*p++));
    }
  }

  out.push_back('\"');
}
877
878void escapeString(
879 StringPiece input,
880 std::string& out,
881 const serialization_opts& opts) {
882 if (FOLLY_UNLIKELY(
883 opts.extra_ascii_to_escape_bitmap[0] ||
884 opts.extra_ascii_to_escape_bitmap[1])) {
885 escapeStringImpl<true>(input, out, opts);
886 } else {
887 escapeStringImpl<false>(input, out, opts);
888 }
889}
890
891std::string stripComments(StringPiece jsonC) {
892 std::string result;
893 enum class State {
894 None,
895 InString,
896 InlineComment,
897 LineComment
898 } state = State::None;
899
900 for (size_t i = 0; i < jsonC.size(); ++i) {
901 auto s = jsonC.subpiece(i);
902 switch (state) {
903 case State::None:
904 if (s.startsWith("/*")) {
905 state = State::InlineComment;
906 ++i;
907 continue;
908 } else if (s.startsWith("//")) {
909 state = State::LineComment;
910 ++i;
911 continue;
912 } else if (s[0] == '\"') {
913 state = State::InString;
914 }
915 result.push_back(s[0]);
916 break;
917 case State::InString:
918 if (s[0] == '\\') {
919 if (UNLIKELY(s.size() == 1)) {
920 throw std::logic_error("Invalid JSONC: string is not terminated");
921 }
922 result.push_back(s[0]);
923 result.push_back(s[1]);
924 ++i;
925 continue;
926 } else if (s[0] == '\"') {
927 state = State::None;
928 }
929 result.push_back(s[0]);
930 break;
931 case State::InlineComment:
932 if (s.startsWith("*/")) {
933 state = State::None;
934 ++i;
935 }
936 break;
937 case State::LineComment:
938 if (s[0] == '\n') {
939 // skip the line break. It doesn't matter.
940 state = State::None;
941 }
942 break;
943 default:
944 throw std::logic_error("Unknown comment state");
945 }
946 }
947 return result;
948}
949
950} // namespace json
951
952//////////////////////////////////////////////////////////////////////
953
954dynamic parseJsonWithMetadata(StringPiece range, json::metadata_map* map) {
955 return parseJsonWithMetadata(range, json::serialization_opts(), map);
956}
957
958dynamic parseJsonWithMetadata(
959 StringPiece range,
960 json::serialization_opts const& opts,
961 json::metadata_map* map) {
962 json::Input in(range, &opts);
963
964 uint32_t n = in.getLineNum();
965 auto ret = parseValue(in, map);
966 if (map) {
967 map->emplace(&ret, json::parse_metadata{{{0}}, {{n}}});
968 }
969
970 in.skipWhitespace();
971 if (in.size() && *in != '\0') {
972 in.error("parsing didn't consume all input");
973 }
974 return ret;
975}
976
977dynamic parseJson(StringPiece range) {
978 return parseJson(range, json::serialization_opts());
979}
980
981dynamic parseJson(StringPiece range, json::serialization_opts const& opts) {
982 json::Input in(range, &opts);
983
984 auto ret = parseValue(in, nullptr);
985 in.skipWhitespace();
986 if (in.size() && *in != '\0') {
987 in.error("parsing didn't consume all input");
988 }
989 return ret;
990}
991
992std::string toJson(dynamic const& dyn) {
993 return json::serialize(dyn, json::serialization_opts());
994}
995
996std::string toPrettyJson(dynamic const& dyn) {
997 json::serialization_opts opts;
998 opts.pretty_formatting = true;
999 opts.sort_keys = true;
1000 return json::serialize(dyn, opts);
1001}
1002
1003//////////////////////////////////////////////////////////////////////
// dynamic::print_as_pseudo_json() is implemented here for header
// ordering reasons (most of the dynamic implementation is in
// dynamic-inl.h, and we don't want dynamic-inl.h to include json.h).
1007
1008void dynamic::print_as_pseudo_json(std::ostream& out) const {
1009 json::serialization_opts opts;
1010 opts.allow_non_string_keys = true;
1011 opts.allow_nan_inf = true;
1012 out << json::serialize(*this, opts);
1013}
1014
1015void PrintTo(const dynamic& dyn, std::ostream* os) {
1016 json::serialization_opts opts;
1017 opts.allow_nan_inf = true;
1018 opts.allow_non_string_keys = true;
1019 opts.pretty_formatting = true;
1020 opts.sort_keys = true;
1021 *os << json::serialize(dyn, opts);
1022}
1023
1024//////////////////////////////////////////////////////////////////////
1025
1026} // namespace folly
1027