json.cpp source code [glow/thirdparty/folly/folly/json.cpp]

1	/*
2	* Copyright (c) Facebook, Inc. and its affiliates.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	#include <folly/json.h>
18
19	#include <algorithm>
20	#include <functional>
21	#include <iterator>
22	#include <type_traits>
23
24	#include <boost/algorithm/string.hpp>
25	#include <glog/logging.h>
26
27	#include <folly/Conv.h>
28	#include <folly/Portability.h>
29	#include <folly/Range.h>
30	#include <folly/String.h>
31	#include <folly/Unicode.h>
32	#include <folly/lang/Bits.h>
33	#include <folly/portability/Constexpr.h>
34
35	namespace folly {
36
37	//////////////////////////////////////////////////////////////////////
38
39	namespace json {
40
41	namespace {
42
43	parse_error make_parse_error(
44	unsigned int line,
45	std::string const& context,
46	std::string const& expected) {
47	return parse_error (to<std::string>(
48	"json parse error on line ",
49	line,
50	!context.empty() ? to<std::string>(" near `", context, `'\''`) : "",
51	": ",
52	expected));
53	}
54
55	struct Printer {
56	explicit Printer(
57	std::string& out,
58	unsigned* indentLevel,
59	serialization_opts const* opts)
60	: out_(out), indentLevel_(indentLevel), opts_(*opts) {}
61
62	void operator()(dynamic const& v) const {
63	switch (v.type()) {
64	case dynamic::DOUBLE:
65	if (!opts_.allow_nan_inf &&
66	(std::isnan(v.asDouble()) \|\| std::isinf(v.asDouble()))) {
67	throw json::parse_error (
68	"folly::toJson: JSON object value was a "
69	"NaN or INF");
70	}
71	toAppend(
72	v.asDouble(), &out_, opts_.double_mode, opts_.double_num_digits);
73	break;
74	case dynamic::INT64: {
75	auto intval = v.asInt();
76	if (opts_.javascript_safe) {
77	// Use folly::to to check that this integer can be represented
78	// as a double without loss of precision.
79	intval = int64_t(to<double>(intval));
80	}
81	toAppend(intval, &out_);
82	break;
83	}
84	case dynamic::BOOL:
85	out_ += v.asBool() ? "true" : "false";
86	break;
87	case dynamic::NULLT:
88	out_ += "null";
89	break;
90	case dynamic::STRING:
91	escapeString(v.asString(), out_, opts_);
92	break;
93	case dynamic::OBJECT:
94	printObject(v);
95	break;
96	case dynamic::ARRAY:
97	printArray(v);
98	break;
99	default:
100	CHECK(`0`) << "Bad type " << v.type();
101	}
102	}
103
104	private:
105	void printKV(const std::pair<const dynamic, dynamic>& p) const {
106	if (!opts_.allow_non_string_keys && !p.first.isString()) {
107	throw json::parse_error (
108	"folly::toJson: JSON object key was not a "
109	"string");
110	}
111	(*this)(p.first);
112	mapColon();
113	(*this)(p.second);
114	}
115
116	template <typename Iterator>
117	void printKVPairs(Iterator begin, Iterator end) const {
118	printKV(*begin);
119	for (++begin; begin != end; ++begin) {
120	out_ += `','`;
121	newline();
122	printKV(*begin);
123	}
124	}
125
126	void printObject(dynamic const& o) const {
127	if (o.empty()) {
128	out_ += "{}";
129	return;
130	}
131
132	out_ += `'{'`;
133	indent();
134	newline();
135	if (opts_.sort_keys \|\| opts_.sort_keys_by) {
136	using ref = std::reference_wrapper<decltype(o.items())::value_type const>;
137	auto sort_keys_by = [&](auto begin, auto end, const auto& comp) {
138	std::sort(begin, end, [&](ref a, ref b) {
139	// Only compare keys. No ordering among identical keys.
140	return comp(a.get().first, b.get().first);
141	});
142	};
143	std::vector<ref> refs(o.items().begin(), o.items().end());
144	if (opts_.sort_keys_by) {
145	sort_keys_by(refs.begin(), refs.end(), opts_.sort_keys_by);
146	} else {
147	sort_keys_by(refs.begin(), refs.end(), std::less<>());
148	}
149	printKVPairs(refs.cbegin(), refs.cend());
150	} else {
151	printKVPairs(o.items().begin(), o.items().end());
152	}
153	outdent();
154	newline();
155	out_ += `'}'`;
156	}
157
158	void printArray(dynamic const& a) const {
159	if (a.empty()) {
160	out_ += "[]";
161	return;
162	}
163
164	out_ += `'['`;
165	indent();
166	newline();
167	(*this)(a [`0`]);
168	for (auto& val : range(std::next(a.begin()), a.end())) {
169	out_ += `','`;
170	newline();
171	(*this)(val);
172	}
173	outdent();
174	newline();
175	out_ += `']'`;
176	}
177
178	private:
179	void outdent() const {
180	if (indentLevel_) {
181	--*indentLevel_;
182	}
183	}
184
185	void indent() const {
186	if (indentLevel_) {
187	++*indentLevel_;
188	}
189	}
190
191	void newline() const {
192	if (indentLevel_) {
193	out_ += to<std::string>(`'\n'`, std::string (indentLevel_ `2`, `' '`));
194	}
195	}
196
197	void mapColon() const {
198	out_ += indentLevel_ ? ": " : ":";
199	}
200
201	private:
202	std::string& out_;
203	unsigned* const indentLevel_;
204	serialization_opts const& opts_;
205	};
206
207	//////////////////////////////////////////////////////////////////////
208
209	// Wraps our input buffer with some helper functions.
210	struct Input {
211	explicit Input(StringPiece range, json::serialization_opts const* opts)
212	: range_(range), opts_(*opts), lineNum_(`0`) {
213	storeCurrent();
214	}
215
216	Input(Input const&) = delete;
217	Input& operator=(Input const&) = delete;
218
219	char const* begin() const {
220	return range_.begin();
221	}
222
223	unsigned getLineNum() const {
224	return lineNum_;
225	}
226
227	// Parse ahead for as long as the supplied predicate is satisfied,
228	// returning a range of what was skipped.
229	template <class Predicate>
230	StringPiece skipWhile(const Predicate& p) {
231	std::size_t skipped = `0`;
232	for (; skipped < range_.size(); ++skipped) {
233	if (!p(range_[skipped])) {
234	break;
235	}
236	if (range_[skipped] == `'\n'`) {
237	++lineNum_;
238	}
239	}
240	auto ret = range_.subpiece(`0`, skipped);
241	range_.advance(skipped);
242	storeCurrent();
243	return ret;
244	}
245
246	StringPiece skipDigits() {
247	return skipWhile([](char c) { return c >= `'0'` && c <= `'9'`; });
248	}
249
250	StringPiece skipMinusAndDigits() {
251	bool firstChar = true;
252	return skipWhile([&firstChar](char c) {
253	bool result = (c >= `'0'` && c <= `'9'`) \|\| (firstChar && c == `'-'`);
254	firstChar = false;
255	return result;
256	});
257	}
258
259	void skipWhitespace() {
260	unsigned index = `0`;
261	while (true) {
262	while (index < range_.size() && range_[index] == `' '`) {
263	index++;
264	}
265	if (index < range_.size()) {
266	if (range_[index] == `'\n'`) {
267	index++;
268	++lineNum_;
269	continue;
270	}
271	if (range_[index] == `'\t'` \|\| range_[index] == `'\r'`) {
272	index++;
273	continue;
274	}
275	}
276	break;
277	}
278	range_.advance(index);
279	storeCurrent();
280	}
281
282	void expect(char c) {
283	if (**this != c) {
284	throw json::make_parse_error(
285	lineNum_, context(), to<std::string>("expected '", c, `'\''`));
286	}
287	++*this;
288	}
289
290	std::size_t size() const {
291	return range_.size();
292	}
293
294	int operator() const* {
295	return current_;
296	}
297
298	void operator++() {
299	range_.pop_front();
300	storeCurrent();
301	}
302
303	template <class T>
304	T extract() {
305	try {
306	return to<T>(&range_);
307	} catch (std::exception const& e) {
308	error(e.what());
309	}
310	}
311
312	bool consume(StringPiece str) {
313	if (boost::starts_with(range_, str)) {
314	range_.advance(str.size());
315	storeCurrent();
316	return true;
317	}
318	return false;
319	}
320
321	std::string context() const {
322	return range_.subpiece(`0`, `16` / arbitrary /).toString();
323	}
324
325	dynamic error(char const* what) const {
326	throw json::make_parse_error(lineNum_, context(), what);
327	}
328
329	json::serialization_opts const& getOpts() {
330	return opts_;
331	}
332
333	void incrementRecursionLevel() {
334	if (currentRecursionLevel_ > opts_.recursion_limit) {
335	error("recursion limit exceeded");
336	}
337	currentRecursionLevel_++;
338	}
339
340	void decrementRecursionLevel() {
341	currentRecursionLevel_--;
342	}
343
344	private:
345	void storeCurrent() {
346	current_ = range_.empty() ? EOF : range_.front();
347	}
348
349	private:
350	StringPiece range_;
351	json::serialization_opts const& opts_;
352	unsigned lineNum_;
353	int current_;
354	unsigned int currentRecursionLevel_{`0`};
355	};
356
357	class RecursionGuard {
358	public:
359	explicit RecursionGuard(Input& in) : in_(in) {
360	in_.incrementRecursionLevel();
361	}
362
363	~RecursionGuard() {
364	in_.decrementRecursionLevel();
365	}
366
367	private:
368	Input& in_;
369	};
370
371	dynamic parseValue(Input& in, json::metadata_map* map);
372	std::string parseString(Input& in);
373	dynamic parseNumber(Input& in);
374
375	template <class K>
376	void parseObjectKeyValue(
377	Input& in,
378	dynamic& ret,
379	K&& key,
380	json::metadata_map* map) {
381	auto keyLineNumber = in.getLineNum();
382	in.skipWhitespace();
383	in.expect(`':'`);
384	in.skipWhitespace();
385	K tmp;
386	if (map) {
387	tmp = K(key);
388	}
389	auto valueLineNumber = in.getLineNum();
390	ret.insert(std::forward<K>(key), parseValue(in, map));
391	if (map) {
392	auto val = ret.get_ptr(tmp);
393	// We just inserted it, so it should be there!
394	DCHECK(val != nullptr);
395	map->emplace(
396	val, json::parse_metadata{{{keyLineNumber}}, {{valueLineNumber}}});
397	}
398	}
399
400	dynamic parseObject(Input& in, json::metadata_map* map) {
401	DCHECK_EQ(*in, `'{'`);
402	++in;
403
404	dynamic ret = dynamic::object;
405
406	in.skipWhitespace();
407	if (*in == `'}'`) {
408	++in;
409	return ret;
410	}
411
412	for (;;) {
413	if (in.getOpts().allow_trailing_comma && *in == `'}'`) {
414	break;
415	}
416	if (in == `'\"'`) { // string*
417	auto key = parseString(in);
418	parseObjectKeyValue(in, ret, std::move(key), map);
419	} else if (!in.getOpts().allow_non_string_keys) {
420	in.error("expected string for object key name");
421	} else {
422	auto key = parseValue(in, map);
423	parseObjectKeyValue(in, ret, std::move(key), map);
424	}
425
426	in.skipWhitespace();
427	if (*in != `','`) {
428	break;
429	}
430	++in;
431	in.skipWhitespace();
432	}
433	in.expect(`'}'`);
434
435	return ret;
436	}
437
438	dynamic parseArray(Input& in, json::metadata_map* map) {
439	DCHECK_EQ(*in, `'['`);
440	++in;
441
442	dynamic ret = dynamic::array;
443
444	in.skipWhitespace();
445	if (*in == `']'`) {
446	++in;
447	return ret;
448	}
449
450	std::vector<uint32_t> lineNumbers;
451	for (;;) {
452	if (in.getOpts().allow_trailing_comma && *in == `']'`) {
453	break;
454	}
455	ret.push_back(parseValue(in, map));
456	if (map) {
457	lineNumbers.push_back(in.getLineNum());
458	}
459	in.skipWhitespace();
460	if (*in != `','`) {
461	break;
462	}
463	++in;
464	in.skipWhitespace();
465	}
466	if (map) {
467	for (size_t i = `0`; i < ret.size(); i++) {
468	map->emplace(&ret [i], json::parse_metadata{{{`0`}}, {{lineNumbers [i]}}});
469	}
470	}
471	in.expect(`']'`);
472
473	return ret;
474	}
475
476	dynamic parseNumber(Input& in) {
477	bool const negative = (*in == `'-'`);
478	if (negative && in.consume("-Infinity")) {
479	if (in.getOpts().parse_numbers_as_strings) {
480	return "-Infinity";
481	} else {
482	return -std::numeric_limits<double>::infinity();
483	}
484	}
485
486	auto integral = in.skipMinusAndDigits();
487	if (negative && integral.size() < `2`) {
488	in.error("expected digits after `-'");
489	}
490
491	auto const wasE = in == `'e'` \|\| in == `'E'`;
492
493	constexpr const char* maxInt = "9223372036854775807";
494	constexpr const char* minInt = "-9223372036854775808";
495	constexpr auto maxIntLen = constexpr_strlen(maxInt);
496	constexpr auto minIntLen = constexpr_strlen(minInt);
497
498	if (*in != `'.'` && !wasE && in.getOpts().parse_numbers_as_strings) {
499	return integral;
500	}
501
502	if (*in != `'.'` && !wasE) {
503	if (LIKELY(!in.getOpts().double_fallback \|\| integral.size() < maxIntLen) \|\|
504	(!negative && integral.size() == maxIntLen && integral <= maxInt) \|\|
505	(negative && integral.size() == minIntLen && integral <= minInt)) {
506	auto val = to<int64_t>(integral);
507	in.skipWhitespace();
508	return val;
509	} else {
510	auto val = to<double>(integral);
511	in.skipWhitespace();
512	return val;
513	}
514	}
515
516	auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
517	if (in == `'e'` \|\| in == `'E'`) {
518	++in;
519	if (in == `'+'` \|\| in == `'-'`) {
520	++in;
521	}
522	auto expPart = in.skipDigits();
523	end = expPart.end();
524	}
525	auto fullNum = range(integral.begin(), end);
526	if (in.getOpts().parse_numbers_as_strings) {
527	return fullNum;
528	}
529	auto val = to<double>(fullNum);
530	return val;
531	}
532
533	std::string decodeUnicodeEscape(Input& in) {
534	auto hexVal = [&](int c) -> uint16_t {
535	// clang-format off
536	return uint16_t(
537	c >= `'0'` && c <= `'9'` ? c - `'0'` :
538	c >= `'a'` && c <= `'f'` ? c - `'a'` + `10` :
539	c >= `'A'` && c <= `'F'` ? c - `'A'` + `10` :
540	(in.error("invalid hex digit"), `0`));
541	// clang-format on
542	};
543
544	auto readHex = [&]() -> uint16_t {
545	if (in.size() < `4`) {
546	in.error("expected 4 hex digits");
547	}
548
549	auto ret = uint16_t(hexVal (in) `4096`);
550	++in;
551	ret += hexVal (in) `256`;
552	++in;
553	ret += hexVal (in) `16`;
554	++in;
555	ret += hexVal (*in);
556	++in;
557	return ret;
558	};
559
560	/*
561	* If the value encoded is in the surrogate pair range, we need to
562	* make sure there is another escape that we can use also.
563	*/
564	uint32_t codePoint = readHex ();
565	if (codePoint >= `0xd800` && codePoint <= `0xdbff`) {
566	if (!in.consume("\\u")) {
567	in.error(
568	"expected another unicode escape for second half of "
569	"surrogate pair");
570	}
571	uint16_t second = readHex ();
572	if (second >= `0xdc00` && second <= `0xdfff`) {
573	codePoint = `0x10000` + ((codePoint & `0x3ff`) << `10`) + (second & `0x3ff`);
574	} else {
575	in.error("second character in surrogate pair is invalid");
576	}
577	} else if (codePoint >= `0xdc00` && codePoint <= `0xdfff`) {
578	in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
579	}
580
581	return codePointToUtf8(codePoint);
582	}
583
584	std::string parseString(Input& in) {
585	DCHECK_EQ(*in, `'\"'`);
586	++in;
587
588	std::string ret;
589	for (;;) {
590	auto range = in.skipWhile([](char c) { return c != `'\"'` && c != `'\\'`; });
591	ret.append(range.begin(), range.end());
592
593	if (*in == `'\"'`) {
594	++in;
595	break;
596	}
597	if (*in == `'\\'`) {
598	++in;
599	switch (*in) {
600	// clang-format off
601	case `'\"'`: ret.push_back(`'\"'`); ++in; break;
602	case `'\\'`: ret.push_back(`'\\'`); ++in; break;
603	case `'/'`: ret.push_back(`'/'`); ++in; break;
604	case `'b'`: ret.push_back(`'\b'`); ++in; break;
605	case `'f'`: ret.push_back(`'\f'`); ++in; break;
606	case `'n'`: ret.push_back(`'\n'`); ++in; break;
607	case `'r'`: ret.push_back(`'\r'`); ++in; break;
608	case `'t'`: ret.push_back(`'\t'`); ++in; break;
609	case `'u'`: ++in; ret += decodeUnicodeEscape(in); break;
610	// clang-format on
611	default:
612	in.error(
613	to<std::string>("unknown escape ", *in, " in string").c_str());
614	}
615	continue;
616	}
617	if (*in == EOF) {
618	in.error("unterminated string");
619	}
620	if (!*in) {
621	/*
622	* Apparently we're actually supposed to ban all control
623	* characters from strings. This seems unnecessarily
624	* restrictive, so we're only banning zero bytes. (Since the
625	* string is presumed to be UTF-8 encoded it's fine to just
626	* check this way.)
627	*/
628	in.error("null byte in string");
629	}
630
631	ret.push_back(char(*in));
632	++in;
633	}
634
635	return ret;
636	}
637
638	dynamic parseValue(Input& in, json::metadata_map* map) {
639	RecursionGuard guard(in);
640
641	in.skipWhitespace();
642	// clang-format off
643	return
644	*in == `'['` ? parseArray(in, map) :
645	*in == `'{'` ? parseObject(in, map) :
646	*in == `'\"'` ? parseString(in) :
647	(in == `'-'` \|\| (in >= `'0'` && *in <= `'9'`)) ? parseNumber(in) :
648	in.consume("true") ? true :
649	in.consume("false") ? false :
650	in.consume("null") ? nullptr :
651	in.consume("Infinity") ?
652	(in.getOpts().parse_numbers_as_strings ? (dynamic)"Infinity" :
653	(dynamic)std::numeric_limits<double>::infinity()) :
654	in.consume("NaN") ?
655	(in.getOpts().parse_numbers_as_strings ? (dynamic)"NaN" :
656	(dynamic)std::numeric_limits<double>::quiet_NaN()) :
657	in.error("expected json value");
658	// clang-format on
659	}
660
661	} // namespace
662
663	//////////////////////////////////////////////////////////////////////
664
665	std::array<uint64_t, `2`> buildExtraAsciiToEscapeBitmap(StringPiece chars) {
666	std::array<uint64_t, `2`> escapes{{`0`, `0`}};
667	for (auto b : ByteRange (chars)) {
668	if (b >= `0x20` && b < `0x80`) {
669	escapes [b / `64`] \|= uint64_t(`1`) << (b % `64`);
670	}
671	}
672	return escapes;
673	}
674
675	std::string serialize(dynamic const& dyn, serialization_opts const& opts) {
676	std::string ret;
677	unsigned indentLevel = `0`;
678	Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
679	p (dyn);
680	return ret;
681	}
682
683	// Fast path to determine the longest prefix that can be left
684	// unescaped in a string of sizeof(T) bytes packed in an integer of
685	// type T.
686	template <bool EnableExtraAsciiEscapes, class T>
687	size_t firstEscapableInWord(T s, const serialization_opts& opts) {
688	static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
689	static constexpr T kOnes = ~T() / `255`; // 0x...0101
690	static constexpr T kMsbs = kOnes * `0x80`; // 0x...8080
691
692	// Sets the MSB of bytes < b. Precondition: b < 128.
693	auto isLess = [](T w, uint8_t b) {
694	// A byte is < b iff subtracting b underflows, so we check that
695	// the MSB wasn't set before and it's set after the subtraction.
696	return (w - kOnes * b) & ~w & kMsbs;
697	};
698
699	auto isChar = [&](uint8_t c) {
700	// A byte is == c iff it is 0 if xored with c.
701	return isLess(s ^ (kOnes * c), `1`);
702	};
703
704	// The following masks have the MSB set for each byte of the word
705	// that satisfies the corresponding condition.
706	auto isHigh = s & kMsbs; // >= 128
707	auto isLow = isLess(s, `0x20`); // <= 0x1f
708	auto needsEscape = isHigh \| isLow \| isChar(`'\\'`) \| isChar(`'"'`);
709
710	if / constexpr / (EnableExtraAsciiEscapes) {
711	// Deal with optional bitmap for unicode escapes. Escapes can optionally be
712	// set for ascii characters 32 - 127, so the inner loop may run up to 96
713	// times. However, for the case where 0 or a handful of bits are set,
714	// looping will be minimal through use of findFirstSet.
715	for (size_t i = `0`; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
716	const auto offset = i * `64`;
717	// Clear first 32 characters if this is the first index, since those are
718	// always escaped.
719	auto bitmap = opts.extra_ascii_to_escape_bitmap [i] &
720	(i == `0` ? uint64_t(-`1`) << `32` : ~`0UL`);
721	while (bitmap) {
722	auto bit = folly::findFirstSet(bitmap);
723	needsEscape \|= isChar(static_cast<uint8_t>(offset + bit - `1`));
724	bitmap &= bitmap - `1`;
725	}
726	}
727	}
728
729	if (!needsEscape) {
730	return sizeof(T);
731	}
732
733	if (folly::kIsLittleEndian) {
734	return folly::findFirstSet(needsEscape) / `8` - `1`;
735	} else {
736	return sizeof(T) - folly::findLastSet(needsEscape) / `8`;
737	}
738	}
739
740	// Escape a string so that it is legal to print it in JSON text.
741	template <bool EnableExtraAsciiEscapes>
742	void escapeStringImpl(
743	StringPiece input,
744	std::string& out,
745	const serialization_opts& opts) {
746	auto hexDigit = [](uint8_t c) -> char {
747	return c < `10` ? c + `'0'` : c - `10` + `'a'`;
748	};
749
750	out.push_back(`'\"'`);
751
752	auto* p = reinterpret_cast<const unsigned char*>(input.begin());
753	auto* q = reinterpret_cast<const unsigned char*>(input.begin());
754	auto* e = reinterpret_cast<const unsigned char*>(input.end());
755
756	while (p < e) {
757	// Find the longest prefix that does not need escaping, and copy
758	// it literally into the output string.
759	auto firstEsc = p;
760	while (firstEsc < e) {
761	auto avail = e - firstEsc;
762	uint64_t word = `0`;
763	if (avail >= `8`) {
764	word = folly::loadUnaligned<uint64_t>(firstEsc);
765	} else {
766	word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
767	}
768	auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
769	DCHECK_LE(prefix, avail);
770	firstEsc += prefix;
771	if (prefix < `8`) {
772	break;
773	}
774	}
775	if (firstEsc > p) {
776	out.append(reinterpret_cast<const char*>(p), firstEsc - p);
777	p = firstEsc;
778	// We can't be in the middle of a multibyte sequence, so we can reset q.
779	q = p;
780	if (p == e) {
781	break;
782	}
783	}
784
785	// Handle the next byte that may need escaping.
786
787	// Since non-ascii encoding inherently does utf8 validation
788	// we explicitly validate utf8 only if non-ascii encoding is disabled.
789	if ((opts.validate_utf8 \|\| opts.skip_invalid_utf8) &&
790	!opts.encode_non_ascii) {
791	// To achieve better spatial and temporal coherence
792	// we do utf8 validation progressively along with the
793	// string-escaping instead of two separate passes.
794
795	// As the encoding progresses, q will stay at or ahead of p.
796	CHECK_GE(q, p);
797
798	// As p catches up with q, move q forward.
799	if (q == p) {
800	// calling utf8_decode has the side effect of
801	// checking that utf8 encodings are valid
802	char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
803	if (opts.skip_invalid_utf8 && v == U`'\ufffd'`) {
804	out.append(reinterpret_cast<const char*>(u8"\ufffd"));
805	p = q;
806	continue;
807	}
808	}
809	}
810
811	auto encodeUnicode = opts.encode_non_ascii && (*p & `0x80`);
812	if / constexpr / (EnableExtraAsciiEscapes) {
813	encodeUnicode = encodeUnicode \|\|
814	(p >= `0x20` && p < `0x80` &&
815	(opts.extra_ascii_to_escape_bitmap [*p / `64`] &
816	(uint64_t(`1`) << (*p % `64`))));
817	}
818
819	if (encodeUnicode) {
820	// note that this if condition captures utf8 chars
821	// with value > 127, so size > 1 byte (or they are whitelisted for
822	// Unicode encoding).
823	// NOTE: char32_t / char16_t are both unsigned.
824	char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
825	auto writeHex = [&](char16_t v) {
826	char buf[] = "\\u\0\0\0\0";
827	buf[`2`] = hexDigit((v >> `12`) & `0x0f`);
828	buf[`3`] = hexDigit((v >> `8`) & `0x0f`);
829	buf[`4`] = hexDigit((v >> `4`) & `0x0f`);
830	buf[`5`] = hexDigit(v & `0x0f`);
831	out.append(buf, `6`);
832	};
833	// From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
834	if (cp < `0x10000u`) {
835	// If the code point is in the Basic Multilingual Plane (U+0000 through
836	// U+FFFF), then it may be represented as a six-character sequence:
837	// a reverse solidus, followed by the lowercase letter u, followed by
838	// four hexadecimal digits that encode the code point.
839	writeHex(static_cast<char16_t>(cp));
840	} else {
841	// To escape a code point that is not in the Basic Multilingual Plane,
842	// the character may be represented as a twelve-character sequence,
843	// encoding the UTF-16 surrogate pair corresponding to the code point.
844	writeHex(static_cast<char16_t>(
845	`0xd800u` + (((cp - `0x10000u`) >> `10`) & `0x3ffu`)));
846	writeHex(static_cast<char16_t>(`0xdc00u` + ((cp - `0x10000u`) & `0x3ffu`)));
847	}
848	} else if (p == `'\\'` \|\| p == `'\"'`) {
849	char buf[] = "\\\0";
850	buf[`1`] = char(*p++);
851	out.append(buf, `2`);
852	} else if (*p <= `0x1f`) {
853	switch (*p) {
854	// clang-format off
855	case `'\b'`: out.append("\\b"); p++; break;
856	case `'\f'`: out.append("\\f"); p++; break;
857	case `'\n'`: out.append("\\n"); p++; break;
858	case `'\r'`: out.append("\\r"); p++; break;
859	case `'\t'`: out.append("\\t"); p++; break;
860	// clang-format on
861	default:
862	// Note that this if condition captures non readable chars
863	// with value < 32, so size = 1 byte (e.g control chars).
864	char buf[] = "\\u00\0\0";
865	buf[`4`] = hexDigit(uint8_t((*p & `0xf0`) >> `4`));
866	buf[`5`] = hexDigit(uint8_t(*p & `0xf`));
867	out.append(buf, `6`);
868	p++;
869	}
870	} else {
871	out.push_back(char(*p++));
872	}
873	}
874
875	out.push_back(`'\"'`);
876	}
877
878	void escapeString(
879	StringPiece input,
880	std::string& out,
881	const serialization_opts& opts) {
882	if (FOLLY_UNLIKELY(
883	opts.extra_ascii_to_escape_bitmap[`0`] \|\|
884	opts.extra_ascii_to_escape_bitmap[`1`])) {
885	escapeStringImpl<true>(input, out, opts);
886	} else {
887	escapeStringImpl<false>(input, out, opts);
888	}
889	}
890
891	std::string stripComments(StringPiece jsonC) {
892	std::string result;
893	enum class State {
894	None,
895	InString,
896	InlineComment,
897	LineComment
898	} state = State::None;
899
900	for (size_t i = `0`; i < jsonC.size(); ++i) {
901	auto s = jsonC.subpiece(i);
902	switch (state) {
903	case State::None:
904	if (s.startsWith("/*")) {
905	state = State::InlineComment;
906	++i;
907	continue;
908	} else if (s.startsWith("//")) {
909	state = State::LineComment;
910	++i;
911	continue;
912	} else if (s [`0`] == `'\"'`) {
913	state = State::InString;
914	}
915	result.push_back(s [`0`]);
916	break;
917	case State::InString:
918	if (s [`0`] == `'\\'`) {
919	if (UNLIKELY(s.size() == `1`)) {
920	throw std::logic_error ("Invalid JSONC: string is not terminated");
921	}
922	result.push_back(s [`0`]);
923	result.push_back(s [`1`]);
924	++i;
925	continue;
926	} else if (s [`0`] == `'\"'`) {
927	state = State::None;
928	}
929	result.push_back(s [`0`]);
930	break;
931	case State::InlineComment:
932	if (s.startsWith("*/")) {
933	state = State::None;
934	++i;
935	}
936	break;
937	case State::LineComment:
938	if (s [`0`] == `'\n'`) {
939	// skip the line break. It doesn't matter.
940	state = State::None;
941	}
942	break;
943	default:
944	throw std::logic_error ("Unknown comment state");
945	}
946	}
947	return result;
948	}
949
950	} // namespace json
951
952	//////////////////////////////////////////////////////////////////////
953
954	dynamic parseJsonWithMetadata(StringPiece range, json::metadata_map* map) {
955	return parseJsonWithMetadata(range, json::serialization_opts (), map);
956	}
957
958	dynamic parseJsonWithMetadata(
959	StringPiece range,
960	json::serialization_opts const& opts,
961	json::metadata_map* map) {
962	json::Input in(range, &opts);
963
964	uint32_t n = in.getLineNum();
965	auto ret = parseValue(in, map);
966	if (map) {
967	map->emplace(&ret, json::parse_metadata{{{`0`}}, {{n}}});
968	}
969
970	in.skipWhitespace();
971	if (in.size() && *in != `'\0'`) {
972	in.error("parsing didn't consume all input");
973	}
974	return ret;
975	}
976
977	dynamic parseJson(StringPiece range) {
978	return parseJson(range, json::serialization_opts ());
979	}
980
981	dynamic parseJson(StringPiece range, json::serialization_opts const& opts) {
982	json::Input in(range, &opts);
983
984	auto ret = parseValue(in, nullptr);
985	in.skipWhitespace();
986	if (in.size() && *in != `'\0'`) {
987	in.error("parsing didn't consume all input");
988	}
989	return ret;
990	}
991
992	std::string toJson(dynamic const& dyn) {
993	return json::serialize(dyn, json::serialization_opts ());
994	}
995
996	std::string toPrettyJson(dynamic const& dyn) {
997	json::serialization_opts opts;
998	opts.pretty_formatting = true;
999	opts.sort_keys = true;
1000	return json::serialize(dyn, opts);
1001	}
1002
1003	//////////////////////////////////////////////////////////////////////
1004	// dynamic::print_as_pseudo_json() is implemented here for header
1005	// ordering reasons (most of the dynamic implementation is in
1006	// dynamic-inl.h, which we don't want to include json.h).
1007
1008	void dynamic::print_as_pseudo_json(std::ostream& out) const {
1009	json::serialization_opts opts;
1010	opts.allow_non_string_keys = true;
1011	opts.allow_nan_inf = true;
1012	out << json::serialize(*this, opts);
1013	}
1014
1015	void PrintTo(const dynamic& dyn, std::ostream* os) {
1016	json::serialization_opts opts;
1017	opts.allow_nan_inf = true;
1018	opts.allow_non_string_keys = true;
1019	opts.pretty_formatting = true;
1020	opts.sort_keys = true;
1021	*os << json::serialize(dyn, opts);
1022	}
1023
1024	//////////////////////////////////////////////////////////////////////
1025
1026	} // namespace folly
1027

Browse the source code of glow/thirdparty/folly/folly/json.cpp