1 | // Adapted from https://github.com/PENGUINLIONG/graphi-t |
2 | |
3 | // Copyright (c) 2019 Rendong Liang |
4 | // |
5 | // Permission is hereby granted, free of charge, to any |
6 | // person obtaining a copy of this software and associated |
7 | // documentation files (the "Software"), to deal in the |
8 | // Software without restriction, including without |
9 | // limitation the rights to use, copy, modify, merge, |
10 | // publish, distribute, sublicense, and/or sell copies of |
11 | // the Software, and to permit persons to whom the Software |
12 | // is furnished to do so, subject to the following |
13 | // conditions: |
14 | // |
15 | // The above copyright notice and this permission notice |
16 | // shall be included in all copies or substantial portions |
17 | // of the Software. |
18 | // |
19 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF |
20 | // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED |
21 | // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A |
22 | // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
23 | // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
24 | // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
25 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR |
26 | // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
27 | // DEALINGS IN THE SOFTWARE. |
28 | |
29 | // JSON serialization/deserialization. |
30 | // @PENGUINLIONG |
31 | #include <sstream> |
32 | // #include "gft/log.hpp" |
33 | #include "taichi/common/json.h" |
34 | |
35 | namespace liong { |
36 | namespace json { |
37 | |
// Kinds of lexical tokens produced by the tokenizer. The numeric values are
// spelled out explicitly; they match the implicit 0-based numbering.
enum JsonTokenType {
  // No token could be read.
  L_JSON_TOKEN_UNDEFINED = 0,
  // Literals `null`, `true` and `false`.
  L_JSON_TOKEN_NULL = 1,
  L_JSON_TOKEN_TRUE = 2,
  L_JSON_TOKEN_FALSE = 3,
  // Double-quoted string.
  L_JSON_TOKEN_STRING = 4,
  // Numbers without / with a fraction or exponent part.
  L_JSON_TOKEN_INT = 5,
  L_JSON_TOKEN_FLOAT = 6,
  // Punctuation: `:` `,` `{` `}` `[` `]`.
  L_JSON_TOKEN_COLON = 7,
  L_JSON_TOKEN_COMMA = 8,
  L_JSON_TOKEN_OPEN_BRACE = 9,
  L_JSON_TOKEN_CLOSE_BRACE = 10,
  L_JSON_TOKEN_OPEN_BRACKET = 11,
  L_JSON_TOKEN_CLOSE_BRACKET = 12,
};
53 | struct JsonToken { |
54 | JsonTokenType ty; |
55 | int64_t num_int; |
56 | double num_float; |
57 | std::string str; |
58 | }; |
59 | |
60 | struct Tokenizer { |
61 | const char *pos; |
62 | const char *end; |
63 | |
64 | Tokenizer(const char *beg, const char *end) : pos(beg), end(end) { |
65 | } |
66 | |
67 | // Check the range first before calling this method. |
68 | bool unsafe_starts_with(const char *head) { |
69 | auto i = 0; |
70 | while (*head != '\0') { |
71 | if (pos[i++] != *(head++)) { |
72 | return false; |
73 | } |
74 | } |
75 | return true; |
76 | } |
77 | bool next_token(JsonToken &out) { |
78 | std::stringstream ss; |
79 | while (pos != end) { |
80 | char c = *pos; |
81 | |
82 | if (c == '\0') { |
83 | break; |
84 | } |
85 | |
86 | // Ignore whitespaces. |
87 | if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { |
88 | pos += 1; |
89 | continue; |
90 | } |
91 | |
92 | // Try parse scope punctuations. |
93 | switch (c) { |
94 | case ':': |
95 | out.ty = L_JSON_TOKEN_COLON; |
96 | pos += 1; |
97 | return true; |
98 | case ',': |
99 | out.ty = L_JSON_TOKEN_COMMA; |
100 | pos += 1; |
101 | return true; |
102 | case '{': |
103 | out.ty = L_JSON_TOKEN_OPEN_BRACE; |
104 | pos += 1; |
105 | return true; |
106 | case '}': |
107 | out.ty = L_JSON_TOKEN_CLOSE_BRACE; |
108 | pos += 1; |
109 | return true; |
110 | case '[': |
111 | out.ty = L_JSON_TOKEN_OPEN_BRACKET; |
112 | pos += 1; |
113 | return true; |
114 | case ']': |
115 | out.ty = L_JSON_TOKEN_CLOSE_BRACKET; |
116 | pos += 1; |
117 | return true; |
118 | } |
119 | |
120 | // Try parse numbers. |
121 | if (c == '+' || c == '-' || (c >= '0' && c <= '9')) { |
122 | out.ty = L_JSON_TOKEN_INT; |
123 | const int STATE_INTEGRAL = 0; |
124 | const int STATE_FRACTION = 1; |
125 | const int STATE_EXPONENT = 2; |
126 | int state = STATE_INTEGRAL; |
127 | do { |
128 | c = *pos; |
129 | if (state == STATE_INTEGRAL) { |
130 | if (c == '.') { |
131 | state = STATE_FRACTION; |
132 | ss.put(c); |
133 | continue; |
134 | } |
135 | if (c == 'e') { |
136 | state = STATE_EXPONENT; |
137 | ss.put(c); |
138 | continue; |
139 | } |
140 | if (c != '+' && c != '-' && (c < '0' || c > '9')) { |
141 | break; |
142 | } |
143 | } else if (state == STATE_FRACTION) { |
144 | out.ty = L_JSON_TOKEN_FLOAT; |
145 | if (c == 'e') { |
146 | state = STATE_EXPONENT; |
147 | ss.put(c); |
148 | continue; |
149 | } |
150 | if (c < '0' || c > '9') { |
151 | break; |
152 | } |
153 | } else if (state == STATE_EXPONENT) { |
154 | out.ty = L_JSON_TOKEN_FLOAT; |
155 | if (c != '+' && c != '-' && (c < '0' || c > '9')) { |
156 | break; |
157 | } |
158 | } |
159 | ss.put(c); |
160 | } while (++pos != end); |
161 | if (out.ty == L_JSON_TOKEN_INT) { |
162 | out.num_int = std::atoll(ss.str().c_str()); |
163 | } else if (out.ty == L_JSON_TOKEN_FLOAT) { |
164 | out.num_float = std::atof(ss.str().c_str()); |
165 | } |
166 | return true; |
167 | } |
168 | |
169 | // Try parse strings. |
170 | if (c == '"') { |
171 | out.ty = L_JSON_TOKEN_STRING; |
172 | bool escape = false; |
173 | while (++pos != end) { |
174 | c = *pos; |
175 | if (escape) { |
176 | switch (c) { |
177 | case '"': |
178 | case '/': |
179 | break; |
180 | case 'b': |
181 | c = '\b'; |
182 | break; |
183 | case 'f': |
184 | c = '\f'; |
185 | break; |
186 | case 'n': |
187 | c = '\n'; |
188 | break; |
189 | case 'r': |
190 | c = '\r'; |
191 | break; |
192 | case 't': |
193 | c = '\t'; |
194 | break; |
195 | case 'u': |
196 | throw JsonException("unicode escape is not supported" ); |
197 | default: |
198 | throw JsonException("invalid escape charater" ); |
199 | } |
200 | escape = false; |
201 | } else { |
202 | if (c == '\\') { |
203 | escape = true; |
204 | continue; |
205 | } else if (c == '"') { |
206 | if (escape != false) { |
207 | throw JsonException("incomplete escape sequence" ); |
208 | } |
209 | out.str = ss.str(); |
210 | pos += 1; |
211 | return true; |
212 | } |
213 | } |
214 | ss.put(c); |
215 | } |
216 | throw JsonException("unexpected end of string" ); |
217 | } |
218 | |
219 | // Try parse literals. |
220 | if (pos + 4 <= end) { |
221 | if (unsafe_starts_with("null" )) { |
222 | out.ty = L_JSON_TOKEN_NULL; |
223 | pos += 4; |
224 | return true; |
225 | } |
226 | if (unsafe_starts_with("true" )) { |
227 | out.ty = L_JSON_TOKEN_TRUE; |
228 | pos += 4; |
229 | return true; |
230 | } |
231 | } |
232 | if (pos + 5 <= end) { |
233 | if (unsafe_starts_with("false" )) { |
234 | out.ty = L_JSON_TOKEN_FALSE; |
235 | pos += 5; |
236 | return true; |
237 | } |
238 | } |
239 | } |
240 | out.ty = L_JSON_TOKEN_UNDEFINED; |
241 | return false; |
242 | } |
243 | }; |
244 | |
245 | bool try_parse_impl(Tokenizer &tokenizer, JsonValue &out) { |
246 | JsonToken token; |
247 | while (tokenizer.next_token(token)) { |
248 | JsonValue val; |
249 | switch (token.ty) { |
250 | case L_JSON_TOKEN_TRUE: |
251 | out.ty = L_JSON_BOOLEAN; |
252 | out.b = true; |
253 | return true; |
254 | case L_JSON_TOKEN_FALSE: |
255 | out.ty = L_JSON_BOOLEAN; |
256 | out.b = false; |
257 | return true; |
258 | case L_JSON_TOKEN_NULL: |
259 | out.ty = L_JSON_NULL; |
260 | return true; |
261 | case L_JSON_TOKEN_STRING: |
262 | out.ty = L_JSON_STRING; |
263 | out.str = std::move(token.str); |
264 | return true; |
265 | case L_JSON_TOKEN_INT: |
266 | out.ty = L_JSON_INT; |
267 | out.num_int = token.num_int; |
268 | return true; |
269 | case L_JSON_TOKEN_FLOAT: |
270 | out.ty = L_JSON_FLOAT; |
271 | out.num_int = token.num_float; |
272 | return true; |
273 | case L_JSON_TOKEN_OPEN_BRACKET: |
274 | out.ty = L_JSON_ARRAY; |
275 | for (;;) { |
276 | if (!try_parse_impl(tokenizer, val)) { |
277 | // When the array has no element. |
278 | break; |
279 | } |
280 | out.arr.inner.emplace_back(std::move(val)); |
281 | if (tokenizer.next_token(token)) { |
282 | if (token.ty == L_JSON_TOKEN_COMMA) { |
283 | continue; |
284 | } else if (token.ty == L_JSON_TOKEN_CLOSE_BRACKET) { |
285 | break; |
286 | } else { |
287 | throw JsonException("unexpected token in array" ); |
288 | } |
289 | } else { |
290 | throw JsonException("unexpected end of array" ); |
291 | } |
292 | } |
293 | return true; |
294 | case L_JSON_TOKEN_OPEN_BRACE: |
295 | out.ty = L_JSON_OBJECT; |
296 | for (;;) { |
297 | // Match the key. |
298 | std::string key; |
299 | if (tokenizer.next_token(token)) { |
300 | if (token.ty == L_JSON_TOKEN_STRING) { |
301 | key = std::move(token.str); |
302 | } else if (token.ty == L_JSON_TOKEN_CLOSE_BRACE) { |
303 | // The object has no field. |
304 | break; |
305 | } else { |
306 | throw JsonException("unexpected object field key type" ); |
307 | } |
308 | } else { |
309 | throw JsonException("unexpected end of object" ); |
310 | } |
311 | // Match the colon. |
312 | if (!tokenizer.next_token(token)) { |
313 | throw JsonException("unexpected end of object" ); |
314 | } |
315 | if (token.ty != L_JSON_TOKEN_COLON) { |
316 | throw JsonException("unexpected token in object" ); |
317 | } |
318 | // Match the value. |
319 | if (!try_parse_impl(tokenizer, val)) { |
320 | throw JsonException("unexpected end of object" ); |
321 | } |
322 | out.obj.inner[key] = std::move(val); |
323 | // Should we head for another round? |
324 | if (tokenizer.next_token(token)) { |
325 | if (token.ty == L_JSON_TOKEN_COMMA) { |
326 | continue; |
327 | } else if (token.ty == L_JSON_TOKEN_CLOSE_BRACE) { |
328 | break; |
329 | } else { |
330 | throw JsonException("unexpected token in object" ); |
331 | } |
332 | } else { |
333 | throw JsonException("unexpected end of object" ); |
334 | } |
335 | } |
336 | return true; |
337 | case L_JSON_TOKEN_CLOSE_BRACE: |
338 | case L_JSON_TOKEN_CLOSE_BRACKET: |
339 | return false; |
340 | default: |
341 | throw JsonException("unexpected token" ); |
342 | } |
343 | } |
344 | throw JsonException("unexpected program state" ); |
345 | } |
346 | |
347 | JsonValue parse(const char *beg, const char *end) { |
348 | if (beg == nullptr || end == nullptr || beg >= end) { |
349 | throw JsonException("json text is empty" ); |
350 | } |
351 | JsonValue rv; |
352 | Tokenizer tokenizer(beg, end); |
353 | if (!try_parse_impl(tokenizer, rv)) { |
354 | throw JsonException("unexpected close token" ); |
355 | } |
356 | return rv; |
357 | } |
358 | JsonValue parse(const std::string &json_lit) { |
359 | return parse(json_lit.c_str(), json_lit.c_str() + json_lit.size()); |
360 | } |
361 | bool try_parse(const std::string &json_lit, JsonValue &out) { |
362 | try { |
363 | out = parse(json_lit); |
364 | } catch (JsonException e) { |
365 | // log::error("failed to parse json: ", e.what()); |
366 | return true; |
367 | } |
368 | return false; |
369 | } |
370 | |
371 | void print_impl(const JsonValue &json, std::stringstream &out) { |
372 | switch (json.ty) { |
373 | case L_JSON_NULL: |
374 | out << "null" ; |
375 | return; |
376 | case L_JSON_BOOLEAN: |
377 | out << (json.b ? "true" : "false" ); |
378 | return; |
379 | case L_JSON_FLOAT: |
380 | out << json.num_float; |
381 | return; |
382 | case L_JSON_INT: |
383 | out << json.num_int; |
384 | return; |
385 | case L_JSON_STRING: |
386 | out << "\"" << json.str << "\"" ; |
387 | return; |
388 | case L_JSON_OBJECT: |
389 | out << "{" ; |
390 | { |
391 | bool is_first_iter = true; |
392 | for (const auto &pair : json.obj.inner) { |
393 | if (is_first_iter) { |
394 | is_first_iter = false; |
395 | } else { |
396 | out << "," ; |
397 | } |
398 | out << "\"" << pair.first << "\":" ; |
399 | print_impl(pair.second, out); |
400 | } |
401 | } |
402 | out << "}" ; |
403 | return; |
404 | case L_JSON_ARRAY: |
405 | out << "[" ; |
406 | { |
407 | bool is_first_iter = true; |
408 | for (const auto &elem : json.arr.inner) { |
409 | if (is_first_iter) { |
410 | is_first_iter = false; |
411 | } else { |
412 | out << "," ; |
413 | } |
414 | print_impl(elem, out); |
415 | } |
416 | } |
417 | out << "]" ; |
418 | return; |
419 | } |
420 | } |
421 | std::string print(const JsonValue &json) { |
422 | std::stringstream ss; |
423 | print_impl(json, ss); |
424 | return ss.str(); |
425 | } |
426 | |
427 | } // namespace json |
428 | } // namespace liong |
429 | |