1
2#include <string.h>
3#include "upb/upb.h"
4#include "upb/decode.h"
5
6#include "upb/port_def.inc"
7
8/* Maps descriptor type -> upb field type. */
9const uint8_t upb_desctype_to_fieldtype[] = {
10 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
11 UPB_TYPE_DOUBLE, /* DOUBLE */
12 UPB_TYPE_FLOAT, /* FLOAT */
13 UPB_TYPE_INT64, /* INT64 */
14 UPB_TYPE_UINT64, /* UINT64 */
15 UPB_TYPE_INT32, /* INT32 */
16 UPB_TYPE_UINT64, /* FIXED64 */
17 UPB_TYPE_UINT32, /* FIXED32 */
18 UPB_TYPE_BOOL, /* BOOL */
19 UPB_TYPE_STRING, /* STRING */
20 UPB_TYPE_MESSAGE, /* GROUP */
21 UPB_TYPE_MESSAGE, /* MESSAGE */
22 UPB_TYPE_BYTES, /* BYTES */
23 UPB_TYPE_UINT32, /* UINT32 */
24 UPB_TYPE_ENUM, /* ENUM */
25 UPB_TYPE_INT32, /* SFIXED32 */
26 UPB_TYPE_INT64, /* SFIXED64 */
27 UPB_TYPE_INT32, /* SINT32 */
28 UPB_TYPE_INT64, /* SINT64 */
29};
30
31/* Data pertaining to the parse. */
32typedef struct {
33 const char *ptr; /* Current parsing position. */
34 const char *field_start; /* Start of this field. */
35 const char *limit; /* End of delimited region or end of buffer. */
36 upb_arena *arena;
37 int depth;
38 uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
39} upb_decstate;
40
41/* Data passed by value to each parsing function. */
42typedef struct {
43 char *msg;
44 const upb_msglayout *layout;
45 upb_decstate *state;
46} upb_decframe;
47
/* Returns 0 (i.e. false or NULL) from the enclosing function when `x` is
 * false.  Wrapped in do/while(0) so that `CHK(x);` is a single statement and
 * stays correct when used as the unbraced body of an if/else. */
#define CHK(x) do { if (!(x)) { return 0; } } while (0)
49
50static bool upb_skip_unknowngroup(upb_decstate *d, int field_number);
51static bool upb_decode_message(upb_decstate *d, char *msg,
52 const upb_msglayout *l);
53
/* Reads one base-128 varint starting at *ptr.
 *
 * Each byte contributes its low 7 bits, least-significant group first; a set
 * high bit means another byte follows.  At most 10 bytes are accepted and no
 * byte at or beyond `limit` is read.
 *
 * On success stores the value in *val, advances *ptr past the varint and
 * returns true.  On failure returns false and leaves *ptr untouched (*val
 * then holds whatever had been accumulated so far). */
static bool upb_decode_varint(const char **ptr, const char *limit,
                              uint64_t *val) {
  const char *cur = *ptr;
  int shift;

  *val = 0;

  for (shift = 0;; shift += 7) {
    uint8_t b;

    if (shift >= 70 || cur >= limit) {
      return false;
    }

    b = (uint8_t)*cur++;
    *val |= (uint64_t)(b & 0x7F) << shift;

    if (!(b & 0x80)) {
      break; /* Continuation bit clear: this was the last byte. */
    }
  }

  *ptr = cur;
  return true;
}
72
/* Reads a varint that must fit in 32 bits.
 *
 * Returns false if the varint is malformed or its value exceeds UINT32_MAX;
 * otherwise stores the value in *val and returns true.  *ptr is advanced
 * whenever the underlying varint itself was well formed. */
static bool upb_decode_varint32(const char **ptr, const char *limit,
                                uint32_t *val) {
  uint64_t wide;

  if (!upb_decode_varint(ptr, limit, &wide)) {
    return false;
  }
  if (wide > UINT32_MAX) {
    return false;
  }

  *val = (uint32_t)wide;
  return true;
}
80
/* Reads a fixed 64-bit value from *ptr.
 *
 * The wire format stores fixed-width values little-endian, so the value is
 * assembled byte by byte instead of being copied raw; this gives the same
 * result on little-endian hosts and the correct one on big-endian hosts.
 *
 * Returns false, leaving *ptr and *val untouched, when fewer than 8 bytes
 * remain before `limit`.  Otherwise stores the value in *val, advances *ptr by
 * 8 and returns true. */
static bool upb_decode_64bit(const char **ptr, const char *limit,
                             uint64_t *val) {
  const unsigned char *src = (const unsigned char *)*ptr;
  uint64_t out = 0;
  int i;

  if (limit - *ptr < 8) {
    return false;
  }

  /* Most-significant byte is last on the wire. */
  for (i = 7; i >= 0; i--) {
    out = (out << 8) | src[i];
  }

  *val = out;
  *ptr += 8;
  return true;
}
88
/* Reads a fixed 32-bit value from *ptr.
 *
 * The wire format stores fixed-width values little-endian, so the value is
 * assembled byte by byte instead of being copied raw; this gives the same
 * result on little-endian hosts and the correct one on big-endian hosts.
 *
 * Returns false, leaving *ptr and *val untouched, when fewer than 4 bytes
 * remain before `limit`.  Otherwise stores the value in *val, advances *ptr by
 * 4 and returns true. */
static bool upb_decode_32bit(const char **ptr, const char *limit,
                             uint32_t *val) {
  const unsigned char *src = (const unsigned char *)*ptr;
  uint32_t out = 0;
  int i;

  if (limit - *ptr < 4) {
    return false;
  }

  /* Most-significant byte is last on the wire. */
  for (i = 3; i >= 0; i--) {
    out = (out << 8) | src[i];
  }

  *val = out;
  *ptr += 4;
  return true;
}
96
/* Undoes 32-bit zig-zag encoding: the low bit of `n` selects the sign and the
 * remaining bits carry the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...). */
static int32_t upb_zzdecode_32(uint32_t n) {
  uint32_t magnitude = n >> 1;
  int32_t sign_mask = -(int32_t)(n & 1); /* 0, or all ones when n is odd. */
  return magnitude ^ sign_mask;
}
100
/* Undoes 64-bit zig-zag encoding: the low bit of `n` selects the sign and the
 * remaining bits carry the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...). */
static int64_t upb_zzdecode_64(uint64_t n) {
  uint64_t magnitude = n >> 1;
  int64_t sign_mask = -(int64_t)(n & 1); /* 0, or all ones when n is odd. */
  return magnitude ^ sign_mask;
}
104
/* Reads the length prefix of a length-delimited value.
 *
 * Succeeds only when the prefix is a valid 32-bit varint, is smaller than
 * INT32_MAX, and that many payload bytes are available before `limit`.  On
 * success *ptr points at the first payload byte (the payload itself is not
 * consumed) and *outlen receives the payload length. */
static bool upb_decode_string(const char **ptr, const char *limit,
                              int *outlen) {
  uint32_t len;

  if (!upb_decode_varint32(ptr, limit, &len)) {
    return false;
  }
  if (len >= INT32_MAX) {
    return false;
  }
  if (limit - *ptr < (int32_t)len) {
    return false; /* Payload would run past the readable region. */
  }

  *outlen = len;
  return true;
}
116
/* Stores the 32-bit value `val` at byte offset `ofs` inside `msg`.  memcpy is
 * used so the destination does not need to be suitably aligned. */
static void upb_set32(void *msg, size_t ofs, uint32_t val) {
  char *dst = (char *)msg;
  dst += ofs;
  memcpy(dst, &val, sizeof(uint32_t));
}
120
121static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame) {
122 upb_msg_addunknown(frame->msg, d->field_start, d->ptr - d->field_start,
123 d->arena);
124 return true;
125}
126
127
128static bool upb_skip_unknownfielddata(upb_decstate *d, uint32_t tag,
129 uint32_t group_fieldnum) {
130 switch (tag & 7) {
131 case UPB_WIRE_TYPE_VARINT: {
132 uint64_t val;
133 return upb_decode_varint(&d->ptr, d->limit, &val);
134 }
135 case UPB_WIRE_TYPE_32BIT: {
136 uint32_t val;
137 return upb_decode_32bit(&d->ptr, d->limit, &val);
138 }
139 case UPB_WIRE_TYPE_64BIT: {
140 uint64_t val;
141 return upb_decode_64bit(&d->ptr, d->limit, &val);
142 }
143 case UPB_WIRE_TYPE_DELIMITED: {
144 int len;
145 CHK(upb_decode_string(&d->ptr, d->limit, &len));
146 d->ptr += len;
147 return true;
148 }
149 case UPB_WIRE_TYPE_START_GROUP:
150 return upb_skip_unknowngroup(d, tag >> 3);
151 case UPB_WIRE_TYPE_END_GROUP:
152 return (tag >> 3) == group_fieldnum;
153 }
154 return false;
155}
156
157static bool upb_skip_unknowngroup(upb_decstate *d, int field_number) {
158 while (d->ptr < d->limit && d->end_group == 0) {
159 uint32_t tag = 0;
160 CHK(upb_decode_varint32(&d->ptr, d->limit, &tag));
161 CHK(upb_skip_unknownfielddata(d, tag, field_number));
162 }
163
164 CHK(d->end_group == field_number);
165 d->end_group = 0;
166 return true;
167}
168
169static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size,
170 upb_arena *arena) {
171 size_t needed = arr->len + elements;
172 size_t new_size = UPB_MAX(arr->size, 8);
173 size_t new_bytes;
174 size_t old_bytes;
175 void *new_data;
176 upb_alloc *alloc = upb_arena_alloc(arena);
177
178 while (new_size < needed) {
179 new_size *= 2;
180 }
181
182 old_bytes = arr->len * elem_size;
183 new_bytes = new_size * elem_size;
184 new_data = upb_realloc(alloc, arr->data, old_bytes, new_bytes);
185 CHK(new_data);
186
187 arr->data = new_data;
188 arr->size = new_size;
189 return true;
190}
191
192static void *upb_array_reserve(upb_array *arr, size_t elements,
193 size_t elem_size, upb_arena *arena) {
194 if (arr->size - arr->len < elements) {
195 CHK(upb_array_grow(arr, elements, elem_size, arena));
196 }
197 return (char*)arr->data + (arr->len * elem_size);
198}
199
200bool upb_array_add(upb_array *arr, size_t elements, size_t elem_size,
201 const void *data, upb_arena *arena) {
202 void *dest = upb_array_reserve(arr, elements, elem_size, arena);
203
204 CHK(dest);
205 arr->len += elements;
206 memcpy(dest, data, elements * elem_size);
207
208 return true;
209}
210
211static upb_array *upb_getarr(upb_decframe *frame,
212 const upb_msglayout_field *field) {
213 UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
214 return *(upb_array**)&frame->msg[field->offset];
215}
216
217static upb_array *upb_getorcreatearr(upb_decframe *frame,
218 const upb_msglayout_field *field) {
219 upb_array *arr = upb_getarr(frame, field);
220
221 if (!arr) {
222 arr = upb_array_new(frame->state->arena);
223 CHK(arr);
224 *(upb_array**)&frame->msg[field->offset] = arr;
225 }
226
227 return arr;
228}
229
230static upb_msg *upb_getorcreatemsg(upb_decframe *frame,
231 const upb_msglayout_field *field,
232 const upb_msglayout **subm) {
233 upb_msg **submsg = (void*)(frame->msg + field->offset);
234 *subm = frame->layout->submsgs[field->submsg_index];
235
236 UPB_ASSERT(field->label != UPB_LABEL_REPEATED);
237
238 if (!*submsg) {
239 *submsg = upb_msg_new(*subm, frame->state->arena);
240 CHK(*submsg);
241 }
242
243 return *submsg;
244}
245
246static upb_msg *upb_addmsg(upb_decframe *frame,
247 const upb_msglayout_field *field,
248 const upb_msglayout **subm) {
249 upb_msg *submsg;
250 upb_array *arr = upb_getorcreatearr(frame, field);
251
252 *subm = frame->layout->submsgs[field->submsg_index];
253 submsg = upb_msg_new(*subm, frame->state->arena);
254 CHK(submsg);
255 upb_array_add(arr, 1, sizeof(submsg), &submsg, frame->state->arena);
256
257 return submsg;
258}
259
260static void upb_sethasbit(upb_decframe *frame,
261 const upb_msglayout_field *field) {
262 int32_t hasbit = field->presence;
263 UPB_ASSERT(field->presence > 0);
264 frame->msg[hasbit / 8] |= (1 << (hasbit % 8));
265}
266
267static void upb_setoneofcase(upb_decframe *frame,
268 const upb_msglayout_field *field) {
269 UPB_ASSERT(field->presence < 0);
270 upb_set32(frame->msg, ~field->presence, field->number);
271}
272
273static bool upb_decode_addval(upb_decframe *frame,
274 const upb_msglayout_field *field, void *val,
275 size_t size) {
276 char *field_mem = frame->msg + field->offset;
277 upb_array *arr;
278
279 if (field->label == UPB_LABEL_REPEATED) {
280 arr = upb_getorcreatearr(frame, field);
281 CHK(arr);
282 field_mem = upb_array_reserve(arr, 1, size, frame->state->arena);
283 CHK(field_mem);
284 }
285
286 memcpy(field_mem, val, size);
287 return true;
288}
289
290static void upb_decode_setpresent(upb_decframe *frame,
291 const upb_msglayout_field *field) {
292 if (field->label == UPB_LABEL_REPEATED) {
293 upb_array *arr = upb_getarr(frame, field);
294 UPB_ASSERT(arr->len < arr->size);
295 arr->len++;
296 } else if (field->presence < 0) {
297 upb_setoneofcase(frame, field);
298 } else if (field->presence > 0) {
299 upb_sethasbit(frame, field);
300 }
301}
302
303static bool upb_decode_msgfield(upb_decstate *d, upb_msg *msg,
304 const upb_msglayout *layout, int limit) {
305 const char* saved_limit = d->limit;
306 d->limit = d->ptr + limit;
307 CHK(--d->depth >= 0);
308 upb_decode_message(d, msg, layout);
309 d->depth++;
310 d->limit = saved_limit;
311 CHK(d->end_group == 0);
312 return true;
313}
314
315static bool upb_decode_groupfield(upb_decstate *d, upb_msg *msg,
316 const upb_msglayout *layout,
317 int field_number) {
318 CHK(--d->depth >= 0);
319 upb_decode_message(d, msg, layout);
320 d->depth++;
321 CHK(d->end_group == field_number);
322 d->end_group = 0;
323 return true;
324}
325
326static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
327 const upb_msglayout_field *field) {
328 uint64_t val;
329 CHK(upb_decode_varint(&d->ptr, d->limit, &val));
330
331 switch (field->descriptortype) {
332 case UPB_DESCRIPTOR_TYPE_INT64:
333 case UPB_DESCRIPTOR_TYPE_UINT64:
334 CHK(upb_decode_addval(frame, field, &val, sizeof(val)));
335 break;
336 case UPB_DESCRIPTOR_TYPE_INT32:
337 case UPB_DESCRIPTOR_TYPE_UINT32:
338 case UPB_DESCRIPTOR_TYPE_ENUM: {
339 uint32_t val32 = (uint32_t)val;
340 CHK(upb_decode_addval(frame, field, &val32, sizeof(val32)));
341 break;
342 }
343 case UPB_DESCRIPTOR_TYPE_BOOL: {
344 bool valbool = val != 0;
345 CHK(upb_decode_addval(frame, field, &valbool, sizeof(valbool)));
346 break;
347 }
348 case UPB_DESCRIPTOR_TYPE_SINT32: {
349 int32_t decoded = upb_zzdecode_32((uint32_t)val);
350 CHK(upb_decode_addval(frame, field, &decoded, sizeof(decoded)));
351 break;
352 }
353 case UPB_DESCRIPTOR_TYPE_SINT64: {
354 int64_t decoded = upb_zzdecode_64(val);
355 CHK(upb_decode_addval(frame, field, &decoded, sizeof(decoded)));
356 break;
357 }
358 default:
359 return upb_append_unknown(d, frame);
360 }
361
362 upb_decode_setpresent(frame, field);
363 return true;
364}
365
366static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
367 const upb_msglayout_field *field) {
368 uint64_t val;
369 CHK(upb_decode_64bit(&d->ptr, d->limit, &val));
370
371 switch (field->descriptortype) {
372 case UPB_DESCRIPTOR_TYPE_DOUBLE:
373 case UPB_DESCRIPTOR_TYPE_FIXED64:
374 case UPB_DESCRIPTOR_TYPE_SFIXED64:
375 CHK(upb_decode_addval(frame, field, &val, sizeof(val)));
376 break;
377 default:
378 return upb_append_unknown(d, frame);
379 }
380
381 upb_decode_setpresent(frame, field);
382 return true;
383}
384
385static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
386 const upb_msglayout_field *field) {
387 uint32_t val;
388 CHK(upb_decode_32bit(&d->ptr, d->limit, &val));
389
390 switch (field->descriptortype) {
391 case UPB_DESCRIPTOR_TYPE_FLOAT:
392 case UPB_DESCRIPTOR_TYPE_FIXED32:
393 case UPB_DESCRIPTOR_TYPE_SFIXED32:
394 CHK(upb_decode_addval(frame, field, &val, sizeof(val)));
395 break;
396 default:
397 return upb_append_unknown(d, frame);
398 }
399
400 upb_decode_setpresent(frame, field);
401 return true;
402}
403
404static bool upb_decode_fixedpacked(upb_decstate *d, upb_array *arr,
405 uint32_t len, int elem_size) {
406 size_t elements = len / elem_size;
407
408 CHK((size_t)(elements * elem_size) == len);
409 CHK(upb_array_add(arr, elements, elem_size, d->ptr, d->arena));
410 d->ptr += len;
411
412 return true;
413}
414
415static upb_strview upb_decode_strfield(upb_decstate *d, uint32_t len) {
416 upb_strview ret;
417 ret.data = d->ptr;
418 ret.size = len;
419 d->ptr += len;
420 return ret;
421}
422
423static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
424 const upb_msglayout_field *field, int len) {
425 upb_array *arr = upb_getorcreatearr(frame, field);
426 CHK(arr);
427
428#define VARINT_CASE(ctype, decode) \
429 VARINT_CASE_EX(ctype, decode, decode)
430
431#define VARINT_CASE_EX(ctype, decode, dtype) \
432 { \
433 const char *ptr = d->ptr; \
434 const char *limit = ptr + len; \
435 while (ptr < limit) { \
436 uint64_t val; \
437 ctype decoded; \
438 CHK(upb_decode_varint(&ptr, limit, &val)); \
439 decoded = (decode)((dtype)val); \
440 CHK(upb_array_add(arr, 1, sizeof(decoded), &decoded, d->arena)); \
441 } \
442 d->ptr = ptr; \
443 return true; \
444 }
445
446 switch (field->descriptortype) {
447 case UPB_DESCRIPTOR_TYPE_STRING:
448 case UPB_DESCRIPTOR_TYPE_BYTES: {
449 upb_strview str = upb_decode_strfield(d, len);
450 return upb_array_add(arr, 1, sizeof(str), &str, d->arena);
451 }
452 case UPB_DESCRIPTOR_TYPE_FLOAT:
453 case UPB_DESCRIPTOR_TYPE_FIXED32:
454 case UPB_DESCRIPTOR_TYPE_SFIXED32:
455 return upb_decode_fixedpacked(d, arr, len, sizeof(int32_t));
456 case UPB_DESCRIPTOR_TYPE_DOUBLE:
457 case UPB_DESCRIPTOR_TYPE_FIXED64:
458 case UPB_DESCRIPTOR_TYPE_SFIXED64:
459 return upb_decode_fixedpacked(d, arr, len, sizeof(int64_t));
460 case UPB_DESCRIPTOR_TYPE_INT32:
461 case UPB_DESCRIPTOR_TYPE_UINT32:
462 case UPB_DESCRIPTOR_TYPE_ENUM:
463 VARINT_CASE(uint32_t, uint32_t);
464 case UPB_DESCRIPTOR_TYPE_INT64:
465 case UPB_DESCRIPTOR_TYPE_UINT64:
466 VARINT_CASE(uint64_t, uint64_t);
467 case UPB_DESCRIPTOR_TYPE_BOOL:
468 VARINT_CASE(bool, bool);
469 case UPB_DESCRIPTOR_TYPE_SINT32:
470 VARINT_CASE_EX(int32_t, upb_zzdecode_32, uint32_t);
471 case UPB_DESCRIPTOR_TYPE_SINT64:
472 VARINT_CASE_EX(int64_t, upb_zzdecode_64, uint64_t);
473 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
474 const upb_msglayout *subm;
475 upb_msg *submsg = upb_addmsg(frame, field, &subm);
476 CHK(submsg);
477 return upb_decode_msgfield(d, submsg, subm, len);
478 }
479 case UPB_DESCRIPTOR_TYPE_GROUP:
480 return upb_append_unknown(d, frame);
481 }
482#undef VARINT_CASE
483 UPB_UNREACHABLE();
484}
485
486static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
487 const upb_msglayout_field *field) {
488 int len;
489
490 CHK(upb_decode_string(&d->ptr, d->limit, &len));
491
492 if (field->label == UPB_LABEL_REPEATED) {
493 return upb_decode_toarray(d, frame, field, len);
494 } else {
495 switch (field->descriptortype) {
496 case UPB_DESCRIPTOR_TYPE_STRING:
497 case UPB_DESCRIPTOR_TYPE_BYTES: {
498 upb_strview str = upb_decode_strfield(d, len);
499 CHK(upb_decode_addval(frame, field, &str, sizeof(str)));
500 break;
501 }
502 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
503 const upb_msglayout *subm;
504 upb_msg *submsg = upb_getorcreatemsg(frame, field, &subm);
505 CHK(submsg);
506 CHK(upb_decode_msgfield(d, submsg, subm, len));
507 break;
508 }
509 default:
510 /* TODO(haberman): should we accept the last element of a packed? */
511 d->ptr += len;
512 return upb_append_unknown(d, frame);
513 }
514 upb_decode_setpresent(frame, field);
515 return true;
516 }
517}
518
519static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
520 uint32_t field_number) {
521 /* Lots of optimization opportunities here. */
522 int i;
523 for (i = 0; i < l->field_count; i++) {
524 if (l->fields[i].number == field_number) {
525 return &l->fields[i];
526 }
527 }
528
529 return NULL; /* Unknown field. */
530}
531
532static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
533 uint32_t tag;
534 const upb_msglayout_field *field;
535 int field_number;
536
537 d->field_start = d->ptr;
538 CHK(upb_decode_varint32(&d->ptr, d->limit, &tag));
539 field_number = tag >> 3;
540 field = upb_find_field(frame->layout, field_number);
541
542 if (field) {
543 switch (tag & 7) {
544 case UPB_WIRE_TYPE_VARINT:
545 return upb_decode_varintfield(d, frame, field);
546 case UPB_WIRE_TYPE_32BIT:
547 return upb_decode_32bitfield(d, frame, field);
548 case UPB_WIRE_TYPE_64BIT:
549 return upb_decode_64bitfield(d, frame, field);
550 case UPB_WIRE_TYPE_DELIMITED:
551 return upb_decode_delimitedfield(d, frame, field);
552 case UPB_WIRE_TYPE_START_GROUP: {
553 const upb_msglayout *layout;
554 upb_msg *group;
555
556 if (field->label == UPB_LABEL_REPEATED) {
557 group = upb_addmsg(frame, field, &layout);
558 } else {
559 group = upb_getorcreatemsg(frame, field, &layout);
560 }
561
562 return upb_decode_groupfield(d, group, layout, field_number);
563 }
564 case UPB_WIRE_TYPE_END_GROUP:
565 d->end_group = field_number;
566 return true;
567 default:
568 CHK(false);
569 }
570 } else {
571 CHK(field_number != 0);
572 CHK(upb_skip_unknownfielddata(d, tag, -1));
573 CHK(upb_append_unknown(d, frame));
574 return true;
575 }
576}
577
578static bool upb_decode_message(upb_decstate *d, char *msg, const upb_msglayout *l) {
579 upb_decframe frame;
580 frame.msg = msg;
581 frame.layout = l;
582 frame.state = d;
583
584 while (d->ptr < d->limit) {
585 CHK(upb_decode_field(d, &frame));
586 }
587
588 return true;
589}
590
591bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
592 upb_arena *arena) {
593 upb_decstate state;
594 state.ptr = buf;
595 state.limit = buf + size;
596 state.arena = arena;
597 state.depth = 64;
598 state.end_group = 0;
599
600 CHK(upb_decode_message(&state, msg, l));
601 return state.end_group == 0;
602}
603
604#undef CHK
605