1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
32#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
33
34#include <cstdint>
35#include <cstring>
36#include <string>
37
38#include <google/protobuf/io/coded_stream.h>
39#include <google/protobuf/io/zero_copy_stream.h>
40#include <google/protobuf/arena.h>
41#include <google/protobuf/arenastring.h>
42#include <google/protobuf/implicit_weak_message.h>
43#include <google/protobuf/metadata_lite.h>
44#include <google/protobuf/port.h>
45#include <google/protobuf/repeated_field.h>
46#include <google/protobuf/wire_format_lite.h>
47#include <google/protobuf/stubs/strutil.h>
48
49#include <google/protobuf/port_def.inc>
50
51
52namespace google {
53namespace protobuf {
54
55class UnknownFieldSet;
56class DescriptorPool;
57class MessageFactory;
58
59namespace internal {
60
61// Template code below needs to know about the existence of these functions.
62PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s);
63PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val,
64 std::string* s);
65// Inline because it is just forwarding to s->WriteVarint
66inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s);
67inline void WriteLengthDelimited(uint32 num, StringPiece val,
68 UnknownFieldSet* s);
69
70
// The basic abstraction the parser is designed for is a slight modification
// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
//
// Where the '-' represent the bytes which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or, if there
// is no next chunk, it is at least still valid to read those bytes. Again,
// pictorially, we now have
//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk has
// 4 '.' after the chunk. In case these 'overflow' bytes represent bytes
// past the stream, indicated by '*' above, their values are unspecified. It is
// still legal to read them (i.e. it should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
//
// The reason for this, admittedly, unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly it alleviates having to perform bounds checks if a piece of code
// is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wireformat is such that reading a key/value pair is
// always less than 16 bytes. This removes the need to change to the next buffer
// in the middle of reading primitive values. Hence there is no need to store
// and load the current position.
107
108class PROTOBUF_EXPORT EpsCopyInputStream {
109 public:
110 enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };
111
112 explicit EpsCopyInputStream(bool enable_aliasing)
113 : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
114
115 void BackUp(const char* ptr) {
116 GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
117 int count;
118 if (next_chunk_ == buffer_) {
119 count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
120 } else {
121 count = size_ + static_cast<int>(buffer_end_ - ptr);
122 }
123 if (count > 0) StreamBackUp(count);
124 }
125
126 // If return value is negative it's an error
127 PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
128 GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
129 // This add is safe due to the invariant above, because
130 // ptr - buffer_end_ <= kSlopBytes.
131 limit += static_cast<int>(ptr - buffer_end_);
132 limit_end_ = buffer_end_ + (std::min)(0, limit);
133 auto old_limit = limit_;
134 limit_ = limit;
135 return old_limit - limit;
136 }
137
138 PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
139 if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
140 limit_ = limit_ + delta;
141 // TODO(gerbens) We could remove this line and hoist the code to
142 // DoneFallback. Study the perf/bin-size effects.
143 limit_end_ = buffer_end_ + (std::min)(0, limit_);
144 return true;
145 }
146
147 PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
148 if (size <= buffer_end_ + kSlopBytes - ptr) {
149 return ptr + size;
150 }
151 return SkipFallback(ptr, size);
152 }
153 PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
154 std::string* s) {
155 if (size <= buffer_end_ + kSlopBytes - ptr) {
156 s->assign(ptr, size);
157 return ptr + size;
158 }
159 return ReadStringFallback(ptr, size, s);
160 }
161 PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
162 std::string* s) {
163 if (size <= buffer_end_ + kSlopBytes - ptr) {
164 s->append(ptr, size);
165 return ptr + size;
166 }
167 return AppendStringFallback(ptr, size, s);
168 }
169
170 template <typename Tag, typename T>
171 PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr,
172 Tag expected_tag,
173 RepeatedField<T>* out);
174
175 template <typename T>
176 PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
177 int size,
178 RepeatedField<T>* out);
179 template <typename Add>
180 PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
181 Add add);
182
183 uint32 LastTag() const { return last_tag_minus_1_ + 1; }
184 bool ConsumeEndGroup(uint32 start_tag) {
185 bool res = last_tag_minus_1_ == start_tag;
186 last_tag_minus_1_ = 0;
187 return res;
188 }
189 bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
190 bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
191 void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - 1; }
192 void SetEndOfStream() { last_tag_minus_1_ = 1; }
193 bool IsExceedingLimit(const char* ptr) {
194 return ptr > limit_end_ &&
195 (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
196 }
197 int BytesUntilLimit(const char* ptr) const {
198 return limit_ + static_cast<int>(buffer_end_ - ptr);
199 }
200 // Returns true if more data is available, if false is returned one has to
201 // call Done for further checks.
202 bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
203
204 protected:
205 // Returns true is limit (either an explicit limit or end of stream) is
206 // reached. It aligns *ptr across buffer seams.
207 // If limit is exceeded it returns true and ptr is set to null.
208 bool DoneWithCheck(const char** ptr, int d) {
209 GOOGLE_DCHECK(*ptr);
210 if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
211 // No need to fetch buffer if we ended on a limit in the slop region
212 if ((*ptr - buffer_end_) == limit_) return true;
213 auto res = DoneFallback(*ptr, d);
214 *ptr = res.first;
215 return res.second;
216 }
217
218 const char* InitFrom(StringPiece flat) {
219 overall_limit_ = 0;
220 if (flat.size() > kSlopBytes) {
221 limit_ = kSlopBytes;
222 limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
223 next_chunk_ = buffer_;
224 if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
225 return flat.data();
226 } else {
227 std::memcpy(buffer_, flat.data(), flat.size());
228 limit_ = 0;
229 limit_end_ = buffer_end_ = buffer_ + flat.size();
230 next_chunk_ = nullptr;
231 if (aliasing_ == kOnPatch) {
232 aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
233 reinterpret_cast<std::uintptr_t>(buffer_);
234 }
235 return buffer_;
236 }
237 }
238
239 const char* InitFrom(io::ZeroCopyInputStream* zcis);
240
241 const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
242 if (limit == -1) return InitFrom(zcis);
243 overall_limit_ = limit;
244 auto res = InitFrom(zcis);
245 limit_ = limit - static_cast<int>(buffer_end_ - res);
246 limit_end_ = buffer_end_ + (std::min)(0, limit_);
247 return res;
248 }
249
250 private:
251 const char* limit_end_; // buffer_end_ + min(limit_, 0)
252 const char* buffer_end_;
253 const char* next_chunk_;
254 int size_;
255 int limit_; // relative to buffer_end_;
256 io::ZeroCopyInputStream* zcis_ = nullptr;
257 char buffer_[2 * kSlopBytes] = {};
258 enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
259 std::uintptr_t aliasing_ = kNoAliasing;
260 // This variable is used to communicate how the parse ended, in order to
261 // completely verify the parsed data. A wire-format parse can end because of
262 // one of the following conditions:
263 // 1) A parse can end on a pushed limit.
264 // 2) A parse can end on End Of Stream (EOS).
265 // 3) A parse can end on 0 tag (only valid for toplevel message).
266 // 4) A parse can end on an end-group tag.
267 // This variable should always be set to 0, which indicates case 1. If the
268 // parse terminated due to EOS (case 2), it's set to 1. In case the parse
269 // ended due to a terminating tag (case 3 and 4) it's set to (tag - 1).
270 // This var doesn't really belong in EpsCopyInputStream and should be part of
271 // the ParseContext, but case 2 is most easily and optimally implemented in
272 // DoneFallback.
273 uint32 last_tag_minus_1_ = 0;
274 int overall_limit_ = INT_MAX; // Overall limit independent of pushed limits.
275 // Pretty random large number that seems like a safe allocation on most
276 // systems. TODO(gerbens) do we need to set this as build flag?
277 enum { kSafeStringSize = 50000000 };
278
279 std::pair<const char*, bool> DoneFallback(const char* ptr, int d);
280 const char* Next(int overrun, int d);
281 const char* SkipFallback(const char* ptr, int size);
282 const char* AppendStringFallback(const char* ptr, int size, std::string* str);
283 const char* ReadStringFallback(const char* ptr, int size, std::string* str);
284 bool StreamNext(const void** data) {
285 bool res = zcis_->Next(data, &size_);
286 if (res) overall_limit_ -= size_;
287 return res;
288 }
289 void StreamBackUp(int count) {
290 zcis_->BackUp(count);
291 overall_limit_ += count;
292 }
293
294 template <typename A>
295 const char* AppendSize(const char* ptr, int size, const A& append) {
296 int chunk_size = buffer_end_ + kSlopBytes - ptr;
297 do {
298 GOOGLE_DCHECK(size > chunk_size);
299 append(ptr, chunk_size);
300 ptr += chunk_size;
301 size -= chunk_size;
302 // DoneFallBack asserts it isn't called when exactly on the limit. If this
303 // happens we fail the parse, as we are at the limit and still more bytes
304 // to read.
305 if (limit_ == kSlopBytes) return nullptr;
306 auto res = DoneFallback(ptr, -1);
307 if (res.second) return nullptr; // If done we passed the limit
308 ptr = res.first;
309 chunk_size = buffer_end_ + kSlopBytes - ptr;
310 } while (size > chunk_size);
311 append(ptr, size);
312 return ptr + size;
313 }
314
315 // AppendUntilEnd appends data until a limit (either a PushLimit or end of
316 // stream. Normal payloads are from length delimited fields which have an
317 // explicit size. Reading until limit only comes when the string takes
318 // the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and
319 // implicit weak messages. We keep these methods private and friend them.
320 template <typename A>
321 const char* AppendUntilEnd(const char* ptr, const A& append) {
322 while (!DoneWithCheck(&ptr, -1)) {
323 append(ptr, limit_end_ - ptr);
324 ptr = limit_end_;
325 }
326 return ptr;
327 }
328
329 PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
330 std::string* str) {
331 return AppendUntilEnd(
332 ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
333 }
334 friend class ImplicitWeakMessage;
335};
336
337// ParseContext holds all data that is global to the entire parse. Most
338// importantly it contains the input stream, but also recursion depth and also
339// stores the end group tag, in case a parser ended on a endgroup, to verify
340// matching start/end group tags.
341class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
342 public:
343 struct Data {
344 const DescriptorPool* pool = nullptr;
345 MessageFactory* factory = nullptr;
346 };
347
348 template <typename... T>
349 ParseContext(int depth, bool aliasing, const char** start, T&&... args)
350 : EpsCopyInputStream(aliasing), depth_(depth) {
351 *start = InitFrom(std::forward<T>(args)...);
352 }
353
354 void TrackCorrectEnding() { group_depth_ = 0; }
355
356 bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }
357 bool DoneNoSlopCheck(const char** ptr) { return DoneWithCheck(ptr, -1); }
358
359 int depth() const { return depth_; }
360
361 Data& data() { return data_; }
362 const Data& data() const { return data_; }
363
364 template <typename T>
365 PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr);
366 // We outline when the type is generic and we go through a virtual
367 const char* ParseMessage(MessageLite* msg, const char* ptr);
368 const char* ParseMessage(Message* msg, const char* ptr);
369
370 template <typename T>
371 PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseGroup(
372 T* msg, const char* ptr, uint32 tag) {
373 if (--depth_ < 0) return nullptr;
374 group_depth_++;
375 ptr = msg->_InternalParse(ptr, this);
376 group_depth_--;
377 depth_++;
378 if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
379 return ptr;
380 }
381
382 private:
383 // The context keeps an internal stack to keep track of the recursive
384 // part of the parse state.
385 // Current depth of the active parser, depth counts down.
386 // This is used to limit recursion depth (to prevent overflow on malicious
387 // data), but is also used to index in stack_ to store the current state.
388 int depth_;
389 // Unfortunately necessary for the fringe case of ending on 0 or end-group tag
390 // in the last kSlopBytes of a ZeroCopyInputStream chunk.
391 int group_depth_ = INT_MIN;
392 Data data_;
393};
394
395template <uint32 tag>
396bool ExpectTag(const char* ptr) {
397 if (tag < 128) {
398 return *ptr == tag;
399 } else {
400 static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
401 char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
402 return std::memcmp(ptr, buf, 2) == 0;
403 }
404}
405
406template <int>
407struct EndianHelper;
408
409template <>
410struct EndianHelper<1> {
411 static uint8 Load(const void* p) { return *static_cast<const uint8*>(p); }
412};
413
414template <>
415struct EndianHelper<2> {
416 static uint16 Load(const void* p) {
417 uint16 tmp;
418 std::memcpy(&tmp, p, 2);
419#ifndef PROTOBUF_LITTLE_ENDIAN
420 tmp = bswap_16(tmp);
421#endif
422 return tmp;
423 }
424};
425
426template <>
427struct EndianHelper<4> {
428 static uint32 Load(const void* p) {
429 uint32 tmp;
430 std::memcpy(&tmp, p, 4);
431#ifndef PROTOBUF_LITTLE_ENDIAN
432 tmp = bswap_32(tmp);
433#endif
434 return tmp;
435 }
436};
437
438template <>
439struct EndianHelper<8> {
440 static uint64 Load(const void* p) {
441 uint64 tmp;
442 std::memcpy(&tmp, p, 8);
443#ifndef PROTOBUF_LITTLE_ENDIAN
444 tmp = bswap_64(tmp);
445#endif
446 return tmp;
447 }
448};
449
450template <typename T>
451T UnalignedLoad(const char* p) {
452 auto tmp = EndianHelper<sizeof(T)>::Load(p);
453 T res;
454 memcpy(&res, &tmp, sizeof(T));
455 return res;
456}
457
458PROTOBUF_EXPORT
459std::pair<const char*, uint32> VarintParseSlow32(const char* p, uint32 res);
460PROTOBUF_EXPORT
461std::pair<const char*, uint64> VarintParseSlow64(const char* p, uint32 res);
462
463inline const char* VarintParseSlow(const char* p, uint32 res, uint32* out) {
464 auto tmp = VarintParseSlow32(p, res);
465 *out = tmp.second;
466 return tmp.first;
467}
468
469inline const char* VarintParseSlow(const char* p, uint32 res, uint64* out) {
470 auto tmp = VarintParseSlow64(p, res);
471 *out = tmp.second;
472 return tmp.first;
473}
474
475template <typename T>
476PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
477 auto ptr = reinterpret_cast<const uint8*>(p);
478 uint32 res = ptr[0];
479 if (!(res & 0x80)) {
480 *out = res;
481 return p + 1;
482 }
483 uint32 byte = ptr[1];
484 res += (byte - 1) << 7;
485 if (!(byte & 0x80)) {
486 *out = res;
487 return p + 2;
488 }
489 return VarintParseSlow(p, res, out);
490}
491
492// Used for tags, could read up to 5 bytes which must be available.
493// Caller must ensure its safe to call.
494
495PROTOBUF_EXPORT
496std::pair<const char*, uint32> ReadTagFallback(const char* p, uint32 res);
497
498// Same as ParseVarint but only accept 5 bytes at most.
499inline const char* ReadTag(const char* p, uint32* out, uint32 /*max_tag*/ = 0) {
500 uint32 res = static_cast<uint8>(p[0]);
501 if (res < 128) {
502 *out = res;
503 return p + 1;
504 }
505 uint32 second = static_cast<uint8>(p[1]);
506 res += (second - 1) << 7;
507 if (second < 128) {
508 *out = res;
509 return p + 2;
510 }
511 auto tmp = ReadTagFallback(p, res);
512 *out = tmp.second;
513 return tmp.first;
514}
515
516// Decode 2 consecutive bytes of a varint and returns the value, shifted left
517// by 1. It simultaneous updates *ptr to *ptr + 1 or *ptr + 2 depending if the
518// first byte's continuation bit is set.
519// If bit 15 of return value is set (equivalent to the continuation bits of both
520// bytes being set) the varint continues, otherwise the parse is done. On x86
521// movsx eax, dil
522// add edi, eax
523// adc [rsi], 1
524// add eax, eax
525// and eax, edi
526inline uint32 DecodeTwoBytes(const char** ptr) {
527 uint32 value = UnalignedLoad<uint16>(*ptr);
528 // Sign extend the low byte continuation bit
529 uint32_t x = static_cast<int8_t>(value);
530 // This add is an amazing operation, it cancels the low byte continuation bit
531 // from y transferring it to the carry. Simultaneously it also shifts the 7
532 // LSB left by one tightly against high byte varint bits. Hence value now
533 // contains the unpacked value shifted left by 1.
534 value += x;
535 // Use the carry to update the ptr appropriately.
536 *ptr += value < x ? 2 : 1;
537 return value & (x + x); // Mask out the high byte iff no continuation
538}
539
540// More efficient varint parsing for big varints
541inline const char* ParseBigVarint(const char* p, uint64* out) {
542 auto pnew = p;
543 auto tmp = DecodeTwoBytes(&pnew);
544 uint64 res = tmp >> 1;
545 if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
546 *out = res;
547 return pnew;
548 }
549 for (std::uint32_t i = 1; i < 5; i++) {
550 pnew = p + 2 * i;
551 tmp = DecodeTwoBytes(&pnew);
552 res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
553 if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
554 *out = res;
555 return pnew;
556 }
557 }
558 return nullptr;
559}
560
561PROTOBUF_EXPORT
562std::pair<const char*, int32> ReadSizeFallback(const char* p, uint32 first);
563// Used for tags, could read up to 5 bytes which must be available. Additionally
564// it makes sure the unsigned value fits a int32, otherwise returns nullptr.
565// Caller must ensure its safe to call.
566inline uint32 ReadSize(const char** pp) {
567 auto p = *pp;
568 uint32 res = static_cast<uint8>(p[0]);
569 if (res < 128) {
570 *pp = p + 1;
571 return res;
572 }
573 auto x = ReadSizeFallback(p, res);
574 *pp = x.first;
575 return x.second;
576}
577
578// Some convenience functions to simplify the generated parse loop code.
579// Returning the value and updating the buffer pointer allows for nicer
580// function composition. We rely on the compiler to inline this.
581// Also in debug compiles having local scoped variables tend to generated
582// stack frames that scale as O(num fields).
583inline uint64 ReadVarint64(const char** p) {
584 uint64 tmp;
585 *p = VarintParse(*p, &tmp);
586 return tmp;
587}
588
589inline uint32 ReadVarint32(const char** p) {
590 uint32 tmp;
591 *p = VarintParse(*p, &tmp);
592 return tmp;
593}
594
595inline int64 ReadVarintZigZag64(const char** p) {
596 uint64 tmp;
597 *p = VarintParse(*p, &tmp);
598 return WireFormatLite::ZigZagDecode64(tmp);
599}
600
601inline int32 ReadVarintZigZag32(const char** p) {
602 uint64 tmp;
603 *p = VarintParse(*p, &tmp);
604 return WireFormatLite::ZigZagDecode32(static_cast<uint32>(tmp));
605}
606
607template <typename T>
608PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
609 T* msg, const char* ptr) {
610 int size = ReadSize(&ptr);
611 if (!ptr) return nullptr;
612 auto old = PushLimit(ptr, size);
613 if (--depth_ < 0) return nullptr;
614 ptr = msg->_InternalParse(ptr, this);
615 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
616 depth_++;
617 if (!PopLimit(old)) return nullptr;
618 return ptr;
619}
620
621template <typename Add>
622const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
623 int size = ReadSize(&ptr);
624 if (ptr == nullptr) return nullptr;
625 auto old = PushLimit(ptr, size);
626 if (old < 0) return nullptr;
627 while (!DoneWithCheck(&ptr, -1)) {
628 uint64 varint;
629 ptr = VarintParse(ptr, &varint);
630 if (!ptr) return nullptr;
631 add(varint);
632 }
633 if (!PopLimit(old)) return nullptr;
634 return ptr;
635}
636
637// Helper for verification of utf8
638PROTOBUF_EXPORT
639bool VerifyUTF8(StringPiece s, const char* field_name);
640
641inline bool VerifyUTF8(const std::string* s, const char* field_name) {
642 return VerifyUTF8(*s, field_name);
643}
644
645// All the string parsers with or without UTF checking and for all CTypes.
646PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
647 std::string* s, const char* ptr, ParseContext* ctx);
648
649
// GOOGLE_PROTOBUF_ASSERT_RETURN returns `ret` from the enclosing function when
// `predicate` is false. Uncomment either of the commented-out lines inside the
// macro to debug which parse function is failing.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                  \
    /* ::raise(SIGINT); */                             \
    /* GOOGLE_LOG(ERROR) << "Parse failure"; */        \
    return ret;                                        \
  }

// Parser flavour of the macro above: a failed predicate returns nullptr.
#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
661
662template <typename T>
663PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser,
664 const char* ptr,
665 ParseContext* ctx) {
666 uint32 number = tag >> 3;
667 GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
668 using WireType = internal::WireFormatLite::WireType;
669 switch (tag & 7) {
670 case WireType::WIRETYPE_VARINT: {
671 uint64 value;
672 ptr = VarintParse(ptr, &value);
673 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
674 field_parser.AddVarint(number, value);
675 break;
676 }
677 case WireType::WIRETYPE_FIXED64: {
678 uint64 value = UnalignedLoad<uint64>(ptr);
679 ptr += 8;
680 field_parser.AddFixed64(number, value);
681 break;
682 }
683 case WireType::WIRETYPE_LENGTH_DELIMITED: {
684 ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
685 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
686 break;
687 }
688 case WireType::WIRETYPE_START_GROUP: {
689 ptr = field_parser.ParseGroup(number, ptr, ctx);
690 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
691 break;
692 }
693 case WireType::WIRETYPE_END_GROUP: {
694 GOOGLE_LOG(FATAL) << "Can't happen";
695 break;
696 }
697 case WireType::WIRETYPE_FIXED32: {
698 uint32 value = UnalignedLoad<uint32>(ptr);
699 ptr += 4;
700 field_parser.AddFixed32(number, value);
701 break;
702 }
703 default:
704 return nullptr;
705 }
706 return ptr;
707}
708
709template <typename T>
710PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
711 const char* ptr,
712 ParseContext* ctx) {
713 while (!ctx->Done(&ptr)) {
714 uint32 tag;
715 ptr = ReadTag(ptr, &tag);
716 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
717 if (tag == 0 || (tag & 7) == 4) {
718 ctx->SetLastTag(tag);
719 return ptr;
720 }
721 ptr = FieldParser(tag, field_parser, ptr, ctx);
722 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
723 }
724 return ptr;
725}
726
727// The packed parsers parse repeated numeric primitives directly into the
728// corresponding field
729
730// These are packed varints
731PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
732 void* object, const char* ptr, ParseContext* ctx);
733PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
734 void* object, const char* ptr, ParseContext* ctx);
735PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
736 void* object, const char* ptr, ParseContext* ctx);
737PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
738 void* object, const char* ptr, ParseContext* ctx);
739PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
740 void* object, const char* ptr, ParseContext* ctx);
741PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
742 void* object, const char* ptr, ParseContext* ctx);
743PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
744 void* object, const char* ptr, ParseContext* ctx);
745
746template <typename T>
747PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
748 void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int),
749 InternalMetadata* metadata, int field_num) {
750 return ctx->ReadPackedVarint(
751 ptr, [object, is_valid, metadata, field_num](uint64 val) {
752 if (is_valid(val)) {
753 static_cast<RepeatedField<int>*>(object)->Add(val);
754 } else {
755 WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
756 }
757 });
758}
759
760template <typename T>
761PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg(
762 void* object, const char* ptr, ParseContext* ctx,
763 bool (*is_valid)(const void*, int), const void* data,
764 InternalMetadata* metadata, int field_num) {
765 return ctx->ReadPackedVarint(
766 ptr, [object, is_valid, data, metadata, field_num](uint64 val) {
767 if (is_valid(data, val)) {
768 static_cast<RepeatedField<int>*>(object)->Add(val);
769 } else {
770 WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
771 }
772 });
773}
774
775PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
776 void* object, const char* ptr, ParseContext* ctx);
777PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
778 void* object, const char* ptr, ParseContext* ctx);
779PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
780 void* object, const char* ptr, ParseContext* ctx);
781PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
782 void* object, const char* ptr, ParseContext* ctx);
783PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
784 void* object, const char* ptr, ParseContext* ctx);
785PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
786 void* object, const char* ptr, ParseContext* ctx);
787PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
788 void* object, const char* ptr, ParseContext* ctx);
789
790// This is the only recursive parser.
791PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
792 std::string* unknown, const char* ptr, ParseContext* ctx);
793// This is a helper to for the UnknownGroupLiteParse but is actually also
794// useful in the generated code. It uses overload on std::string* vs
795// UnknownFieldSet* to make the generated code isomorphic between full and lite.
796PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
797 uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx);
798
799} // namespace internal
800} // namespace protobuf
801} // namespace google
802
803#include <google/protobuf/port_undef.inc>
804
805#endif // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
806