1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <type_traits> |
20 | |
21 | #include <folly/Conv.h> |
22 | #include <folly/Expected.h> |
23 | #include <folly/Likely.h> |
24 | #include <folly/Portability.h> |
25 | #include <folly/Range.h> |
26 | |
27 | namespace folly { |
28 | |
29 | /** |
30 | * Variable-length integer encoding, using a little-endian, base-128 |
31 | * representation. |
32 | * |
33 | * The MSb is set on all bytes except the last. |
34 | * |
35 | * Details: |
36 | * https://developers.google.com/protocol-buffers/docs/encoding#varints |
37 | * |
38 | * If you want to encode multiple values, GroupVarint (in GroupVarint.h) |
39 | * is faster and likely smaller. |
40 | */ |
41 | |
42 | /** |
43 | * Maximum length (in bytes) of the varint encoding of a 32-bit value. |
44 | */ |
45 | constexpr size_t kMaxVarintLength32 = 5; |
46 | |
47 | /** |
48 | * Maximum length (in bytes) of the varint encoding of a 64-bit value. |
49 | */ |
50 | constexpr size_t kMaxVarintLength64 = 10; |
51 | |
52 | /** |
53 | * Encode a value in the given buffer, returning the number of bytes used |
54 | * for encoding. |
55 | * buf must have enough space to represent the value (at least |
56 | * kMaxVarintLength64 bytes to encode arbitrary 64-bit values) |
57 | */ |
58 | size_t encodeVarint(uint64_t val, uint8_t* buf); |
59 | |
60 | /** |
61 | * Determine the number of bytes needed to represent "val". |
62 | * 32-bit values need at most 5 bytes. |
63 | * 64-bit values need at most 10 bytes. |
64 | */ |
65 | int encodeVarintSize(uint64_t val); |
66 | |
67 | /** |
68 | * Decode a value from a given buffer, advances data past the returned value. |
69 | * Throws on error. |
70 | */ |
71 | template <class T> |
72 | uint64_t decodeVarint(Range<T*>& data); |
73 | |
74 | enum class DecodeVarintError { |
75 | TooManyBytes = 0, |
76 | TooFewBytes = 1, |
77 | }; |
78 | |
79 | /** |
80 | * A variant of decodeVarint() that does not throw on error. Useful in contexts |
81 | * where only part of a serialized varint may be attempted to be decoded, e.g., |
82 | * when a serialized varint arrives on the boundary of a network packet. |
83 | */ |
84 | template <class T> |
85 | Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data); |
86 | |
87 | /** |
88 | * ZigZag encoding that maps signed integers with a small absolute value |
89 | * to unsigned integers with a small (positive) values. Without this, |
90 | * encoding negative values using Varint would use up 9 or 10 bytes. |
91 | * |
92 | * if x >= 0, encodeZigZag(x) == 2*x |
93 | * if x < 0, encodeZigZag(x) == -2*x + 1 |
94 | */ |
95 | |
96 | inline uint64_t encodeZigZag(int64_t val) { |
97 | // Bit-twiddling magic stolen from the Google protocol buffer document; |
98 | // val >> 63 is an arithmetic shift because val is signed |
99 | auto uval = static_cast<uint64_t>(val); |
100 | return static_cast<uint64_t>((uval << 1) ^ (val >> 63)); |
101 | } |
102 | |
103 | inline int64_t decodeZigZag(uint64_t val) { |
104 | return static_cast<int64_t>((val >> 1) ^ -(val & 1)); |
105 | } |
106 | |
107 | // Implementation below |
108 | |
109 | inline size_t encodeVarint(uint64_t val, uint8_t* buf) { |
110 | uint8_t* p = buf; |
111 | while (val >= 128) { |
112 | *p++ = 0x80 | (val & 0x7f); |
113 | val >>= 7; |
114 | } |
115 | *p++ = uint8_t(val); |
116 | return size_t(p - buf); |
117 | } |
118 | |
119 | inline int encodeVarintSize(uint64_t val) { |
120 | if (folly::kIsArchAmd64) { |
121 | // __builtin_clzll is undefined for 0 |
122 | int highBit = 64 - __builtin_clzll(val | 1); |
123 | return (highBit + 6) / 7; |
124 | } else { |
125 | int s = 1; |
126 | while (val >= 128) { |
127 | ++s; |
128 | val >>= 7; |
129 | } |
130 | return s; |
131 | } |
132 | } |
133 | |
134 | template <class T> |
135 | inline uint64_t decodeVarint(Range<T*>& data) { |
136 | auto expected = tryDecodeVarint(data); |
137 | if (!expected) { |
138 | throw std::invalid_argument( |
139 | expected.error() == DecodeVarintError::TooManyBytes |
140 | ? "Invalid varint value: too many bytes." |
141 | : "Invalid varint value: too few bytes." ); |
142 | } |
143 | return *expected; |
144 | } |
145 | |
146 | template <class T> |
147 | inline Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data) { |
148 | static_assert( |
149 | std::is_same<typename std::remove_cv<T>::type, char>::value || |
150 | std::is_same<typename std::remove_cv<T>::type, unsigned char>::value, |
151 | "Only character ranges are supported" ); |
152 | |
153 | const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin()); |
154 | const int8_t* end = reinterpret_cast<const int8_t*>(data.end()); |
155 | const int8_t* p = begin; |
156 | uint64_t val = 0; |
157 | |
158 | // end is always greater than or equal to begin, so this subtraction is safe |
159 | if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path |
160 | int64_t b; |
161 | do { |
162 | b = *p++; |
163 | val = (b & 0x7f); |
164 | if (b >= 0) { |
165 | break; |
166 | } |
167 | b = *p++; |
168 | val |= (b & 0x7f) << 7; |
169 | if (b >= 0) { |
170 | break; |
171 | } |
172 | b = *p++; |
173 | val |= (b & 0x7f) << 14; |
174 | if (b >= 0) { |
175 | break; |
176 | } |
177 | b = *p++; |
178 | val |= (b & 0x7f) << 21; |
179 | if (b >= 0) { |
180 | break; |
181 | } |
182 | b = *p++; |
183 | val |= (b & 0x7f) << 28; |
184 | if (b >= 0) { |
185 | break; |
186 | } |
187 | b = *p++; |
188 | val |= (b & 0x7f) << 35; |
189 | if (b >= 0) { |
190 | break; |
191 | } |
192 | b = *p++; |
193 | val |= (b & 0x7f) << 42; |
194 | if (b >= 0) { |
195 | break; |
196 | } |
197 | b = *p++; |
198 | val |= (b & 0x7f) << 49; |
199 | if (b >= 0) { |
200 | break; |
201 | } |
202 | b = *p++; |
203 | val |= (b & 0x7f) << 56; |
204 | if (b >= 0) { |
205 | break; |
206 | } |
207 | b = *p++; |
208 | val |= (b & 0x01) << 63; |
209 | if (b >= 0) { |
210 | break; |
211 | } |
212 | return makeUnexpected(DecodeVarintError::TooManyBytes); |
213 | } while (false); |
214 | } else { |
215 | int shift = 0; |
216 | while (p != end && *p < 0) { |
217 | val |= static_cast<uint64_t>(*p++ & 0x7f) << shift; |
218 | shift += 7; |
219 | } |
220 | if (p == end) { |
221 | return makeUnexpected(DecodeVarintError::TooFewBytes); |
222 | } |
223 | val |= static_cast<uint64_t>(*p++) << shift; |
224 | } |
225 | |
226 | data.uncheckedAdvance(p - begin); |
227 | return val; |
228 | } |
229 | |
230 | } // namespace folly |
231 | |