1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/Conv.h> |
18 | #include <array> |
19 | |
20 | namespace folly { |
21 | namespace detail { |
22 | |
23 | namespace { |
24 | |
/**
 * Scans forward from b and returns a pointer to the first character that is
 * not an ASCII decimal digit. If every character before e is a digit (or
 * the range is empty), the position where scanning stopped is returned.
 *
 * The input is expected to start with neither whitespace nor a sign; either
 * one would simply be reported as the first non-digit.
 */
inline const char* findFirstNonDigit(const char* b, const char* e) {
  while (b < e) {
    // Subtracting '0' in unsigned arithmetic maps '0'..'9' onto 0..9 and
    // wraps every other character to a value above 9.
    const unsigned offset = static_cast<unsigned>(*b) - '0';
    if (offset > 9) {
      return b;
    }
    ++b;
  }
  return b;
}
47 | |
// Decimal string representation of the largest value of the unsigned
// integral type T. digits_to() compares digit strings of the same length
// against it to detect overflow.
template <class T>
struct MaxString {
  static const char* const value;
};

template <>
const char* const MaxString<uint8_t>::value = "255";
template <>
const char* const MaxString<uint16_t>::value = "65535";
template <>
const char* const MaxString<uint32_t>::value = "4294967295";

// The width of unsigned long is platform dependent. The string is selected
// with sizeof() rather than with the compiler-specific __SIZEOF_LONG__ macro:
// where that macro is not predefined, `#if __SIZEOF_LONG__ == 4` evaluates it
// as 0 and silently selects the 64-bit string even if long is 32 bits wide.
static_assert(
    sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8,
    "Wrong value for MaxString<unsigned long>::value,"
    " please update.");
template <>
const char* const MaxString<unsigned long>::value =
    sizeof(unsigned long) == 4 ? "4294967295" : "18446744073709551615";

// The string below is only correct for a 64-bit unsigned long long.
static_assert(
    sizeof(unsigned long long) == 8,
    "Wrong value for MaxString<unsigned long long>::value"
    ", please update.");
template <>
const char* const MaxString<unsigned long long>::value = "18446744073709551615";

#if FOLLY_HAVE_INT128_T
template <>
const char* const MaxString<__uint128_t>::value =
    "340282366920938463463374607431768211455";
#endif
83 | |
/*
 * Lookup tables mapping a character code (used as the index, 0..255) to the
 * value of that decimal digit pre-multiplied by a power of ten (1, 10, 100
 * or 1000, as named by the table).
 * Entries for '0'..'9' hold digit * multiplier; every other entry holds the
 * OOR sentinel, so a sum of lookups that includes an invalid character is
 * at least OOR.
 *
 * Each row below covers 16 consecutive character codes; the trailing comment
 * gives the code of the first entry in the row.
 */
// Out-of-range sentinel. It is larger than the biggest sum four valid
// lookups can produce (9999), while four sentinels added together (40000)
// still fit in uint16_t.
constexpr int32_t OOR = 10000;

alignas(16) constexpr uint16_t shift1[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x00
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x20
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, OOR, OOR, OOR, OOR, OOR, OOR, // 0x30: '0'..'9'
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x40
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xA0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xB0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xC0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xD0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xE0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR // 0xF0
};

alignas(16) constexpr uint16_t shift10[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x00
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x20
    0, 10, 20, 30, 40, 50, 60, 70, 80, 90, OOR, OOR, OOR, OOR, OOR, OOR, // 0x30: '0'..'9'
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x40
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xA0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xB0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xC0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xD0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xE0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR // 0xF0
};

alignas(16) constexpr uint16_t shift100[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x00
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x20
    0, 100, 200, 300, 400, 500, 600, 700, 800, 900, OOR, OOR, OOR, OOR, OOR, OOR, // 0x30: '0'..'9'
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x40
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xA0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xB0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xC0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xD0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xE0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR // 0xF0
};

alignas(16) constexpr uint16_t shift1000[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x00
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x20
    0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, OOR, OOR, OOR, OOR, OOR, OOR, // 0x30: '0'..'9'
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x40
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0x90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xA0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xB0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xC0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xD0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0xE0
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR // 0xF0
};
211 | |
212 | struct ErrorString { |
213 | const char* string; |
214 | bool quote; |
215 | }; |
216 | |
217 | // Keep this in sync with ConversionCode in Conv.h |
218 | constexpr const std::array< |
219 | ErrorString, |
220 | static_cast<std::size_t>(ConversionCode::NUM_ERROR_CODES)> |
221 | kErrorStrings{{ |
222 | {"Success" , true}, |
223 | {"Empty input string" , true}, |
224 | {"No digits found in input string" , true}, |
225 | {"Integer overflow when parsing bool (must be 0 or 1)" , true}, |
226 | {"Invalid value for bool" , true}, |
227 | {"Non-digit character found" , true}, |
228 | {"Invalid leading character" , true}, |
229 | {"Overflow during conversion" , true}, |
230 | {"Negative overflow during conversion" , true}, |
231 | {"Unable to convert string to floating point value" , true}, |
232 | {"Non-whitespace character found after end of conversion" , true}, |
233 | {"Overflow during arithmetic conversion" , false}, |
234 | {"Negative overflow during arithmetic conversion" , false}, |
235 | {"Loss of precision during arithmetic conversion" , false}, |
236 | }}; |
237 | |
238 | // Check if ASCII is really ASCII |
239 | using IsAscii = |
240 | bool_constant<'A' == 65 && 'Z' == 90 && 'a' == 97 && 'z' == 122>; |
241 | |
242 | // The code in this file that uses tolower() really only cares about |
243 | // 7-bit ASCII characters, so we can take a nice shortcut here. |
244 | inline char tolower_ascii(char in) { |
245 | return IsAscii::value ? in | 0x20 : char(std::tolower(in)); |
246 | } |
247 | |
248 | inline bool bool_str_cmp(const char** b, size_t len, const char* value) { |
249 | // Can't use strncasecmp, since we want to ensure that the full value matches |
250 | const char* p = *b; |
251 | const char* e = *b + len; |
252 | const char* v = value; |
253 | while (*v != '\0') { |
254 | if (p == e || tolower_ascii(*p) != *v) { // value is already lowercase |
255 | return false; |
256 | } |
257 | ++p; |
258 | ++v; |
259 | } |
260 | |
261 | *b = p; |
262 | return true; |
263 | } |
264 | |
265 | } // namespace |
266 | |
267 | Expected<bool, ConversionCode> str_to_bool(StringPiece* src) noexcept { |
268 | auto b = src->begin(), e = src->end(); |
269 | for (;; ++b) { |
270 | if (b >= e) { |
271 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
272 | } |
273 | if (!std::isspace(*b)) { |
274 | break; |
275 | } |
276 | } |
277 | |
278 | bool result; |
279 | auto len = size_t(e - b); |
280 | switch (*b) { |
281 | case '0': |
282 | case '1': { |
283 | result = false; |
284 | for (; b < e && isdigit(*b); ++b) { |
285 | if (result || (*b != '0' && *b != '1')) { |
286 | return makeUnexpected(ConversionCode::BOOL_OVERFLOW); |
287 | } |
288 | result = (*b == '1'); |
289 | } |
290 | break; |
291 | } |
292 | case 'y': |
293 | case 'Y': |
294 | result = true; |
295 | if (!bool_str_cmp(&b, len, "yes" )) { |
296 | ++b; // accept the single 'y' character |
297 | } |
298 | break; |
299 | case 'n': |
300 | case 'N': |
301 | result = false; |
302 | if (!bool_str_cmp(&b, len, "no" )) { |
303 | ++b; |
304 | } |
305 | break; |
306 | case 't': |
307 | case 'T': |
308 | result = true; |
309 | if (!bool_str_cmp(&b, len, "true" )) { |
310 | ++b; |
311 | } |
312 | break; |
313 | case 'f': |
314 | case 'F': |
315 | result = false; |
316 | if (!bool_str_cmp(&b, len, "false" )) { |
317 | ++b; |
318 | } |
319 | break; |
320 | case 'o': |
321 | case 'O': |
322 | if (bool_str_cmp(&b, len, "on" )) { |
323 | result = true; |
324 | } else if (bool_str_cmp(&b, len, "off" )) { |
325 | result = false; |
326 | } else { |
327 | return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); |
328 | } |
329 | break; |
330 | default: |
331 | return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); |
332 | } |
333 | |
334 | src->assign(b, e); |
335 | |
336 | return result; |
337 | } |
338 | |
339 | /** |
340 | * StringPiece to double, with progress information. Alters the |
341 | * StringPiece parameter to munch the already-parsed characters. |
342 | */ |
343 | template <class Tgt> |
344 | Expected<Tgt, ConversionCode> str_to_floating(StringPiece* src) noexcept { |
345 | using namespace double_conversion; |
346 | static StringToDoubleConverter conv( |
347 | StringToDoubleConverter::ALLOW_TRAILING_JUNK | |
348 | StringToDoubleConverter::ALLOW_LEADING_SPACES, |
349 | 0.0, |
350 | // return this for junk input string |
351 | std::numeric_limits<double>::quiet_NaN(), |
352 | nullptr, |
353 | nullptr); |
354 | |
355 | if (src->empty()) { |
356 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
357 | } |
358 | |
359 | int length; |
360 | auto result = conv.StringToDouble( |
361 | src->data(), |
362 | static_cast<int>(src->size()), |
363 | &length); // processed char count |
364 | |
365 | if (!std::isnan(result)) { |
366 | // If we get here with length = 0, the input string is empty. |
367 | // If we get here with result = 0.0, it's either because the string |
368 | // contained only whitespace, or because we had an actual zero value |
369 | // (with potential trailing junk). If it was only whitespace, we |
370 | // want to raise an error; length will point past the last character |
371 | // that was processed, so we need to check if that character was |
372 | // whitespace or not. |
373 | if (length == 0 || |
374 | (result == 0.0 && std::isspace((*src)[size_t(length) - 1]))) { |
375 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
376 | } |
377 | if (length >= 2) { |
378 | const char* suffix = src->data() + length - 1; |
379 | // double_conversion doesn't update length correctly when there is an |
380 | // incomplete exponent specifier. Converting "12e-f-g" shouldn't consume |
381 | // any more than "12", but it will consume "12e-". |
382 | |
383 | // "123-" should only parse "123" |
384 | if (*suffix == '-' || *suffix == '+') { |
385 | --suffix; |
386 | --length; |
387 | } |
388 | // "12e-f-g" or "12euro" should only parse "12" |
389 | if (*suffix == 'e' || *suffix == 'E') { |
390 | --length; |
391 | } |
392 | } |
393 | src->advance(size_t(length)); |
394 | return Tgt(result); |
395 | } |
396 | |
397 | auto* e = src->end(); |
398 | auto* b = |
399 | std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); }); |
400 | |
401 | // There must be non-whitespace, otherwise we would have caught this above |
402 | assert(b < e); |
403 | auto size = size_t(e - b); |
404 | |
405 | bool negative = false; |
406 | if (*b == '-') { |
407 | negative = true; |
408 | ++b; |
409 | --size; |
410 | } |
411 | |
412 | result = 0.0; |
413 | |
414 | switch (tolower_ascii(*b)) { |
415 | case 'i': |
416 | if (size >= 3 && tolower_ascii(b[1]) == 'n' && |
417 | tolower_ascii(b[2]) == 'f') { |
418 | if (size >= 8 && tolower_ascii(b[3]) == 'i' && |
419 | tolower_ascii(b[4]) == 'n' && tolower_ascii(b[5]) == 'i' && |
420 | tolower_ascii(b[6]) == 't' && tolower_ascii(b[7]) == 'y') { |
421 | b += 8; |
422 | } else { |
423 | b += 3; |
424 | } |
425 | result = std::numeric_limits<Tgt>::infinity(); |
426 | } |
427 | break; |
428 | |
429 | case 'n': |
430 | if (size >= 3 && tolower_ascii(b[1]) == 'a' && |
431 | tolower_ascii(b[2]) == 'n') { |
432 | b += 3; |
433 | result = std::numeric_limits<Tgt>::quiet_NaN(); |
434 | } |
435 | break; |
436 | |
437 | default: |
438 | break; |
439 | } |
440 | |
441 | if (result == 0.0) { |
442 | // All bets are off |
443 | return makeUnexpected(ConversionCode::STRING_TO_FLOAT_ERROR); |
444 | } |
445 | |
446 | if (negative) { |
447 | result = -result; |
448 | } |
449 | |
450 | src->assign(b, e); |
451 | |
452 | return Tgt(result); |
453 | } |
454 | |
455 | template Expected<float, ConversionCode> str_to_floating<float>( |
456 | StringPiece* src) noexcept; |
457 | template Expected<double, ConversionCode> str_to_floating<double>( |
458 | StringPiece* src) noexcept; |
459 | |
460 | /** |
461 | * This class takes care of additional processing needed for signed values, |
462 | * like leading sign character and overflow checks. |
463 | */ |
464 | template <typename T, bool IsSigned = std::is_signed<T>::value> |
465 | class SignedValueHandler; |
466 | |
467 | template <typename T> |
468 | class SignedValueHandler<T, true> { |
469 | public: |
470 | ConversionCode init(const char*& b) { |
471 | negative_ = false; |
472 | if (!std::isdigit(*b)) { |
473 | if (*b == '-') { |
474 | negative_ = true; |
475 | } else if (UNLIKELY(*b != '+')) { |
476 | return ConversionCode::INVALID_LEADING_CHAR; |
477 | } |
478 | ++b; |
479 | } |
480 | return ConversionCode::SUCCESS; |
481 | } |
482 | |
483 | ConversionCode overflow() { |
484 | return negative_ ? ConversionCode::NEGATIVE_OVERFLOW |
485 | : ConversionCode::POSITIVE_OVERFLOW; |
486 | } |
487 | |
488 | template <typename U> |
489 | Expected<T, ConversionCode> finalize(U value) { |
490 | T rv; |
491 | if (negative_) { |
492 | rv = T(-value); |
493 | if (UNLIKELY(rv > 0)) { |
494 | return makeUnexpected(ConversionCode::NEGATIVE_OVERFLOW); |
495 | } |
496 | } else { |
497 | rv = T(value); |
498 | if (UNLIKELY(rv < 0)) { |
499 | return makeUnexpected(ConversionCode::POSITIVE_OVERFLOW); |
500 | } |
501 | } |
502 | return rv; |
503 | } |
504 | |
505 | private: |
506 | bool negative_; |
507 | }; |
508 | |
509 | // For unsigned types, we don't need any extra processing |
510 | template <typename T> |
511 | class SignedValueHandler<T, false> { |
512 | public: |
513 | ConversionCode init(const char*&) { |
514 | return ConversionCode::SUCCESS; |
515 | } |
516 | |
517 | ConversionCode overflow() { |
518 | return ConversionCode::POSITIVE_OVERFLOW; |
519 | } |
520 | |
521 | Expected<T, ConversionCode> finalize(T value) { |
522 | return value; |
523 | } |
524 | }; |
525 | |
526 | /** |
527 | * String represented as a pair of pointers to char to signed/unsigned |
528 | * integrals. Assumes NO whitespace before or after, and also that the |
529 | * string is composed entirely of digits (and an optional sign only for |
530 | * signed types). String may be empty, in which case digits_to returns |
531 | * an appropriate error. |
532 | */ |
533 | template <class Tgt> |
534 | inline Expected<Tgt, ConversionCode> digits_to( |
535 | const char* b, |
536 | const char* const e) noexcept { |
537 | using UT = typename std::make_unsigned<Tgt>::type; |
538 | assert(b <= e); |
539 | |
540 | SignedValueHandler<Tgt> sgn; |
541 | |
542 | auto err = sgn.init(b); |
543 | if (UNLIKELY(err != ConversionCode::SUCCESS)) { |
544 | return makeUnexpected(err); |
545 | } |
546 | |
547 | auto size = size_t(e - b); |
548 | |
549 | /* Although the string is entirely made of digits, we still need to |
550 | * check for overflow. |
551 | */ |
552 | if (size > std::numeric_limits<UT>::digits10) { |
553 | // Leading zeros? |
554 | if (b < e && *b == '0') { |
555 | for (++b;; ++b) { |
556 | if (b == e) { |
557 | return Tgt(0); // just zeros, e.g. "0000" |
558 | } |
559 | if (*b != '0') { |
560 | size = size_t(e - b); |
561 | break; |
562 | } |
563 | } |
564 | } |
565 | if (size > std::numeric_limits<UT>::digits10 && |
566 | (size != std::numeric_limits<UT>::digits10 + 1 || |
567 | strncmp(b, MaxString<UT>::value, size) > 0)) { |
568 | return makeUnexpected(sgn.overflow()); |
569 | } |
570 | } |
571 | |
572 | // Here we know that the number won't overflow when |
573 | // converted. Proceed without checks. |
574 | |
575 | UT result = 0; |
576 | |
577 | for (; e - b >= 4; b += 4) { |
578 | result *= UT(10000); |
579 | const int32_t r0 = shift1000[static_cast<size_t>(b[0])]; |
580 | const int32_t r1 = shift100[static_cast<size_t>(b[1])]; |
581 | const int32_t r2 = shift10[static_cast<size_t>(b[2])]; |
582 | const int32_t r3 = shift1[static_cast<size_t>(b[3])]; |
583 | const auto sum = r0 + r1 + r2 + r3; |
584 | if (sum >= OOR) { |
585 | goto outOfRange; |
586 | } |
587 | result += UT(sum); |
588 | } |
589 | |
590 | switch (e - b) { |
591 | case 3: { |
592 | const int32_t r0 = shift100[static_cast<size_t>(b[0])]; |
593 | const int32_t r1 = shift10[static_cast<size_t>(b[1])]; |
594 | const int32_t r2 = shift1[static_cast<size_t>(b[2])]; |
595 | const auto sum = r0 + r1 + r2; |
596 | if (sum >= OOR) { |
597 | goto outOfRange; |
598 | } |
599 | result = UT(1000 * result + sum); |
600 | break; |
601 | } |
602 | case 2: { |
603 | const int32_t r0 = shift10[static_cast<size_t>(b[0])]; |
604 | const int32_t r1 = shift1[static_cast<size_t>(b[1])]; |
605 | const auto sum = r0 + r1; |
606 | if (sum >= OOR) { |
607 | goto outOfRange; |
608 | } |
609 | result = UT(100 * result + sum); |
610 | break; |
611 | } |
612 | case 1: { |
613 | const int32_t sum = shift1[static_cast<size_t>(b[0])]; |
614 | if (sum >= OOR) { |
615 | goto outOfRange; |
616 | } |
617 | result = UT(10 * result + sum); |
618 | break; |
619 | } |
620 | default: |
621 | assert(b == e); |
622 | if (size == 0) { |
623 | return makeUnexpected(ConversionCode::NO_DIGITS); |
624 | } |
625 | break; |
626 | } |
627 | |
628 | return sgn.finalize(result); |
629 | |
630 | outOfRange: |
631 | return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); |
632 | } |
633 | |
634 | template Expected<char, ConversionCode> digits_to<char>( |
635 | const char*, |
636 | const char*) noexcept; |
637 | template Expected<signed char, ConversionCode> digits_to<signed char>( |
638 | const char*, |
639 | const char*) noexcept; |
640 | template Expected<unsigned char, ConversionCode> digits_to<unsigned char>( |
641 | const char*, |
642 | const char*) noexcept; |
643 | |
644 | template Expected<short, ConversionCode> digits_to<short>( |
645 | const char*, |
646 | const char*) noexcept; |
647 | template Expected<unsigned short, ConversionCode> digits_to<unsigned short>( |
648 | const char*, |
649 | const char*) noexcept; |
650 | |
651 | template Expected<int, ConversionCode> digits_to<int>( |
652 | const char*, |
653 | const char*) noexcept; |
654 | template Expected<unsigned int, ConversionCode> digits_to<unsigned int>( |
655 | const char*, |
656 | const char*) noexcept; |
657 | |
658 | template Expected<long, ConversionCode> digits_to<long>( |
659 | const char*, |
660 | const char*) noexcept; |
661 | template Expected<unsigned long, ConversionCode> digits_to<unsigned long>( |
662 | const char*, |
663 | const char*) noexcept; |
664 | |
665 | template Expected<long long, ConversionCode> digits_to<long long>( |
666 | const char*, |
667 | const char*) noexcept; |
668 | template Expected<unsigned long long, ConversionCode> |
669 | digits_to<unsigned long long>(const char*, const char*) noexcept; |
670 | |
671 | #if FOLLY_HAVE_INT128_T |
672 | template Expected<__int128, ConversionCode> digits_to<__int128>( |
673 | const char*, |
674 | const char*) noexcept; |
675 | template Expected<unsigned __int128, ConversionCode> |
676 | digits_to<unsigned __int128>(const char*, const char*) noexcept; |
677 | #endif |
678 | |
679 | /** |
680 | * StringPiece to integrals, with progress information. Alters the |
681 | * StringPiece parameter to munch the already-parsed characters. |
682 | */ |
683 | template <class Tgt> |
684 | Expected<Tgt, ConversionCode> str_to_integral(StringPiece* src) noexcept { |
685 | using UT = typename std::make_unsigned<Tgt>::type; |
686 | |
687 | auto b = src->data(), past = src->data() + src->size(); |
688 | |
689 | for (;; ++b) { |
690 | if (UNLIKELY(b >= past)) { |
691 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
692 | } |
693 | if (!std::isspace(*b)) { |
694 | break; |
695 | } |
696 | } |
697 | |
698 | SignedValueHandler<Tgt> sgn; |
699 | auto err = sgn.init(b); |
700 | |
701 | if (UNLIKELY(err != ConversionCode::SUCCESS)) { |
702 | return makeUnexpected(err); |
703 | } |
704 | if (std::is_signed<Tgt>::value && UNLIKELY(b >= past)) { |
705 | return makeUnexpected(ConversionCode::NO_DIGITS); |
706 | } |
707 | if (UNLIKELY(!isdigit(*b))) { |
708 | return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); |
709 | } |
710 | |
711 | auto m = findFirstNonDigit(b + 1, past); |
712 | |
713 | auto tmp = digits_to<UT>(b, m); |
714 | |
715 | if (UNLIKELY(!tmp.hasValue())) { |
716 | return makeUnexpected( |
717 | tmp.error() == ConversionCode::POSITIVE_OVERFLOW ? sgn.overflow() |
718 | : tmp.error()); |
719 | } |
720 | |
721 | auto res = sgn.finalize(tmp.value()); |
722 | |
723 | if (res.hasValue()) { |
724 | src->advance(size_t(m - src->data())); |
725 | } |
726 | |
727 | return res; |
728 | } |
729 | |
730 | template Expected<char, ConversionCode> str_to_integral<char>( |
731 | StringPiece* src) noexcept; |
732 | template Expected<signed char, ConversionCode> str_to_integral<signed char>( |
733 | StringPiece* src) noexcept; |
734 | template Expected<unsigned char, ConversionCode> str_to_integral<unsigned char>( |
735 | StringPiece* src) noexcept; |
736 | |
737 | template Expected<short, ConversionCode> str_to_integral<short>( |
738 | StringPiece* src) noexcept; |
739 | template Expected<unsigned short, ConversionCode> |
740 | str_to_integral<unsigned short>(StringPiece* src) noexcept; |
741 | |
742 | template Expected<int, ConversionCode> str_to_integral<int>( |
743 | StringPiece* src) noexcept; |
744 | template Expected<unsigned int, ConversionCode> str_to_integral<unsigned int>( |
745 | StringPiece* src) noexcept; |
746 | |
747 | template Expected<long, ConversionCode> str_to_integral<long>( |
748 | StringPiece* src) noexcept; |
749 | template Expected<unsigned long, ConversionCode> str_to_integral<unsigned long>( |
750 | StringPiece* src) noexcept; |
751 | |
752 | template Expected<long long, ConversionCode> str_to_integral<long long>( |
753 | StringPiece* src) noexcept; |
754 | template Expected<unsigned long long, ConversionCode> |
755 | str_to_integral<unsigned long long>(StringPiece* src) noexcept; |
756 | |
757 | #if FOLLY_HAVE_INT128_T |
758 | template Expected<__int128, ConversionCode> str_to_integral<__int128>( |
759 | StringPiece* src) noexcept; |
760 | template Expected<unsigned __int128, ConversionCode> |
761 | str_to_integral<unsigned __int128>(StringPiece* src) noexcept; |
762 | #endif |
763 | |
764 | } // namespace detail |
765 | |
766 | ConversionError makeConversionError(ConversionCode code, StringPiece input) { |
767 | using namespace detail; |
768 | static_assert( |
769 | std::is_unsigned<std::underlying_type<ConversionCode>::type>::value, |
770 | "ConversionCode should be unsigned" ); |
771 | assert((std::size_t)code < kErrorStrings.size()); |
772 | const ErrorString& err = kErrorStrings[(std::size_t)code]; |
773 | if (code == ConversionCode::EMPTY_INPUT_STRING && input.empty()) { |
774 | return {err.string, code}; |
775 | } |
776 | std::string tmp(err.string); |
777 | tmp.append(": " ); |
778 | if (err.quote) { |
779 | tmp.append(1, '"'); |
780 | } |
781 | if (!input.empty()) { |
782 | tmp.append(input.data(), input.size()); |
783 | } |
784 | if (err.quote) { |
785 | tmp.append(1, '"'); |
786 | } |
787 | return {tmp, code}; |
788 | } |
789 | |
790 | } // namespace folly |
791 | |