1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/String.h> |
18 | |
19 | #include <cctype> |
20 | #include <cerrno> |
21 | #include <cstdarg> |
22 | #include <cstring> |
23 | #include <iterator> |
24 | #include <sstream> |
25 | #include <stdexcept> |
26 | |
27 | #include <glog/logging.h> |
28 | |
29 | #include <folly/Portability.h> |
30 | #include <folly/ScopeGuard.h> |
31 | #include <folly/container/Array.h> |
32 | |
33 | namespace folly { |
34 | |
// Compile-time checks of the IsConvertible trait: it must be true for float,
// int and bool, and false for std::vector<int>.
static_assert(IsConvertible<float>::value, "");
static_assert(IsConvertible<int>::value, "");
static_assert(IsConvertible<bool>::value, "");
// NOTE(review): this repeats the int check two lines up.
static_assert(IsConvertible<int>::value, "");
static_assert(!IsConvertible<std::vector<int>>::value, "");
40 | |
41 | namespace detail { |
42 | |
// Generator for one entry of the C-escape classification table.
//
// For a byte value `index` the result is:
//   '"', '\\', '?'   -> the same character
//   '\n', '\r', '\t' -> 'n', 'r', 't'
//   32..126          -> 'P' (printable)
//   anything else    -> 'O' (octal)
struct string_table_c_escape_make_item {
  constexpr char operator()(std::size_t index) const {
    // clang-format off
    return
        (index == '"' || index == '\\' || index == '?')
            ? static_cast<char>(index) :          // maps to itself
        index == '\n' ? 'n' :
        index == '\r' ? 'r' :
        index == '\t' ? 't' :
        (index >= 32 && index <= 126) ? 'P' :     // printable
        'O';                                      // octal
    // clang-format on
  }
};
58 | |
// Generator for one entry of the C-unescape table.
//
// For a byte value `index` the result is:
//   '\'', '?', '\\', '"'          -> the same character
//   'a','b','f','n','r','t','v'   -> the matching control character
//   '0'..'7'                      -> 'O' (octal)
//   'x'                           -> 'X' (hex)
//   anything else                 -> 'I' (invalid)
struct string_table_c_unescape_make_item {
  constexpr char operator()(std::size_t index) const {
    // clang-format off
    return
        (index == '\'' || index == '?' || index == '\\' || index == '"')
            ? static_cast<char>(index) :          // maps to itself
        index == 'a' ? '\a' :
        index == 'b' ? '\b' :
        index == 'f' ? '\f' :
        index == 'n' ? '\n' :
        index == 'r' ? '\r' :
        index == 't' ? '\t' :
        index == 'v' ? '\v' :
        index == 'x' ? 'X' :                      // hex
        (index >= '0' && index <= '7') ? 'O' :    // octal
        'I';                                      // invalid
    // clang-format on
  }
};
80 | |
// Generator for one entry of the hex-digit table: the numeric value (0..15)
// of `index` when it is an ASCII hex digit in either case, otherwise 16.
struct string_table_hex_make_item {
  constexpr unsigned char operator()(std::size_t index) const {
    // clang-format off
    return static_cast<unsigned char>(
        (index >= '0' && index <= '9') ? index - '0' :
        (index >= 'A' && index <= 'F') ? index - 'A' + 10 :
        (index >= 'a' && index <= 'f') ? index - 'a' + 10 :
        16);  // not a hex digit
    // clang-format on
  }
};
92 | |
// Generator for one entry of the URI-escape classification table.
struct string_table_uri_escape_make_item {
  // Result codes:
  //   0 = passthrough
  //   1 = unused
  //   2 = safe in path (/)
  //   3 = space (replace with '+' in query)
  //   4 = always percent-encode
  constexpr unsigned char operator()(std::size_t index) const {
    // clang-format off
    return
        ((index >= '0' && index <= '9') ||
         (index >= 'A' && index <= 'Z') ||
         (index >= 'a' && index <= 'z') ||
         index == '-' || index == '_' ||
         index == '.' || index == '~') ? 0 :
        index == '/' ? 2 :
        index == ' ' ? 3 :
        4;
    // clang-format on
  }
};
115 | |
// Definitions of the four 256-entry lookup tables; decltype picks up the type
// each table was declared with elsewhere. Every table is built by
// make_array_with<256> from the functor of the matching name above --
// presumably entry i holds functor(i); confirm against
// folly/container/Array.h.
FOLLY_STORAGE_CONSTEXPR decltype(cEscapeTable) cEscapeTable =
    make_array_with<256>(string_table_c_escape_make_item{});
FOLLY_STORAGE_CONSTEXPR decltype(cUnescapeTable) cUnescapeTable =
    make_array_with<256>(string_table_c_unescape_make_item{});
FOLLY_STORAGE_CONSTEXPR decltype(hexTable) hexTable =
    make_array_with<256>(string_table_hex_make_item{});
FOLLY_STORAGE_CONSTEXPR decltype(uriEscapeTable) uriEscapeTable =
    make_array_with<256>(string_table_uri_escape_make_item{});
124 | |
125 | } // namespace detail |
126 | |
// True for the whitespace characters other than ' ' that the trim functions
// strip: newline, tab and carriage return.
static inline bool is_oddspace(char c) {
  switch (c) {
    case '\n':
    case '\t':
    case '\r':
      return true;
    default:
      return false;
  }
}
130 | |
131 | StringPiece ltrimWhitespace(StringPiece sp) { |
132 | // Spaces other than ' ' characters are less common but should be |
133 | // checked. This configuration where we loop on the ' ' |
134 | // separately from oddspaces was empirically fastest. |
135 | |
136 | while (true) { |
137 | while (!sp.empty() && sp.front() == ' ') { |
138 | sp.pop_front(); |
139 | } |
140 | if (!sp.empty() && is_oddspace(sp.front())) { |
141 | sp.pop_front(); |
142 | continue; |
143 | } |
144 | |
145 | return sp; |
146 | } |
147 | } |
148 | |
149 | StringPiece rtrimWhitespace(StringPiece sp) { |
150 | // Spaces other than ' ' characters are less common but should be |
151 | // checked. This configuration where we loop on the ' ' |
152 | // separately from oddspaces was empirically fastest. |
153 | |
154 | while (true) { |
155 | while (!sp.empty() && sp.back() == ' ') { |
156 | sp.pop_back(); |
157 | } |
158 | if (!sp.empty() && is_oddspace(sp.back())) { |
159 | sp.pop_back(); |
160 | continue; |
161 | } |
162 | |
163 | return sp; |
164 | } |
165 | } |
166 | |
167 | namespace { |
168 | |
// Formats into `buf` (capacity `bufsize`) with vsnprintf and returns
// vsnprintf's result unchanged.
//
// The formatting is done on a private copy of `args`, so the caller's
// va_list is not consumed by this call.
int stringAppendfImplHelper(
    char* buf,
    size_t bufsize,
    const char* format,
    va_list args) {
  va_list scratch;
  va_copy(scratch, args);
  const int written = vsnprintf(buf, bufsize, format, scratch);
  va_end(scratch);
  return written;
}
180 | |
181 | void stringAppendfImpl(std::string& output, const char* format, va_list args) { |
182 | // Very simple; first, try to avoid an allocation by using an inline |
183 | // buffer. If that fails to hold the output string, allocate one on |
184 | // the heap, use it instead. |
185 | // |
186 | // It is hard to guess the proper size of this buffer; some |
187 | // heuristics could be based on the number of format characters, or |
188 | // static analysis of a codebase. Or, we can just pick a number |
189 | // that seems big enough for simple cases (say, one line of text on |
190 | // a terminal) without being large enough to be concerning as a |
191 | // stack variable. |
192 | std::array<char, 128> inline_buffer; |
193 | |
194 | int bytes_used = stringAppendfImplHelper( |
195 | inline_buffer.data(), inline_buffer.size(), format, args); |
196 | if (bytes_used < 0) { |
197 | throw std::runtime_error(to<std::string>( |
198 | "Invalid format string; snprintf returned negative " |
199 | "with format string: " , |
200 | format)); |
201 | } |
202 | |
203 | if (static_cast<size_t>(bytes_used) < inline_buffer.size()) { |
204 | output.append(inline_buffer.data(), size_t(bytes_used)); |
205 | return; |
206 | } |
207 | |
208 | // Couldn't fit. Heap allocate a buffer, oh well. |
209 | std::unique_ptr<char[]> heap_buffer(new char[size_t(bytes_used + 1)]); |
210 | int final_bytes_used = stringAppendfImplHelper( |
211 | heap_buffer.get(), size_t(bytes_used + 1), format, args); |
212 | // The second call can take fewer bytes if, for example, we were printing a |
213 | // string buffer with null-terminating char using a width specifier - |
214 | // vsnprintf("%.*s", buf.size(), buf) |
215 | CHECK(bytes_used >= final_bytes_used); |
216 | |
217 | // We don't keep the trailing '\0' in our output string |
218 | output.append(heap_buffer.get(), size_t(final_bytes_used)); |
219 | } |
220 | |
221 | } // namespace |
222 | |
223 | std::string stringPrintf(const char* format, ...) { |
224 | va_list ap; |
225 | va_start(ap, format); |
226 | SCOPE_EXIT { |
227 | va_end(ap); |
228 | }; |
229 | return stringVPrintf(format, ap); |
230 | } |
231 | |
232 | std::string stringVPrintf(const char* format, va_list ap) { |
233 | std::string ret; |
234 | stringAppendfImpl(ret, format, ap); |
235 | return ret; |
236 | } |
237 | |
238 | // Basic declarations; allow for parameters of strings and string |
239 | // pieces to be specified. |
240 | std::string& stringAppendf(std::string* output, const char* format, ...) { |
241 | va_list ap; |
242 | va_start(ap, format); |
243 | SCOPE_EXIT { |
244 | va_end(ap); |
245 | }; |
246 | return stringVAppendf(output, format, ap); |
247 | } |
248 | |
249 | std::string& |
250 | stringVAppendf(std::string* output, const char* format, va_list ap) { |
251 | stringAppendfImpl(*output, format, ap); |
252 | return *output; |
253 | } |
254 | |
255 | void stringPrintf(std::string* output, const char* format, ...) { |
256 | va_list ap; |
257 | va_start(ap, format); |
258 | SCOPE_EXIT { |
259 | va_end(ap); |
260 | }; |
261 | return stringVPrintf(output, format, ap); |
262 | } |
263 | |
264 | void stringVPrintf(std::string* output, const char* format, va_list ap) { |
265 | output->clear(); |
266 | stringAppendfImpl(*output, format, ap); |
267 | } |
268 | |
269 | namespace { |
270 | |
// One row of a pretty-printing unit table: a unit label plus the magnitude
// that one such unit represents.
struct PrettySuffix {
  // Unit label; nullptr marks the terminating row of a table (the lookup
  // loops in prettyPrint and prettyToDouble stop there).
  const char* suffix;
  // Scale of the unit. prettyPrint divides by it and prettyToDouble
  // multiplies by it; 0 means "leave the value unscaled".
  double val;
};
275 | |
// Time units in seconds, largest first. prettyPrint uses the first row whose
// val is <= |value|; the val == 0 row catches whatever is smaller than the
// last real unit and prints it unscaled. {nullptr, 0} terminates the table.
const PrettySuffix kPrettyTimeSuffixes[] = {
    {"s ", 1e0L},
    {"ms", 1e-3L},
    {"us", 1e-6L},
    {"ns", 1e-9L},
    {"ps", 1e-12L},
    {"s ", 0},
    {nullptr, 0},
};

// Same as kPrettyTimeSuffixes with hour and minute rows in front.
const PrettySuffix kPrettyTimeHmsSuffixes[] = {
    {"h ", 60L * 60L},
    {"m ", 60L},
    {"s ", 1e0L},
    {"ms", 1e-3L},
    {"us", 1e-6L},
    {"ns", 1e-9L},
    {"ps", 1e-12L},
    {"s ", 0},
    {nullptr, 0},
};
297 | |
// Byte units with power-of-ten scales, largest first; the val == 0 row is the
// unscaled fallback and {nullptr, 0} terminates the table.
const PrettySuffix kPrettyBytesMetricSuffixes[] = {
    {"EB", 1e18L},
    {"PB", 1e15L},
    {"TB", 1e12L},
    {"GB", 1e9L},
    {"MB", 1e6L},
    {"kB", 1e3L},
    {"B ", 0L},
    {nullptr, 0},
};

// Byte units with power-of-two scales, using the same labels as the metric
// table above.
const PrettySuffix kPrettyBytesBinarySuffixes[] = {
    {"EB", int64_t(1) << 60},
    {"PB", int64_t(1) << 50},
    {"TB", int64_t(1) << 40},
    {"GB", int64_t(1) << 30},
    {"MB", int64_t(1) << 20},
    {"kB", int64_t(1) << 10},
    {"B ", 0L},
    {nullptr, 0},
};

// Byte units with power-of-two scales and IEC labels (KiB, MiB, ...).
const PrettySuffix kPrettyBytesBinaryIECSuffixes[] = {
    {"EiB", int64_t(1) << 60},
    {"PiB", int64_t(1) << 50},
    {"TiB", int64_t(1) << 40},
    {"GiB", int64_t(1) << 30},
    {"MiB", int64_t(1) << 20},
    {"KiB", int64_t(1) << 10},
    {"B ", 0L},
    {nullptr, 0},
};
330 | |
// Unit-less counts with power-of-ten scales, largest first; the val == 0 row
// is the unscaled fallback and {nullptr, 0} terminates the table.
const PrettySuffix kPrettyUnitsMetricSuffixes[] = {
    {"qntl", 1e18L},
    {"qdrl", 1e15L},
    {"tril", 1e12L},
    {"bil", 1e9L},
    {"M", 1e6L},
    {"k", 1e3L},
    {" ", 0},
    {nullptr, 0},
};

// Unit-less counts with power-of-two scales and single-letter labels.
const PrettySuffix kPrettyUnitsBinarySuffixes[] = {
    {"E", int64_t(1) << 60},
    {"P", int64_t(1) << 50},
    {"T", int64_t(1) << 40},
    {"G", int64_t(1) << 30},
    {"M", int64_t(1) << 20},
    {"k", int64_t(1) << 10},
    {" ", 0},
    {nullptr, 0},
};

// Unit-less counts with power-of-two scales and IEC labels (Ki, Mi, ...).
const PrettySuffix kPrettyUnitsBinaryIECSuffixes[] = {
    {"Ei", int64_t(1) << 60},
    {"Pi", int64_t(1) << 50},
    {"Ti", int64_t(1) << 40},
    {"Gi", int64_t(1) << 30},
    {"Mi", int64_t(1) << 20},
    {"Ki", int64_t(1) << 10},
    {" ", 0},
    {nullptr, 0},
};
363 | |
// SI prefixes from 1e24 ("Y") down to 1e-24 ("y"), largest first. There is no
// row with scale 1, so prettyPrint renders values in [1, 10) with the "d"
// (1e-1) row. The val == 0 row is the unscaled fallback and {nullptr, 0}
// terminates the table.
const PrettySuffix kPrettySISuffixes[] = {
    {"Y", 1e24L},  {"Z", 1e21L},  {"E", 1e18L},  {"P", 1e15L},  {"T", 1e12L},
    {"G", 1e9L},   {"M", 1e6L},   {"k", 1e3L},   {"h", 1e2L},   {"da", 1e1L},
    {"d", 1e-1L},  {"c", 1e-2L},  {"m", 1e-3L},  {"u", 1e-6L},  {"n", 1e-9L},
    {"p", 1e-12L}, {"f", 1e-15L}, {"a", 1e-18L}, {"z", 1e-21L}, {"y", 1e-24L},
    {" ", 0},      {nullptr, 0},
};
371 | |
// Suffix table for each PrettyType; prettyPrint and prettyToDouble index this
// array directly with the PrettyType value.
// NOTE(review): the entries presumably have to stay in the same order as the
// PrettyType enumerators (declared outside this file) -- verify when adding a
// new type.
const PrettySuffix* const kPrettySuffixes[PRETTY_NUM_TYPES] = {
    kPrettyTimeSuffixes,
    kPrettyTimeHmsSuffixes,
    kPrettyBytesMetricSuffixes,
    kPrettyBytesBinarySuffixes,
    kPrettyBytesBinaryIECSuffixes,
    kPrettyUnitsMetricSuffixes,
    kPrettyUnitsBinarySuffixes,
    kPrettyUnitsBinaryIECSuffixes,
    kPrettySISuffixes,
};
383 | |
384 | } // namespace |
385 | |
386 | std::string prettyPrint(double val, PrettyType type, bool addSpace) { |
387 | char buf[100]; |
388 | |
389 | // pick the suffixes to use |
390 | assert(type >= 0); |
391 | assert(type < PRETTY_NUM_TYPES); |
392 | const PrettySuffix* suffixes = kPrettySuffixes[type]; |
393 | |
394 | // find the first suffix we're bigger than -- then use it |
395 | double abs_val = fabs(val); |
396 | for (int i = 0; suffixes[i].suffix; ++i) { |
397 | if (abs_val >= suffixes[i].val) { |
398 | snprintf( |
399 | buf, |
400 | sizeof buf, |
401 | "%.4g%s%s" , |
402 | (suffixes[i].val ? (val / suffixes[i].val) : val), |
403 | (addSpace ? " " : "" ), |
404 | suffixes[i].suffix); |
405 | return std::string(buf); |
406 | } |
407 | } |
408 | |
409 | // no suffix, we've got a tiny value -- just print it in sci-notation |
410 | snprintf(buf, sizeof buf, "%.4g" , val); |
411 | return std::string(buf); |
412 | } |
413 | |
414 | // TODO: |
415 | // 1) Benchmark & optimize |
416 | double prettyToDouble( |
417 | folly::StringPiece* const prettyString, |
418 | const PrettyType type) { |
419 | auto value = folly::to<double>(prettyString); |
420 | while (!prettyString->empty() && std::isspace(prettyString->front())) { |
421 | prettyString->advance(1); // Skipping spaces between number and suffix |
422 | } |
423 | const PrettySuffix* suffixes = kPrettySuffixes[type]; |
424 | int longestPrefixLen = -1; |
425 | int bestPrefixId = -1; |
426 | for (int j = 0; suffixes[j].suffix; ++j) { |
427 | if (suffixes[j].suffix[0] == ' ') { // Checking for " " -> number rule. |
428 | if (longestPrefixLen == -1) { |
429 | longestPrefixLen = 0; // No characters to skip |
430 | bestPrefixId = j; |
431 | } |
432 | } else if (prettyString->startsWith(suffixes[j].suffix)) { |
433 | int suffixLen = int(strlen(suffixes[j].suffix)); |
434 | // We are looking for a longest suffix matching prefix of the string |
435 | // after numeric value. We need this in case suffixes have common prefix. |
436 | if (suffixLen > longestPrefixLen) { |
437 | longestPrefixLen = suffixLen; |
438 | bestPrefixId = j; |
439 | } |
440 | } |
441 | } |
442 | if (bestPrefixId == -1) { // No valid suffix rule found |
443 | throw std::invalid_argument(folly::to<std::string>( |
444 | "Unable to parse suffix \"" , *prettyString, "\"" )); |
445 | } |
446 | prettyString->advance(size_t(longestPrefixLen)); |
447 | return suffixes[bestPrefixId].val ? value * suffixes[bestPrefixId].val |
448 | : value; |
449 | } |
450 | |
451 | double prettyToDouble(folly::StringPiece prettyString, const PrettyType type) { |
452 | double result = prettyToDouble(&prettyString, type); |
453 | detail::enforceWhitespace(prettyString); |
454 | return result; |
455 | } |
456 | |
457 | std::string hexDump(const void* ptr, size_t size) { |
458 | std::ostringstream os; |
459 | hexDump(ptr, size, std::ostream_iterator<StringPiece>(os, "\n" )); |
460 | return os.str(); |
461 | } |
462 | |
// There are two variants of the `strerror_r` function: one returns
// `int`, and the other returns `char*`. Selecting the proper version
// portably using preprocessor macros is extremely hard.
//
// For example, on Android the function signature depends on the `__USE_GNU`
// and `__ANDROID_API__` macros (https://git.io/fjBBE).
//
// So we use a C++ overloading trick: we pass a pointer to
// `strerror_r` to the `invoke_strerror_r` function, and the C++ compiler
// selects the matching overload.
473 | |
474 | FOLLY_MAYBE_UNUSED |
475 | static fbstring invoke_strerror_r( |
476 | int (*strerror_r)(int, char*, size_t), |
477 | int err, |
478 | char* buf, |
479 | size_t buflen) { |
480 | // Using XSI-compatible strerror_r |
481 | int r = strerror_r(err, buf, buflen); |
482 | |
483 | // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero |
484 | if (r != 0) { |
485 | return to<fbstring>( |
486 | "Unknown error " , err, " (strerror_r failed with error " , errno, ")" ); |
487 | } else { |
488 | return buf; |
489 | } |
490 | } |
491 | |
492 | FOLLY_MAYBE_UNUSED |
493 | static fbstring invoke_strerror_r( |
494 | char* (*strerror_r)(int, char*, size_t), |
495 | int err, |
496 | char* buf, |
497 | size_t buflen) { |
498 | // Using GNU strerror_r |
499 | return strerror_r(err, buf, buflen); |
500 | } |
501 | |
502 | fbstring errnoStr(int err) { |
503 | int savedErrno = errno; |
504 | |
505 | // Ensure that we reset errno upon exit. |
506 | auto guard(makeGuard([&] { errno = savedErrno; })); |
507 | |
508 | char buf[1024]; |
509 | buf[0] = '\0'; |
510 | |
511 | fbstring result; |
512 | |
513 | // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html |
514 | // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html |
515 | #if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)) |
516 | // mingw64 has no strerror_r, but Windows has strerror_s, which C11 added |
517 | // as well. So maybe we should use this across all platforms (together |
518 | // with strerrorlen_s). Note strerror_r and _s have swapped args. |
519 | int r = strerror_s(buf, sizeof(buf), err); |
520 | if (r != 0) { |
521 | result = to<fbstring>( |
522 | "Unknown error " , err, " (strerror_r failed with error " , errno, ")" ); |
523 | } else { |
524 | result.assign(buf); |
525 | } |
526 | #else |
527 | // Using any strerror_r |
528 | result.assign(invoke_strerror_r(strerror_r, err, buf, sizeof(buf))); |
529 | #endif |
530 | |
531 | return result; |
532 | } |
533 | |
534 | namespace { |
535 | |
// Branchless ASCII tolower for a single character, based on the
// input-rotating trick described at
// http://www.azillionmonkeys.com/qed/asmexample.html
//
// Converting 'A'-'Z' to lowercase means adding 0x20, so the job is to
// compute 0x20 for uppercase letters and 0x00 for everything else, and then
// add that to `c`. Bytes with the high bit set are left untouched.
void toLowerAscii8(char& c) {
  const auto input = static_cast<uint8_t>(c);

  // Drop the high bit for now; it is accounted for further down.
  //   below 'A':   0- 64
  //   'A'-'Z':    65- 90
  //   above 'Z':  91-127
  auto probe = static_cast<uint8_t>(input & 0x7f);

  // Add 0x25 (37) and drop the high bit again, which wraps everything
  // above 'Z' around to the bottom of the range.
  //   below 'A':  37-101
  //   'A'-'Z':   102-127
  //   above 'Z':   0- 36
  probe = static_cast<uint8_t>((probe + 0x25) & 0x7f);

  // Add 0x1a (26). Now only the uppercase letters have the high bit set.
  //   below 'A':  63-127
  //   'A'-'Z':   128-153
  //   above 'Z':  26- 62
  probe = static_cast<uint8_t>(probe + 0x1a);

  // Because the input's own high bit was ignored, values 128+'A' to 128+'Z'
  // look like matches at this point. And-ing with the inverted input clears
  // the high bit of the probe exactly when the input had it set; the other
  // bits of the probe no longer matter.
  probe = static_cast<uint8_t>(probe & ~input);

  // Move the high bit (0x80) down to the 0x20 position and discard the rest:
  // 0x20 if c is 'A'-'Z', 0x00 otherwise.
  const auto delta = static_cast<uint8_t>((probe >> 2) & 0x20);

  c += char(delta);
}
593 | |
// Lowercases four packed ASCII bytes at once.
//
// This is the algorithm of toLowerAscii8() applied to all four bytes of a
// uint32_t in parallel. That works without CPU-specific SIMD instructions
// because none of the additions can overflow an 8-bit lane, so no lane ever
// carries into its neighbour.
void toLowerAscii32(uint32_t& c) {
  constexpr uint32_t kLow7Bits = 0x7f7f7f7fU;

  uint32_t probe = c & kLow7Bits;
  probe = (probe + 0x25252525U) & kLow7Bits;
  probe += 0x1a1a1a1aU;
  probe &= ~c;

  // The shift spills bits from each lane into the one below it, but only
  // into positions that the 0x20 mask discards.
  c += (probe >> 2) & 0x20202020U;
}
613 | |
// Lowercases eight packed ASCII bytes at once: the 64-bit counterpart of
// toLowerAscii32(), using the same lane-parallel steps.
void toLowerAscii64(uint64_t& c) {
  constexpr uint64_t kLow7Bits = 0x7f7f7f7f7f7f7f7fULL;

  uint64_t probe = c & kLow7Bits;
  probe = (probe + 0x2525252525252525ULL) & kLow7Bits;
  probe += 0x1a1a1a1a1a1a1a1aULL;
  probe &= ~c;
  c += (probe >> 2) & 0x2020202020202020ULL;
}
625 | |
626 | } // namespace |
627 | |
628 | void toLowerAscii(char* str, size_t length) { |
629 | static const size_t kAlignMask64 = 7; |
630 | static const size_t kAlignMask32 = 3; |
631 | |
632 | // Convert a character at a time until we reach an address that |
633 | // is at least 32-bit aligned |
634 | auto n = (size_t)str; |
635 | n &= kAlignMask32; |
636 | n = std::min(n, length); |
637 | size_t offset = 0; |
638 | if (n != 0) { |
639 | n = std::min(4 - n, length); |
640 | do { |
641 | toLowerAscii8(str[offset]); |
642 | offset++; |
643 | } while (offset < n); |
644 | } |
645 | |
646 | n = (size_t)(str + offset); |
647 | n &= kAlignMask64; |
648 | if ((n != 0) && (offset + 4 <= length)) { |
649 | // The next address is 32-bit aligned but not 64-bit aligned. |
650 | // Convert the next 4 bytes in order to get to the 64-bit aligned |
651 | // part of the input. |
652 | toLowerAscii32(*(uint32_t*)(str + offset)); |
653 | offset += 4; |
654 | } |
655 | |
656 | // Convert 8 characters at a time |
657 | while (offset + 8 <= length) { |
658 | toLowerAscii64(*(uint64_t*)(str + offset)); |
659 | offset += 8; |
660 | } |
661 | |
662 | // Convert 4 characters at a time |
663 | while (offset + 4 <= length) { |
664 | toLowerAscii32(*(uint32_t*)(str + offset)); |
665 | offset += 4; |
666 | } |
667 | |
668 | // Convert any characters remaining after the last 4-byte aligned group |
669 | while (offset < length) { |
670 | toLowerAscii8(str[offset]); |
671 | offset++; |
672 | } |
673 | } |
674 | |
675 | namespace detail { |
676 | |
677 | size_t |
678 | hexDumpLine(const void* ptr, size_t offset, size_t size, std::string& line) { |
679 | static char hexValues[] = "0123456789abcdef" ; |
680 | // Line layout: |
681 | // 8: address |
682 | // 1: space |
683 | // (1+2)*16: hex bytes, each preceded by a space |
684 | // 1: space separating the two halves |
685 | // 3: " |" |
686 | // 16: characters |
687 | // 1: "|" |
688 | // Total: 78 |
689 | line.clear(); |
690 | line.reserve(78); |
691 | const uint8_t* p = reinterpret_cast<const uint8_t*>(ptr) + offset; |
692 | size_t n = std::min(size - offset, size_t(16)); |
693 | line.push_back(hexValues[(offset >> 28) & 0xf]); |
694 | line.push_back(hexValues[(offset >> 24) & 0xf]); |
695 | line.push_back(hexValues[(offset >> 20) & 0xf]); |
696 | line.push_back(hexValues[(offset >> 16) & 0xf]); |
697 | line.push_back(hexValues[(offset >> 12) & 0xf]); |
698 | line.push_back(hexValues[(offset >> 8) & 0xf]); |
699 | line.push_back(hexValues[(offset >> 4) & 0xf]); |
700 | line.push_back(hexValues[offset & 0xf]); |
701 | line.push_back(' '); |
702 | |
703 | for (size_t i = 0; i < n; i++) { |
704 | if (i == 8) { |
705 | line.push_back(' '); |
706 | } |
707 | |
708 | line.push_back(' '); |
709 | line.push_back(hexValues[(p[i] >> 4) & 0xf]); |
710 | line.push_back(hexValues[p[i] & 0xf]); |
711 | } |
712 | |
713 | // 3 spaces for each byte we're not printing, one separating the halves |
714 | // if necessary |
715 | line.append(3 * (16 - n) + (n <= 8), ' '); |
716 | line.append(" |" ); |
717 | |
718 | for (size_t i = 0; i < n; i++) { |
719 | char c = (p[i] >= 32 && p[i] <= 126 ? static_cast<char>(p[i]) : '.'); |
720 | line.push_back(c); |
721 | } |
722 | line.append(16 - n, ' '); |
723 | line.push_back('|'); |
724 | DCHECK_EQ(line.size(), 78u); |
725 | |
726 | return n; |
727 | } |
728 | |
729 | } // namespace detail |
730 | |
731 | std::string stripLeftMargin(std::string s) { |
732 | std::vector<StringPiece> pieces; |
733 | split("\n" , s, pieces); |
734 | auto piecer = range(pieces); |
735 | |
736 | auto piece = (piecer.end() - 1); |
737 | auto needle = std::find_if(piece->begin(), piece->end(), [](char c) { |
738 | return c != ' ' && c != '\t'; |
739 | }); |
740 | if (needle == piece->end()) { |
741 | (piecer.end() - 1)->clear(); |
742 | } |
743 | piece = piecer.begin(); |
744 | needle = std::find_if(piece->begin(), piece->end(), [](char c) { |
745 | return c != ' ' && c != '\t'; |
746 | }); |
747 | if (needle == piece->end()) { |
748 | piecer.erase(piecer.begin(), piecer.begin() + 1); |
749 | } |
750 | |
751 | const auto sentinel = std::numeric_limits<size_t>::max(); |
752 | auto indent = sentinel; |
753 | size_t max_length = 0; |
754 | for (piece = piecer.begin(); piece != piecer.end(); piece++) { |
755 | needle = std::find_if(piece->begin(), piece->end(), [](char c) { |
756 | return c != ' ' && c != '\t'; |
757 | }); |
758 | if (needle != piece->end()) { |
759 | indent = std::min<size_t>(indent, size_t(needle - piece->begin())); |
760 | } else { |
761 | max_length = std::max<size_t>(piece->size(), max_length); |
762 | } |
763 | } |
764 | indent = indent == sentinel ? max_length : indent; |
765 | for (piece = piecer.begin(); piece != piecer.end(); piece++) { |
766 | if (piece->size() < indent) { |
767 | piece->clear(); |
768 | } else { |
769 | piece->erase(piece->begin(), piece->begin() + indent); |
770 | } |
771 | } |
772 | return join("\n" , piecer); |
773 | } |
774 | |
775 | } // namespace folly |
776 | |
// If FOLLY_DEFINED_DMGL is set, remove it together with the DMGL_* macros
// listed below so they do not remain defined past the end of this file.
// NOTE(review): nothing visible in this file defines FOLLY_DEFINED_DMGL;
// presumably it marks DMGL_* fallbacks that folly defined itself -- confirm
// whether this block is still needed.
#ifdef FOLLY_DEFINED_DMGL
#undef FOLLY_DEFINED_DMGL
#undef DMGL_NO_OPTS
#undef DMGL_PARAMS
#undef DMGL_ANSI
#undef DMGL_JAVA
#undef DMGL_VERBOSE
#undef DMGL_TYPES
#undef DMGL_RET_POSTFIX
#endif
787 | |