1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2008 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // from google3/strings/strutil.h |
32 | |
33 | #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
34 | #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
35 | |
36 | #include <google/protobuf/stubs/common.h> |
37 | #include <google/protobuf/stubs/stringpiece.h> |
38 | #include <stdlib.h> |
39 | |
40 | #include <cstring> |
41 | #include <google/protobuf/port_def.inc> |
42 | #include <vector> |
43 | |
44 | namespace google { |
45 | namespace protobuf { |
46 | |
// Redirect strtoll/strtoull to a differently named 64-bit conversion routine
// on the two toolchains singled out below; on every other toolchain the
// standard names are left untouched.
#if defined(_MSC_VER) && _MSC_VER < 1800
// MSVC with _MSC_VER below 1800: use the _strtoi64/_strtoui64 spellings.
#define strtoll _strtoi64
#define strtoull _strtoui64
#elif defined(__DECCXX) && defined(__osf__)
// HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit.
#define strtoll strtol
#define strtoull strtoul
#endif
55 | |
56 | // ---------------------------------------------------------------------- |
57 | // ascii_isalnum() |
58 | // Check if an ASCII character is alphanumeric. We can't use ctype's |
59 | // isalnum() because it is affected by locale. This function is applied |
60 | // to identifiers in the protocol buffer language, not to natural-language |
61 | // strings, so locale should not be taken into account. |
62 | // ascii_isdigit() |
63 | // Like above, but only accepts digits. |
64 | // ascii_isspace() |
65 | // Check if the character is a space character. |
66 | // ---------------------------------------------------------------------- |
67 | |
// Locale-independent test: true when c is an ASCII letter or decimal digit.
inline bool ascii_isalnum(char c) {
  if (c >= '0' && c <= '9') return true;
  if (c >= 'A' && c <= 'Z') return true;
  return c >= 'a' && c <= 'z';
}
73 | |
// Locale-independent test: true only for the ASCII digits '0' through '9'.
inline bool ascii_isdigit(char c) {
  if (c < '0') return false;
  return c <= '9';
}
77 | |
// True when c is one of the six ASCII whitespace characters: space,
// horizontal tab, newline, vertical tab, form feed or carriage return.
inline bool ascii_isspace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
  }
}
82 | |
// True when c is an ASCII capital letter ('A'..'Z'); ignores locale.
inline bool ascii_isupper(char c) {
  return !(c < 'A' || c > 'Z');
}
86 | |
// True when c is an ASCII small letter ('a'..'z'); ignores locale.
inline bool ascii_islower(char c) {
  return !(c < 'a' || c > 'z');
}
90 | |
91 | inline char ascii_toupper(char c) { |
92 | return ascii_islower(c) ? c - ('a' - 'A') : c; |
93 | } |
94 | |
95 | inline char ascii_tolower(char c) { |
96 | return ascii_isupper(c) ? c + ('a' - 'A') : c; |
97 | } |
98 | |
// Maps a hexadecimal digit character to its numeric value, assuming ASCII.
// No validation is performed: the result for a non-hex character is simply
// whatever the arithmetic below produces.
inline int hex_digit_to_int(char c) {
  const int code = static_cast<unsigned char>(c);
  // Characters above '9' get 9 added so that, for the letters a-f / A-F,
  // the low four bits come out as 10..15.
  const int shifted = (code > '9') ? code + 9 : code;
  return shifted & 0xf;
}
107 | |
108 | // ---------------------------------------------------------------------- |
109 | // HasPrefixString() |
110 | // Check if a string begins with a given prefix. |
111 | // StripPrefixString() |
112 | // Given a string and a putative prefix, returns the string minus the |
113 | // prefix string if the prefix matches, otherwise the original |
114 | // string. |
115 | // ---------------------------------------------------------------------- |
116 | inline bool HasPrefixString(StringPiece str, StringPiece prefix) { |
117 | return str.size() >= prefix.size() && |
118 | memcmp(str.data(), prefix.data(), prefix.size()) == 0; |
119 | } |
120 | |
121 | inline string StripPrefixString(const string& str, const string& prefix) { |
122 | if (HasPrefixString(str, prefix)) { |
123 | return str.substr(prefix.size()); |
124 | } else { |
125 | return str; |
126 | } |
127 | } |
128 | |
129 | // ---------------------------------------------------------------------- |
130 | // HasSuffixString() |
131 | // Return true if str ends in suffix. |
132 | // StripSuffixString() |
133 | // Given a string and a putative suffix, returns the string minus the |
134 | // suffix string if the suffix matches, otherwise the original |
135 | // string. |
136 | // ---------------------------------------------------------------------- |
137 | inline bool HasSuffixString(StringPiece str, StringPiece suffix) { |
138 | return str.size() >= suffix.size() && |
139 | memcmp(str.data() + str.size() - suffix.size(), suffix.data(), |
140 | suffix.size()) == 0; |
141 | } |
142 | |
143 | inline string StripSuffixString(const string& str, const string& suffix) { |
144 | if (HasSuffixString(str, suffix)) { |
145 | return str.substr(0, str.size() - suffix.size()); |
146 | } else { |
147 | return str; |
148 | } |
149 | } |
150 | |
// ----------------------------------------------------------------------
// ReplaceCharacters
//    Replaces, in the string pointed to by 's', any occurrence of the
//    character 'remove' (or of any of the characters in the C string
//    'remove') with the character 'replacewith'.
//    Good for keeping html characters or protocol characters (\t) out
//    of places where they might cause a problem.
// StripWhitespace
//    Removes whitespace from both ends of the string pointed to by 's'.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT void ReplaceCharacters(string* s, const char* remove,
                                       char replacewith);

PROTOBUF_EXPORT void StripWhitespace(string* s);
164 | |
165 | // ---------------------------------------------------------------------- |
166 | // LowerString() |
167 | // UpperString() |
168 | // ToUpper() |
169 | // Convert the characters in "s" to lowercase or uppercase. ASCII-only: |
170 | // these functions intentionally ignore locale because they are applied to |
171 | // identifiers used in the Protocol Buffer language, not to natural-language |
172 | // strings. |
173 | // ---------------------------------------------------------------------- |
174 | |
// Lowercases the ASCII capital letters of *s in place; every other byte is
// left alone.  tolower() is deliberately avoided because its result depends
// on the locale.
inline void LowerString(string* s) {
  const string::size_type length = s->size();
  for (string::size_type pos = 0; pos < length; ++pos) {
    char& ch = (*s)[pos];
    if (ch >= 'A' && ch <= 'Z') ch += 'a' - 'A';
  }
}
182 | |
// Uppercases the ASCII small letters of *s in place; every other byte is
// left alone.  toupper() is deliberately avoided because its result depends
// on the locale.
inline void UpperString(string* s) {
  const string::size_type length = s->size();
  for (string::size_type pos = 0; pos < length; ++pos) {
    char& ch = (*s)[pos];
    if (ch >= 'a' && ch <= 'z') ch += 'A' - 'a';
  }
}
190 | |
191 | inline void ToUpper(string* s) { UpperString(s); } |
192 | |
193 | inline string ToUpper(const string& s) { |
194 | string out = s; |
195 | UpperString(&out); |
196 | return out; |
197 | } |
198 | |
// ----------------------------------------------------------------------
// StringReplace()
//    Give me a string and two patterns "oldsub" and "newsub", and I replace
//    the first instance of "oldsub" in the string with "newsub", if it
//    exists.  RETURN a new string, regardless of whether the replacement
//    happened or not.
//    NOTE(review): the 'replace_all' flag is not described by the text
//    above; presumably passing true replaces every instance rather than
//    only the first -- confirm against the implementation.
// ----------------------------------------------------------------------

PROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub,
                                     const string& newsub, bool replace_all);
209 | |
// ----------------------------------------------------------------------
// SplitStringUsing()
//    Split the string 'full' using a character delimiter 'delim'.  Append
//    the components to 'res'.  If there are consecutive delimiters, this
//    function skips over all of them.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT void SplitStringUsing(StringPiece full, const char* delim,
                                      std::vector<string>* res);
218 | |
// ----------------------------------------------------------------------
// SplitStringAllowEmpty()
//    Split the string 'full' using one or more byte delimiters, presented
//    as a nul-terminated c string 'delim'.  Append the components to
//    'result'.  If there are consecutive delimiters, this function will
//    return corresponding empty strings.  If you want to drop the empty
//    strings, try SplitStringUsing().
//
//    If "full" is the empty string, yields an empty string as the only value.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT void SplitStringAllowEmpty(StringPiece full, const char* delim,
                                           std::vector<string>* result);
229 | |
230 | // ---------------------------------------------------------------------- |
231 | // Split() |
232 | // Split a string using a character delimiter. |
233 | // ---------------------------------------------------------------------- |
234 | inline std::vector<string> Split(StringPiece full, const char* delim, |
235 | bool skip_empty = true) { |
236 | std::vector<string> result; |
237 | if (skip_empty) { |
238 | SplitStringUsing(full, delim, &result); |
239 | } else { |
240 | SplitStringAllowEmpty(full, delim, &result); |
241 | } |
242 | return result; |
243 | } |
244 | |
// ----------------------------------------------------------------------
// JoinStrings()
//    These methods concatenate a vector of strings into a C++ string, using
//    the C-string "delim" as a separator between components. There are two
//    flavors of the function, one flavor returns the concatenated string,
//    another takes a pointer to the target string. In the latter case the
//    target string is cleared and overwritten.
//
//    The declaration immediately below is the pointer-taking flavor: the
//    joined text is stored in *result.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT void JoinStrings(const std::vector<string>& components,
                                 const char* delim, string* result);
255 | |
256 | inline string JoinStrings(const std::vector<string>& components, |
257 | const char* delim) { |
258 | string result; |
259 | JoinStrings(components, delim, &result); |
260 | return result; |
261 | } |
262 | |
// ----------------------------------------------------------------------
// UnescapeCEscapeSequences()
//    Copies "source" to "dest", rewriting C-style escape sequences
//    -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
//    equivalents.  "dest" must be sufficiently large to hold all
//    the characters in the rewritten string (i.e. at least as large
//    as strlen(source) + 1 should be safe, since the replacements
//    are always shorter than the original escaped sequences).  It's
//    safe for source and dest to be the same.  RETURNS the length
//    of dest.
//
//    It allows hex sequences \xhh, or generally \xhhhhh with an
//    arbitrary number of hex digits, but all of them together must
//    specify a value of a single byte (e.g. \x0045 is equivalent
//    to \x45, and \x1234 is erroneous).
//
//    It also allows escape sequences of the form \uhhhh (exactly four
//    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
//    hex digits, upper or lower case) to specify a Unicode code
//    point. The dest array will contain the UTF8-encoded version of
//    that code-point (e.g., if source contains \u2019, then dest will
//    contain the three bytes 0xE2, 0x80, and 0x99).
//
//    Errors: In the first form of the call (no "errors" argument), errors
//    are reported with LOG(ERROR). The same is true for the second form of
//    the call if the pointer to the string std::vector is nullptr;
//    otherwise, error messages are stored in the std::vector. In either
//    case, the effect on the dest array is not defined, but the rest of the
//    source will be processed.
// ----------------------------------------------------------------------

PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest,
                                             std::vector<string>* errors);
297 | |
// ----------------------------------------------------------------------
// UnescapeCEscapeString()
//    This does the same thing as UnescapeCEscapeSequences, but creates
//    a new string. The caller does not need to worry about allocating
//    a dest buffer. This should be used for non performance critical
//    tasks such as printing debug messages. It is safe for src and dest
//    to be the same.
//
//    The second call stores its errors in a supplied string vector.
//    If the string vector pointer is nullptr, it reports the errors with LOG().
//
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
// ----------------------------------------------------------------------

PROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest);
PROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest,
                                          std::vector<string>* errors);
PROTOBUF_EXPORT string UnescapeCEscapeString(const string& src);
317 | |
// ----------------------------------------------------------------------
// CEscape()
//    Escapes 'src' using C-style escape sequences and returns the resulting
//    string.  'src' itself is taken by const reference and not modified.
//
//    Escaped chars: \n, \r, \t, ", ', \, and !isprint().
// ----------------------------------------------------------------------
PROTOBUF_EXPORT string CEscape(const string& src);
326 | |
// ----------------------------------------------------------------------
// CEscapeAndAppend()
//    Escapes 'src' using C-style escape sequences, and appends the escaped
//    string to the string pointed to by 'dest'.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, string* dest);
333 | |
// Variants of CEscape() that live in the nested 'strings' namespace.
namespace strings {
// Like CEscape() but does not escape bytes with the upper bit set.
PROTOBUF_EXPORT string Utf8SafeCEscape(const string& src);

// Like CEscape() but uses hex (\x) escapes instead of octals.
PROTOBUF_EXPORT string CHexEscape(const string& src);
}  // namespace strings
341 | |
// ----------------------------------------------------------------------
// strto32()
// strtou32()
// strto64()
// strtou64()
//    Architecture-neutral plug compatible replacements for strtol() and
//    strtoul().  Long's have different lengths on ILP-32 and LP-64
//    platforms, so using these is safer, from the point of view of
//    overflow behavior, than using the standard libc functions.
//
//    The two *_adaptor functions declared here are the out-of-line helpers
//    that strto32()/strtou32() fall back to when 'long' does not have the
//    same size as the 32-bit result type.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT int32 strto32_adaptor(const char* nptr, char** endptr,
                                      int base);
PROTOBUF_EXPORT uint32 strtou32_adaptor(const char* nptr, char** endptr,
                                        int base);
356 | |
357 | inline int32 strto32(const char *nptr, char **endptr, int base) { |
358 | if (sizeof(int32) == sizeof(long)) |
359 | return strtol(nptr, endptr, base); |
360 | else |
361 | return strto32_adaptor(nptr, endptr, base); |
362 | } |
363 | |
364 | inline uint32 strtou32(const char *nptr, char **endptr, int base) { |
365 | if (sizeof(uint32) == sizeof(unsigned long)) |
366 | return strtoul(nptr, endptr, base); |
367 | else |
368 | return strtou32_adaptor(nptr, endptr, base); |
369 | } |
370 | |
371 | // For now, long long is 64-bit on all the platforms we care about, so these |
372 | // functions can simply pass the call to strto[u]ll. |
373 | inline int64 strto64(const char *nptr, char **endptr, int base) { |
374 | GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), |
375 | sizeof_int64_is_not_sizeof_long_long); |
376 | return strtoll(nptr, endptr, base); |
377 | } |
378 | |
379 | inline uint64 strtou64(const char *nptr, char **endptr, int base) { |
380 | GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), |
381 | sizeof_uint64_is_not_sizeof_long_long); |
382 | return strtoull(nptr, endptr, base); |
383 | } |
384 | |
// ----------------------------------------------------------------------
// safe_strtob()
// safe_strto32()
// safe_strtou32()
// safe_strto64()
// safe_strtou64()
// safe_strtof()
// safe_strtod()
//    Each function takes the text to convert and a pointer through which
//    the converted value is handed back, and returns a bool.
//    NOTE(review): presumably the return value reports whether the whole
//    input was converted successfully -- confirm against the implementation.
// ----------------------------------------------------------------------
PROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value);

// Out-of-line 32-bit conversions; the inline overloads that follow forward
// to these after building a string from their argument.
PROTOBUF_EXPORT bool safe_strto32(const string& str, int32* value);
PROTOBUF_EXPORT bool safe_strtou32(const string& str, uint32* value);
398 | inline bool safe_strto32(const char* str, int32* value) { |
399 | return safe_strto32(string(str), value); |
400 | } |
401 | inline bool safe_strto32(StringPiece str, int32* value) { |
402 | return safe_strto32(str.ToString(), value); |
403 | } |
404 | inline bool safe_strtou32(const char* str, uint32* value) { |
405 | return safe_strtou32(string(str), value); |
406 | } |
407 | inline bool safe_strtou32(StringPiece str, uint32* value) { |
408 | return safe_strtou32(str.ToString(), value); |
409 | } |
410 | |
// Out-of-line 64-bit conversions: take the text in 'str', hand the result
// back through 'value', and return a bool.  The inline overloads that follow
// forward to these after building a string from their argument.
PROTOBUF_EXPORT bool safe_strto64(const string& str, int64* value);
PROTOBUF_EXPORT bool safe_strtou64(const string& str, uint64* value);
413 | inline bool safe_strto64(const char* str, int64* value) { |
414 | return safe_strto64(string(str), value); |
415 | } |
416 | inline bool safe_strto64(StringPiece str, int64* value) { |
417 | return safe_strto64(str.ToString(), value); |
418 | } |
419 | inline bool safe_strtou64(const char* str, uint64* value) { |
420 | return safe_strtou64(string(str), value); |
421 | } |
422 | inline bool safe_strtou64(StringPiece str, uint64* value) { |
423 | return safe_strtou64(str.ToString(), value); |
424 | } |
425 | |
// Out-of-line floating-point conversions: take the text as a C string in
// 'str', hand the result back through 'value', and return a bool.  The inline
// overloads that follow forward to these.
PROTOBUF_EXPORT bool safe_strtof(const char* str, float* value);
PROTOBUF_EXPORT bool safe_strtod(const char* str, double* value);
428 | inline bool safe_strtof(const string& str, float* value) { |
429 | return safe_strtof(str.c_str(), value); |
430 | } |
431 | inline bool safe_strtod(const string& str, double* value) { |
432 | return safe_strtod(str.c_str(), value); |
433 | } |
434 | inline bool safe_strtof(StringPiece str, float* value) { |
435 | return safe_strtof(str.ToString(), value); |
436 | } |
437 | inline bool safe_strtod(StringPiece str, double* value) { |
438 | return safe_strtod(str.ToString(), value); |
439 | } |
440 | |
441 | // ---------------------------------------------------------------------- |
442 | // FastIntToBuffer() |
443 | // FastHexToBuffer() |
444 | // FastHex64ToBuffer() |
445 | // FastHex32ToBuffer() |
446 | // FastTimeToBuffer() |
447 | // These are intended for speed. FastIntToBuffer() assumes the |
448 | // integer is non-negative. FastHexToBuffer() puts output in |
449 | // hex rather than decimal. FastTimeToBuffer() puts the output |
450 | // into RFC822 format. |
451 | // |
452 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
453 | // padded to exactly 16 bytes (plus one byte for '\0') |
454 | // |
455 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
456 | // padded to exactly 8 bytes (plus one byte for '\0') |
457 | // |
458 | // All functions take the output buffer as an arg. |
459 | // They all return a pointer to the beginning of the output, |
460 | // which may not be the beginning of the input buffer. |
461 | // ---------------------------------------------------------------------- |
462 | |
// Suggested buffer size, in bytes, for FastToBuffer functions.  Also works
// with DoubleToBuffer() and FloatToBuffer().
// Declared 'static' in this header, so every translation unit that includes
// it gets its own internal-linkage copy of the constant.
static const int kFastToBufferSize = 32;
466 | |
// Integer-to-text conversions that write into a caller-supplied buffer and
// return a char* into it.  The two unsigned decimal variants carry no export
// macro because they are defined inline further down in this header; the
// declarations here let the inline wrappers below call them before that
// point.
PROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer);
PROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer);
char* FastUInt32ToBuffer(uint32 i, char* buffer);  // inline below
char* FastUInt64ToBuffer(uint64 i, char* buffer);  // inline below
// Hexadecimal variants.
PROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer);
PROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer);
PROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer);
474 | |
475 | // at least 22 bytes long |
476 | inline char* FastIntToBuffer(int i, char* buffer) { |
477 | return (sizeof(i) == 4 ? |
478 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
479 | } |
480 | inline char* FastUIntToBuffer(unsigned int i, char* buffer) { |
481 | return (sizeof(i) == 4 ? |
482 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
483 | } |
484 | inline char* FastLongToBuffer(long i, char* buffer) { |
485 | return (sizeof(i) == 4 ? |
486 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
487 | } |
488 | inline char* FastULongToBuffer(unsigned long i, char* buffer) { |
489 | return (sizeof(i) == 4 ? |
490 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
491 | } |
492 | |
// ----------------------------------------------------------------------
// FastInt32ToBufferLeft()
// FastUInt32ToBufferLeft()
// FastInt64ToBufferLeft()
// FastUInt64ToBufferLeft()
//
// Like the Fast*ToBuffer() functions above, these are intended for speed.
// Unlike the Fast*ToBuffer() functions, however, these functions write
// their output to the beginning of the buffer (hence the name, as the
// output is left-aligned).  The caller is responsible for ensuring that
// the buffer has enough space to hold the output.
//
// Returns a pointer to the end of the string (i.e. the null character
// terminating the string), so the length of the output is the returned
// pointer minus 'buffer'.
// ----------------------------------------------------------------------

PROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer);
PROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer);
PROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer);
PROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer);
513 | |
514 | // Just define these in terms of the above. |
515 | inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { |
516 | FastUInt32ToBufferLeft(i, buffer); |
517 | return buffer; |
518 | } |
519 | inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { |
520 | FastUInt64ToBufferLeft(i, buffer); |
521 | return buffer; |
522 | } |
523 | |
// Converts a bool to the text "true" or "false".
inline string SimpleBtoa(bool value) {
  if (value) {
    return string("true");
  }
  return string("false");
}
527 | |
// ----------------------------------------------------------------------
// SimpleItoa()
//    Description: converts an integer to a string.
//    One overload is declared for each of int, long and long long, in both
//    signed and unsigned form.
//
//    Return value: string
// ----------------------------------------------------------------------
PROTOBUF_EXPORT string SimpleItoa(int i);
PROTOBUF_EXPORT string SimpleItoa(unsigned int i);
PROTOBUF_EXPORT string SimpleItoa(long i);
PROTOBUF_EXPORT string SimpleItoa(unsigned long i);
PROTOBUF_EXPORT string SimpleItoa(long long i);
PROTOBUF_EXPORT string SimpleItoa(unsigned long long i);
540 | |
// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
//    Description: converts a double or float to a string which, if
//    passed to NoLocaleStrtod(), will produce the exact same original double
//    (except in case of NaN; all NaNs are considered the same value).
//    We try to keep the string short but it's not guaranteed to be as
//    short as possible.
//
//    DoubleToBuffer() and FloatToBuffer() write the text to the given
//    buffer and return it.  The buffer must be at least
//    kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
//    bytes for floats.  kFastToBufferSize is also guaranteed to be large
//    enough to hold either.
//
//    Return value: string for SimpleDtoa()/SimpleFtoa(); the buffer that
//    was passed in for DoubleToBuffer()/FloatToBuffer().
// ----------------------------------------------------------------------
PROTOBUF_EXPORT string SimpleDtoa(double value);
PROTOBUF_EXPORT string SimpleFtoa(float value);

PROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer);
PROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer);
565 | |
// Minimum buffer sizes, in bytes, for DoubleToBuffer() and FloatToBuffer()
// respectively.
// In practice, doubles should never need more than 24 bytes and floats
// should never need more than 14 (including null terminators), but we
// overestimate to be safe.
static const int kDoubleToBufferSize = 32;
static const int kFloatToBufferSize = 24;
571 | |
572 | namespace strings { |
573 | |
// Padding request carried by Hex.  Every enumerator's numeric value is
// spelled out explicitly: NO_PAD is 1 and ZERO_PAD_n is n.
enum PadSpec {
  NO_PAD = 1,
  ZERO_PAD_2 = 2,
  ZERO_PAD_3 = 3,
  ZERO_PAD_4 = 4,
  ZERO_PAD_5 = 5,
  ZERO_PAD_6 = 6,
  ZERO_PAD_7 = 7,
  ZERO_PAD_8 = 8,
  ZERO_PAD_9 = 9,
  ZERO_PAD_10 = 10,
  ZERO_PAD_11 = 11,
  ZERO_PAD_12 = 12,
  ZERO_PAD_13 = 13,
  ZERO_PAD_14 = 14,
  ZERO_PAD_15 = 15,
  ZERO_PAD_16 = 16,
};
592 | |
593 | struct Hex { |
594 | uint64 value; |
595 | enum PadSpec spec; |
596 | template <class Int> |
597 | explicit Hex(Int v, PadSpec s = NO_PAD) |
598 | : spec(s) { |
599 | // Prevent sign-extension by casting integers to |
600 | // their unsigned counterparts. |
601 | #ifdef LANG_CXX11 |
602 | static_assert( |
603 | sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8, |
604 | "Unknown integer type" ); |
605 | #endif |
606 | value = sizeof(v) == 1 ? static_cast<uint8>(v) |
607 | : sizeof(v) == 2 ? static_cast<uint16>(v) |
608 | : sizeof(v) == 4 ? static_cast<uint32>(v) |
609 | : static_cast<uint64>(v); |
610 | } |
611 | }; |
612 | |
613 | struct PROTOBUF_EXPORT AlphaNum { |
614 | const char *piece_data_; // move these to string_ref eventually |
615 | size_t piece_size_; // move these to string_ref eventually |
616 | |
617 | char digits[kFastToBufferSize]; |
618 | |
619 | // No bool ctor -- bools convert to an integral type. |
620 | // A bool ctor would also convert incoming pointers (bletch). |
621 | |
622 | AlphaNum(int i32) |
623 | : piece_data_(digits), |
624 | piece_size_(FastInt32ToBufferLeft(i32, digits) - &digits[0]) {} |
625 | AlphaNum(unsigned int u32) |
626 | : piece_data_(digits), |
627 | piece_size_(FastUInt32ToBufferLeft(u32, digits) - &digits[0]) {} |
628 | AlphaNum(long long i64) |
629 | : piece_data_(digits), |
630 | piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} |
631 | AlphaNum(unsigned long long u64) |
632 | : piece_data_(digits), |
633 | piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} |
634 | |
635 | // Note: on some architectures, "long" is only 32 bits, not 64, but the |
636 | // performance hit of using FastInt64ToBufferLeft to handle 32-bit values |
637 | // is quite minor. |
638 | AlphaNum(long i64) |
639 | : piece_data_(digits), |
640 | piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} |
641 | AlphaNum(unsigned long u64) |
642 | : piece_data_(digits), |
643 | piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} |
644 | |
645 | AlphaNum(float f) |
646 | : piece_data_(digits), piece_size_(strlen(FloatToBuffer(f, digits))) {} |
647 | AlphaNum(double f) |
648 | : piece_data_(digits), piece_size_(strlen(DoubleToBuffer(f, digits))) {} |
649 | |
650 | AlphaNum(Hex hex); |
651 | |
652 | AlphaNum(const char* c_str) |
653 | : piece_data_(c_str), piece_size_(strlen(c_str)) {} |
654 | // TODO: Add a string_ref constructor, eventually |
655 | // AlphaNum(const StringPiece &pc) : piece(pc) {} |
656 | |
657 | AlphaNum(const string& str) |
658 | : piece_data_(str.data()), piece_size_(str.size()) {} |
659 | |
660 | AlphaNum(StringPiece str) |
661 | : piece_data_(str.data()), piece_size_(str.size()) {} |
662 | |
663 | AlphaNum(internal::StringPiecePod str) |
664 | : piece_data_(str.data()), piece_size_(str.size()) {} |
665 | |
666 | size_t size() const { return piece_size_; } |
667 | const char *data() const { return piece_data_; } |
668 | |
669 | private: |
670 | // Use ":" not ':' |
671 | AlphaNum(char c); // NOLINT(runtime/explicit) |
672 | |
673 | // Disallow copy and assign. |
674 | AlphaNum(const AlphaNum&); |
675 | void operator=(const AlphaNum&); |
676 | }; |
677 | |
678 | } // namespace strings |
679 | |
680 | using strings::AlphaNum; |
681 | |
// ----------------------------------------------------------------------
// StrCat()
//    This merges the given strings or numbers, with no delimiter.  This
//    is designed to be the fastest possible way to construct a string out
//    of a mix of raw C strings, strings, bool values,
//    and numeric values.
//
//    Don't use this for user-visible strings.  The localization process
//    works poorly on strings built up out of fragments.
//
//    For clarity and performance, don't use StrCat when appending to a
//    string.  In particular, avoid using any of these (anti-)patterns:
//      str.append(StrCat(...))
//      str += StrCat(...)
//      str = StrCat(str, ...)
//    where the last is the worst, with the potential to change a loop
//    from a linear time operation with O(1) dynamic allocations into a
//    quadratic time operation with O(n) dynamic allocations.  StrAppend
//    is a better choice than any of the above, subject to the restriction
//    of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may
//    be a reference into str.
//
//    The overloads declared below take from two to nine arguments.
// ----------------------------------------------------------------------

PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c, const AlphaNum& d);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c, const AlphaNum& d,
                              const AlphaNum& e);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c, const AlphaNum& d,
                              const AlphaNum& e, const AlphaNum& f);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c, const AlphaNum& d,
                              const AlphaNum& e, const AlphaNum& f,
                              const AlphaNum& g);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c, const AlphaNum& d,
                              const AlphaNum& e, const AlphaNum& f,
                              const AlphaNum& g, const AlphaNum& h);
PROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b,
                              const AlphaNum& c, const AlphaNum& d,
                              const AlphaNum& e, const AlphaNum& f,
                              const AlphaNum& g, const AlphaNum& h,
                              const AlphaNum& i);
729 | |
730 | inline string StrCat(const AlphaNum& a) { return string(a.data(), a.size()); } |
731 | |
732 | // ---------------------------------------------------------------------- |
733 | // StrAppend() |
734 | // Same as above, but adds the output to the given string. |
735 | // WARNING: For speed, StrAppend does not try to check each of its input |
736 | // arguments to be sure that they are not a subset of the string being |
737 | // appended to. That is, while this will work: |
738 | // |
739 | // string s = "foo"; |
740 | // s += s; |
741 | // |
742 | // This will not (necessarily) work: |
743 | // |
744 | // string s = "foo"; |
745 | // StrAppend(&s, s); |
746 | // |
747 | // Note: while StrCat supports appending up to 9 arguments, StrAppend |
748 | // is currently limited to 4. That's rarely an issue except when |
749 | // automatically transforming StrCat to StrAppend, and can easily be |
750 | // worked around as consecutive calls to StrAppend are quite efficient. |
751 | // ---------------------------------------------------------------------- |
752 | |
// Overloads taking one through four pieces (a .. d) to add to *dest. Every
// piece is passed as a const AlphaNum&; nothing is returned.
753 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a); |
754 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, |
755 | const AlphaNum& b); |
756 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, |
757 | const AlphaNum& b, const AlphaNum& c); |
758 | PROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, |
759 | const AlphaNum& b, const AlphaNum& c, |
760 | const AlphaNum& d); |
761 | |
762 | // ---------------------------------------------------------------------- |
763 | // Join() |
764 | // These methods concatenate a range of components into a C++ string, using |
765 | // the C-string "delim" as a separator between components. |
766 | // ---------------------------------------------------------------------- |
767 | template <typename Iterator> |
768 | void Join(Iterator start, Iterator end, |
769 | const char* delim, string* result) { |
770 | for (Iterator it = start; it != end; ++it) { |
771 | if (it != start) { |
772 | result->append(delim); |
773 | } |
774 | StrAppend(result, *it); |
775 | } |
776 | } |
777 | |
778 | template <typename Range> |
779 | string Join(const Range& components, |
780 | const char* delim) { |
781 | string result; |
782 | Join(components.begin(), components.end(), delim, &result); |
783 | return result; |
784 | } |
785 | |
786 | // ---------------------------------------------------------------------- |
787 | // ToHex() |
788 | // Return a lower-case hex string representation of the given integer. |
789 | // ---------------------------------------------------------------------- |
// num is taken as a uint64; the hex text is returned by value as a string.
790 | PROTOBUF_EXPORT string ToHex(uint64 num); |
791 | |
792 | // ---------------------------------------------------------------------- |
793 | // GlobalReplaceSubstring() |
794 | // Replaces all instances of a substring in a string. Does nothing |
795 | // if 'substring' is empty. Returns the number of replacements. |
796 | // |
797 | // NOTE: The string pieces must not overlap s. |
798 | // ---------------------------------------------------------------------- |
// substring:   the text to look for.
// replacement: the text substituted for each occurrence.
// s:           the string that is searched and modified, passed by pointer.
// The replacement count is returned as an int.
799 | PROTOBUF_EXPORT int GlobalReplaceSubstring(const string& substring, |
800 | const string& replacement, |
801 | string* s); |
802 | |
803 | // ---------------------------------------------------------------------- |
804 | // Base64Unescape() |
805 | // Converts "src" which is encoded in Base64 to its binary equivalent and |
806 | // writes it to "dest". If src contains invalid characters, dest is cleared |
807 | // and the function returns false. Returns true on success. |
808 | // ---------------------------------------------------------------------- |
// src is passed by value as a StringPiece; the decoded bytes go to *dest.
809 | PROTOBUF_EXPORT bool Base64Unescape(StringPiece src, string* dest); |
810 | |
811 | // ---------------------------------------------------------------------- |
812 | // WebSafeBase64Unescape() |
813 | // This is a variation of Base64Unescape which uses '-' instead of '+', and |
// '_' instead of '/'. src is not null terminated; pass its length in slen.
// It is recommended that slen < szdest, but szdest is honored anyway.
// RETURNS the length of dest, or -1 if src contains invalid chars.
//
818 | // The variation that stores into a string clears the string first, and |
819 | // returns false (with dest empty) if src contains invalid chars; for |
820 | // this version src and dest must be different strings. |
821 | // ---------------------------------------------------------------------- |
// Raw-buffer form: src/slen give the encoded input and its length, dest is
// the caller-provided output buffer and szdest its size (presumably in
// bytes -- confirm in the implementation). Returns an int.
822 | PROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen, char* dest, |
823 | int szdest); |
// String form: src is a StringPiece, the output goes to *dest. Returns a bool.
824 | PROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, string* dest); |
825 | |
826 | // Return the length to use for the output buffer given to the base64 escape |
827 | // routines. Make sure to use the same value for do_padding in both. |
828 | // This function may return incorrect results if given input_len values that |
829 | // are extremely high, which should happen rarely. |
// input_len:  length of the data that will be escaped (presumably in bytes --
//             confirm in the implementation).
// do_padding: must be the same value later passed to the escape routine.
830 | PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len, bool do_padding); |
831 | // Use this version when calling Base64Escape without a do_padding arg. |
832 | PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len); |
833 | |
834 | // ---------------------------------------------------------------------- |
835 | // Base64Escape() |
836 | // WebSafeBase64Escape() |
837 | // Encode "src" to "dest" using base64 encoding. |
// src is not null terminated; instead specify its length in slen.
// 'dest' should have at least CalculateBase64EscapedLen() length.
// RETURNS the length of dest.
// The WebSafe variation uses '-' instead of '+' and '_' instead of '/'
// so that we can place the output in URLs or cookies without having
// to escape it. It also has an extra parameter "do_padding",
844 | // which when set to false will prevent padding with "=". |
845 | // ---------------------------------------------------------------------- |
// Raw-buffer forms: src/slen give the input bytes and their count, dest is
// the caller-provided output buffer and szdest its size (presumably in
// bytes -- confirm in the implementation). Both return an int.
846 | PROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, char* dest, |
847 | int szdest); |
848 | PROTOBUF_EXPORT int WebSafeBase64Escape(const unsigned char* src, int slen, |
849 | char* dest, int szdest, |
850 | bool do_padding); |
851 | // Encode src into dest with padding. |
852 | PROTOBUF_EXPORT void Base64Escape(StringPiece src, string* dest); |
853 | // Encode src into dest web-safely without padding. |
854 | PROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, string* dest); |
855 | // Encode src into dest web-safely with padding. |
856 | PROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, |
857 | string* dest); |
858 | |
// Forms that take the input as a raw byte pointer plus length (szsrc) and
// write the result to the string *dest. Whether existing contents of *dest
// are kept is not visible from this header.
// NOTE(review): do_padding presumably selects whether "=" padding is
// emitted, as for the raw-buffer WebSafe form -- confirm in the
// implementation.
859 | PROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, |
860 | string* dest, bool do_padding); |
861 | PROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, |
862 | string* dest, bool do_padding); |
863 | |
864 | inline bool IsValidCodePoint(uint32 code_point) { |
865 | return code_point < 0xD800 || |
866 | (code_point >= 0xE000 && code_point <= 0x10FFFF); |
867 | } |
868 | |
// NOTE(review): presumably the maximum number of bytes in the UTF-8 encoding
// of a single code point; the value equals the 4-byte minimum output buffer
// that EncodeAsUTF8Char() requires. Confirm against the users of UTFmax.
869 | static const int UTFmax = 4; |
870 | // ---------------------------------------------------------------------- |
871 | // EncodeAsUTF8Char() |
872 | // Helper to append a Unicode code point to a string as UTF8, without bringing |
// in any external dependencies. The output buffer must be at least 4 bytes
// large.
875 | // ---------------------------------------------------------------------- |
// code_point: the value to encode.
// output:     caller-provided buffer that receives the encoded bytes.
// NOTE(review): the int result is presumably the number of bytes written to
// output -- confirm in the implementation.
876 | PROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output); |
877 | |
878 | // ---------------------------------------------------------------------- |
879 | // UTF8FirstLetterNumBytes() |
880 | // Length of the first UTF-8 character. |
881 | // ---------------------------------------------------------------------- |
// src points at the text to inspect; len is presumably the number of bytes
// available at src (confirm in the implementation). The length is returned
// as an int.
882 | PROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len); |
883 | |
884 | // From google3/third_party/absl/strings/escaping.h |
885 | |
886 | // ---------------------------------------------------------------------- |
887 | // CleanStringLineEndings() |
888 | // Clean up a multi-line string to conform to Unix line endings. |
889 | // Reads from src and appends to dst, so usually dst should be empty. |
890 | // |
891 | // If there is no line ending at the end of a non-empty string, it can |
892 | // be added automatically. |
893 | // |
894 | // Four different types of input are correctly handled: |
895 | // |
896 | // - Unix/Linux files: line ending is LF: pass through unchanged |
897 | // |
898 | // - DOS/Windows files: line ending is CRLF: convert to LF |
899 | // |
900 | // - Legacy Mac files: line ending is CR: convert to LF |
901 | // |
902 | // - Garbled files: random line endings: convert gracefully |
903 | // lonely CR, lonely LF, CRLF: convert to LF |
904 | // |
905 | // @param src The multi-line string to convert |
906 | // @param dst The converted string is appended to this string |
907 | // @param auto_end_last_line Automatically terminate the last line |
908 | // |
909 | // Limitations: |
910 | // |
911 | // This does not do the right thing for CRCRLF files created by |
912 | // broken programs that do another Unix->DOS conversion on files |
// that are already in CRLF format. For this, a brute-force two-pass
// approach would be needed that
915 | // |
916 | // (1) determines the presence of LF (first one is ok) |
917 | // (2) if yes, removes any CR, else convert every CR to LF |
// Two-string form: src is read-only and the converted text is appended to
// *dst. auto_end_last_line requests automatic termination of the last line.
918 | PROTOBUF_EXPORT void CleanStringLineEndings(const string& src, string* dst, |
919 | bool auto_end_last_line); |
920 | |
921 | // Same as above, but transforms the argument in place. |
// Here *str serves as both the input and the output.
922 | PROTOBUF_EXPORT void CleanStringLineEndings(string* str, |
923 | bool auto_end_last_line); |
924 | |
925 | namespace strings { |
926 | inline bool EndsWith(StringPiece text, StringPiece suffix) { |
927 | return suffix.empty() || |
928 | (text.size() >= suffix.size() && |
929 | memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), |
930 | suffix.size()) == 0); |
931 | } |
932 | } // namespace strings |
933 | |
934 | namespace internal { |
935 | |
936 | // A locale-independent version of the standard strtod(), which always |
937 | // uses a dot as the decimal separator. |
// str:    the C string to parse.
// endptr: out-parameter; presumably set as by strtod() to the first
//         character that was not consumed -- confirm in the implementation.
// Returns the parsed value as a double.
// NOTE(review): declared without PROTOBUF_EXPORT, unlike the other
// non-inline function declarations in this part of the header.
938 | double NoLocaleStrtod(const char* str, char** endptr); |
939 | |
940 | } // namespace internal |
941 | |
942 | } // namespace protobuf |
943 | } // namespace google |
944 | |
945 | #include <google/protobuf/port_undef.inc> |
946 | |
947 | #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
948 | |