1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | // Copied from strings/stringpiece.h with modifications |
5 | // |
6 | // A string-like object that points to a sized piece of memory. |
7 | // |
8 | // You can use StringPiece as a function or method parameter. A StringPiece |
9 | // parameter can receive a double-quoted string literal argument, a "const |
10 | // char*" argument, a string argument, or a StringPiece argument with no data |
11 | // copying. Systematic use of StringPiece for arguments reduces data |
12 | // copies and strlen() calls. |
13 | // |
14 | // Prefer passing StringPieces by value: |
15 | // void MyFunction(StringPiece arg); |
16 | // If circumstances require, you may also pass by const reference: |
17 | // void MyFunction(const StringPiece& arg); // not preferred |
18 | // Both of these have the same lifetime semantics. Passing by value |
19 | // generates slightly smaller code. For more discussion, Googlers can see |
20 | // the thread go/stringpiecebyvalue on c-users. |
21 | // |
22 | // StringPiece16 is similar to StringPiece but for butil::string16 instead of |
// std::string. We do not define as large a subset of the STL functions
24 | // from basic_string as in StringPiece, but this can be changed if these |
25 | // functions (find, find_first_of, etc.) are found to be useful in this context. |
26 | // |
27 | |
28 | #ifndef BUTIL_STRINGS_STRING_PIECE_H_ |
29 | #define BUTIL_STRINGS_STRING_PIECE_H_ |
30 | |
#include <ctype.h>
#include <stddef.h>

#include <iosfwd>
#include <iterator>
#include <string>

#include "butil/base_export.h"
#include "butil/basictypes.h"
#include "butil/containers/hash_tables.h"
#include "butil/strings/string16.h"
40 | |
41 | namespace butil { |
42 | |
43 | template <typename STRING_TYPE> class BasicStringPiece; |
44 | typedef BasicStringPiece<std::string> StringPiece; |
45 | typedef BasicStringPiece<string16> StringPiece16; |
46 | |
47 | // internal -------------------------------------------------------------------- |
48 | |
49 | // Many of the StringPiece functions use different implementations for the |
50 | // 8-bit and 16-bit versions, and we don't want lots of template expansions in |
51 | // this (very common) header that will slow down compilation. |
52 | // |
53 | // So here we define overloaded functions called by the StringPiece template. |
54 | // For those that share an implementation, the two versions will expand to a |
55 | // template internal to the .cc file. |
56 | namespace internal { |
57 | |
58 | BUTIL_EXPORT void CopyToString(const StringPiece& self, std::string* target); |
59 | BUTIL_EXPORT void CopyToString(const StringPiece16& self, string16* target); |
60 | |
61 | BUTIL_EXPORT void AppendToString(const StringPiece& self, std::string* target); |
62 | BUTIL_EXPORT void AppendToString(const StringPiece16& self, string16* target); |
63 | |
64 | BUTIL_EXPORT size_t copy(const StringPiece& self, |
65 | char* buf, |
66 | size_t n, |
67 | size_t pos); |
68 | BUTIL_EXPORT size_t copy(const StringPiece16& self, |
69 | char16* buf, |
70 | size_t n, |
71 | size_t pos); |
72 | |
73 | BUTIL_EXPORT size_t find(const StringPiece& self, |
74 | const StringPiece& s, |
75 | size_t pos); |
76 | BUTIL_EXPORT size_t find(const StringPiece16& self, |
77 | const StringPiece16& s, |
78 | size_t pos); |
79 | BUTIL_EXPORT size_t find(const StringPiece& self, |
80 | char c, |
81 | size_t pos); |
82 | BUTIL_EXPORT size_t find(const StringPiece16& self, |
83 | char16 c, |
84 | size_t pos); |
85 | |
86 | BUTIL_EXPORT size_t rfind(const StringPiece& self, |
87 | const StringPiece& s, |
88 | size_t pos); |
89 | BUTIL_EXPORT size_t rfind(const StringPiece16& self, |
90 | const StringPiece16& s, |
91 | size_t pos); |
92 | BUTIL_EXPORT size_t rfind(const StringPiece& self, |
93 | char c, |
94 | size_t pos); |
95 | BUTIL_EXPORT size_t rfind(const StringPiece16& self, |
96 | char16 c, |
97 | size_t pos); |
98 | |
99 | BUTIL_EXPORT size_t find_first_of(const StringPiece& self, |
100 | const StringPiece& s, |
101 | size_t pos); |
102 | BUTIL_EXPORT size_t find_first_of(const StringPiece16& self, |
103 | const StringPiece16& s, |
104 | size_t pos); |
105 | |
106 | BUTIL_EXPORT size_t find_first_not_of(const StringPiece& self, |
107 | const StringPiece& s, |
108 | size_t pos); |
109 | BUTIL_EXPORT size_t find_first_not_of(const StringPiece16& self, |
110 | const StringPiece16& s, |
111 | size_t pos); |
112 | BUTIL_EXPORT size_t find_first_not_of(const StringPiece& self, |
113 | char c, |
114 | size_t pos); |
115 | BUTIL_EXPORT size_t find_first_not_of(const StringPiece16& self, |
116 | char16 c, |
117 | size_t pos); |
118 | |
119 | BUTIL_EXPORT size_t find_last_of(const StringPiece& self, |
120 | const StringPiece& s, |
121 | size_t pos); |
122 | BUTIL_EXPORT size_t find_last_of(const StringPiece16& self, |
123 | const StringPiece16& s, |
124 | size_t pos); |
125 | BUTIL_EXPORT size_t find_last_of(const StringPiece& self, |
126 | char c, |
127 | size_t pos); |
128 | BUTIL_EXPORT size_t find_last_of(const StringPiece16& self, |
129 | char16 c, |
130 | size_t pos); |
131 | |
132 | BUTIL_EXPORT size_t find_last_not_of(const StringPiece& self, |
133 | const StringPiece& s, |
134 | size_t pos); |
135 | BUTIL_EXPORT size_t find_last_not_of(const StringPiece16& self, |
136 | const StringPiece16& s, |
137 | size_t pos); |
138 | BUTIL_EXPORT size_t find_last_not_of(const StringPiece16& self, |
139 | char16 c, |
140 | size_t pos); |
141 | BUTIL_EXPORT size_t find_last_not_of(const StringPiece& self, |
142 | char c, |
143 | size_t pos); |
144 | |
145 | BUTIL_EXPORT StringPiece substr(const StringPiece& self, |
146 | size_t pos, |
147 | size_t n); |
148 | BUTIL_EXPORT StringPiece16 substr(const StringPiece16& self, |
149 | size_t pos, |
150 | size_t n); |
151 | |
152 | } // namespace internal |
153 | |
154 | // BasicStringPiece ------------------------------------------------------------ |
155 | |
// Defines the types, methods, operators, and data members common to both
// StringPiece and StringPiece16. Prefer the StringPiece and StringPiece16
// typedefs over spelling out BasicStringPiece<...> directly.
159 | // |
160 | // This is templatized by string class type rather than character type, so |
161 | // BasicStringPiece<std::string> or BasicStringPiece<butil::string16>. |
162 | template <typename STRING_TYPE> class BasicStringPiece { |
163 | public: |
164 | // Standard STL container boilerplate. |
165 | typedef size_t size_type; |
166 | typedef typename STRING_TYPE::value_type value_type; |
167 | typedef const value_type* pointer; |
168 | typedef const value_type& reference; |
169 | typedef const value_type& const_reference; |
170 | typedef ptrdiff_t difference_type; |
171 | typedef const value_type* const_iterator; |
172 | typedef std::reverse_iterator<const_iterator> const_reverse_iterator; |
173 | |
174 | static const size_type npos; |
175 | |
176 | public: |
177 | // We provide non-explicit singleton constructors so users can pass |
178 | // in a "const char*" or a "string" wherever a "StringPiece" is |
179 | // expected (likewise for char16, string16, StringPiece16). |
180 | BasicStringPiece() : ptr_(NULL), length_(0) {} |
181 | BasicStringPiece(const value_type* str) |
182 | : ptr_(str), |
183 | length_((str == NULL) ? 0 : STRING_TYPE::traits_type::length(str)) {} |
184 | BasicStringPiece(const STRING_TYPE& str) |
185 | : ptr_(str.data()), length_(str.size()) {} |
186 | BasicStringPiece(const value_type* offset, size_type len) |
187 | : ptr_(offset), length_(len) {} |
188 | BasicStringPiece(const typename STRING_TYPE::const_iterator& begin, |
189 | const typename STRING_TYPE::const_iterator& end) |
190 | : ptr_((end > begin) ? &(*begin) : NULL), |
191 | length_((end > begin) ? (size_type)(end - begin) : 0) {} |
192 | |
193 | // data() may return a pointer to a buffer with embedded NULs, and the |
194 | // returned buffer may or may not be null terminated. Therefore it is |
195 | // typically a mistake to pass data() to a routine that expects a NUL |
196 | // terminated string. |
197 | const value_type* data() const { return ptr_; } |
198 | size_type size() const { return length_; } |
199 | size_type length() const { return length_; } |
200 | bool empty() const { return length_ == 0; } |
201 | |
202 | void clear() { |
203 | ptr_ = NULL; |
204 | length_ = 0; |
205 | } |
206 | void set(const value_type* data, size_type len) { |
207 | ptr_ = data; |
208 | length_ = len; |
209 | } |
210 | void set(const value_type* str) { |
211 | ptr_ = str; |
212 | length_ = str ? STRING_TYPE::traits_type::length(str) : 0; |
213 | } |
214 | |
215 | value_type operator[](size_type i) const { return ptr_[i]; } |
216 | |
217 | void remove_prefix(size_type n) { |
218 | ptr_ += n; |
219 | length_ -= n; |
220 | } |
221 | |
222 | void remove_suffix(size_type n) { |
223 | length_ -= n; |
224 | } |
225 | |
226 | // Remove heading and trailing spaces. |
227 | void trim_spaces() { |
228 | size_t nsp = 0; |
229 | for (; nsp < size() && isspace(ptr_[nsp]); ++nsp) {} |
230 | remove_prefix(nsp); |
231 | nsp = 0; |
232 | for (; nsp < size() && isspace(ptr_[size()-1-nsp]); ++nsp) {} |
233 | remove_suffix(nsp); |
234 | } |
235 | |
236 | int compare(const BasicStringPiece<STRING_TYPE>& x) const { |
237 | int r = wordmemcmp( |
238 | ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_)); |
239 | if (r == 0) { |
240 | if (length_ < x.length_) r = -1; |
241 | else if (length_ > x.length_) r = +1; |
242 | } |
243 | return r; |
244 | } |
245 | |
246 | STRING_TYPE as_string() const { |
247 | // std::string doesn't like to take a NULL pointer even with a 0 size. |
248 | return empty() ? STRING_TYPE() : STRING_TYPE(data(), size()); |
249 | } |
250 | |
251 | // Return the first/last character, UNDEFINED when StringPiece is empty. |
252 | char front() const { return *ptr_; } |
253 | char back() const { return *(ptr_ + length_ - 1); } |
254 | // Return the first/last character, 0 when StringPiece is empty. |
255 | char front_or_0() const { return length_ ? *ptr_ : '\0'; } |
256 | char back_or_0() const { return length_ ? *(ptr_ + length_ - 1) : '\0'; } |
257 | |
258 | const_iterator begin() const { return ptr_; } |
259 | const_iterator end() const { return ptr_ + length_; } |
260 | const_reverse_iterator rbegin() const { |
261 | return const_reverse_iterator(ptr_ + length_); |
262 | } |
263 | const_reverse_iterator rend() const { |
264 | return const_reverse_iterator(ptr_); |
265 | } |
266 | |
267 | size_type max_size() const { return length_; } |
268 | size_type capacity() const { return length_; } |
269 | |
270 | static int wordmemcmp(const value_type* p, |
271 | const value_type* p2, |
272 | size_type N) { |
273 | return STRING_TYPE::traits_type::compare(p, p2, N); |
274 | } |
275 | |
276 | // Sets the value of the given string target type to be the current string. |
277 | // This saves a temporary over doing |a = b.as_string()| |
278 | void CopyToString(STRING_TYPE* target) const { |
279 | internal::CopyToString(*this, target); |
280 | } |
281 | |
282 | void AppendToString(STRING_TYPE* target) const { |
283 | internal::AppendToString(*this, target); |
284 | } |
285 | |
286 | size_type copy(value_type* buf, size_type n, size_type pos = 0) const { |
287 | return internal::copy(*this, buf, n, pos); |
288 | } |
289 | |
290 | // Does "this" start with "x" |
291 | bool starts_with(const BasicStringPiece& x) const { |
292 | return ((this->length_ >= x.length_) && |
293 | (wordmemcmp(this->ptr_, x.ptr_, x.length_) == 0)); |
294 | } |
295 | |
296 | // Does "this" end with "x" |
297 | bool ends_with(const BasicStringPiece& x) const { |
298 | return ((this->length_ >= x.length_) && |
299 | (wordmemcmp(this->ptr_ + (this->length_-x.length_), |
300 | x.ptr_, x.length_) == 0)); |
301 | } |
302 | |
303 | // find: Search for a character or substring at a given offset. |
304 | size_type find(const BasicStringPiece<STRING_TYPE>& s, |
305 | size_type pos = 0) const { |
306 | return internal::find(*this, s, pos); |
307 | } |
308 | size_type find(value_type c, size_type pos = 0) const { |
309 | return internal::find(*this, c, pos); |
310 | } |
311 | |
312 | // rfind: Reverse find. |
313 | size_type rfind(const BasicStringPiece& s, |
314 | size_type pos = BasicStringPiece::npos) const { |
315 | return internal::rfind(*this, s, pos); |
316 | } |
317 | size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const { |
318 | return internal::rfind(*this, c, pos); |
319 | } |
320 | |
321 | // find_first_of: Find the first occurence of one of a set of characters. |
322 | size_type find_first_of(const BasicStringPiece& s, |
323 | size_type pos = 0) const { |
324 | return internal::find_first_of(*this, s, pos); |
325 | } |
326 | size_type find_first_of(value_type c, size_type pos = 0) const { |
327 | return find(c, pos); |
328 | } |
329 | |
330 | // find_first_not_of: Find the first occurence not of a set of characters. |
331 | size_type find_first_not_of(const BasicStringPiece& s, |
332 | size_type pos = 0) const { |
333 | return internal::find_first_not_of(*this, s, pos); |
334 | } |
335 | size_type find_first_not_of(value_type c, size_type pos = 0) const { |
336 | return internal::find_first_not_of(*this, c, pos); |
337 | } |
338 | |
339 | // find_last_of: Find the last occurence of one of a set of characters. |
340 | size_type find_last_of(const BasicStringPiece& s, |
341 | size_type pos = BasicStringPiece::npos) const { |
342 | return internal::find_last_of(*this, s, pos); |
343 | } |
344 | size_type find_last_of(value_type c, |
345 | size_type pos = BasicStringPiece::npos) const { |
346 | return rfind(c, pos); |
347 | } |
348 | |
349 | // find_last_not_of: Find the last occurence not of a set of characters. |
350 | size_type find_last_not_of(const BasicStringPiece& s, |
351 | size_type pos = BasicStringPiece::npos) const { |
352 | return internal::find_last_not_of(*this, s, pos); |
353 | } |
354 | size_type find_last_not_of(value_type c, |
355 | size_type pos = BasicStringPiece::npos) const { |
356 | return internal::find_last_not_of(*this, c, pos); |
357 | } |
358 | |
359 | // substr. |
360 | BasicStringPiece substr(size_type pos, |
361 | size_type n = BasicStringPiece::npos) const { |
362 | return internal::substr(*this, pos, n); |
363 | } |
364 | |
365 | protected: |
366 | const value_type* ptr_; |
367 | size_type length_; |
368 | }; |
369 | |
370 | template <typename STRING_TYPE> |
371 | const typename BasicStringPiece<STRING_TYPE>::size_type |
372 | BasicStringPiece<STRING_TYPE>::npos = |
373 | typename BasicStringPiece<STRING_TYPE>::size_type(-1); |
374 | |
375 | // MSVC doesn't like complex extern templates and DLLs. |
376 | #if !defined(COMPILER_MSVC) |
377 | extern template class BUTIL_EXPORT BasicStringPiece<std::string>; |
378 | extern template class BUTIL_EXPORT BasicStringPiece<string16>; |
379 | #endif |
380 | |
// StringPiece operators -------------------------------------------------------
382 | |
383 | BUTIL_EXPORT bool operator==(const StringPiece& x, const StringPiece& y); |
384 | |
385 | inline bool operator!=(const StringPiece& x, const StringPiece& y) { |
386 | return !(x == y); |
387 | } |
388 | |
389 | inline bool operator<(const StringPiece& x, const StringPiece& y) { |
390 | const int r = StringPiece::wordmemcmp( |
391 | x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size())); |
392 | return ((r < 0) || ((r == 0) && (x.size() < y.size()))); |
393 | } |
394 | |
395 | inline bool operator>(const StringPiece& x, const StringPiece& y) { |
396 | return y < x; |
397 | } |
398 | |
399 | inline bool operator<=(const StringPiece& x, const StringPiece& y) { |
400 | return !(x > y); |
401 | } |
402 | |
403 | inline bool operator>=(const StringPiece& x, const StringPiece& y) { |
404 | return !(x < y); |
405 | } |
406 | |
407 | // StringPiece16 operators ----------------------------------------------------- |
408 | |
409 | inline bool operator==(const StringPiece16& x, const StringPiece16& y) { |
410 | if (x.size() != y.size()) |
411 | return false; |
412 | |
413 | return StringPiece16::wordmemcmp(x.data(), y.data(), x.size()) == 0; |
414 | } |
415 | |
416 | inline bool operator!=(const StringPiece16& x, const StringPiece16& y) { |
417 | return !(x == y); |
418 | } |
419 | |
420 | inline bool operator<(const StringPiece16& x, const StringPiece16& y) { |
421 | const int r = StringPiece16::wordmemcmp( |
422 | x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size())); |
423 | return ((r < 0) || ((r == 0) && (x.size() < y.size()))); |
424 | } |
425 | |
426 | inline bool operator>(const StringPiece16& x, const StringPiece16& y) { |
427 | return y < x; |
428 | } |
429 | |
430 | inline bool operator<=(const StringPiece16& x, const StringPiece16& y) { |
431 | return !(x > y); |
432 | } |
433 | |
434 | inline bool operator>=(const StringPiece16& x, const StringPiece16& y) { |
435 | return !(x < y); |
436 | } |
437 | |
438 | BUTIL_EXPORT std::ostream& operator<<(std::ostream& o, |
439 | const StringPiece& piece); |
440 | |
441 | // [ Ease getting first/last character of std::string before C++11 ] |
442 | // return the first/last character, UNDEFINED when the string is empty. |
// First character of |s|. The caller must pass a non-empty string.
inline char front_char(const std::string& s) {
  const char* const chars = s.c_str();
  return chars[0];
}
// Last character of |s|. The caller must pass a non-empty string.
inline char back_char(const std::string& s) {
  const std::string::size_type last = s.size() - 1;
  return s[last];
}
445 | // return the first/last character, 0 when the string is empty. |
// First character of |s|, or '\0' when |s| has no characters.
inline char front_char_or_0(const std::string& s) {
  if (s.empty()) {
    return '\0';
  }
  return s[0];
}
// Last character of |s|, or '\0' when |s| has no characters.
inline char back_char_or_0(const std::string& s) {
  if (s.empty()) {
    return '\0';
  }
  const std::string::size_type last = s.size() - 1;
  return s[last];
}
448 | |
449 | } // namespace butil |
450 | |
451 | // Hashing --------------------------------------------------------------------- |
452 | |
453 | // We provide appropriate hash functions so StringPiece and StringPiece16 can |
454 | // be used as keys in hash sets and maps. |
455 | |
456 | // This hash function is copied from butil/containers/hash_tables.h. We don't |
457 | // use the ones already defined for string and string16 directly because it |
458 | // would require the string constructors to be called, which we don't want. |
// Expands to the complete body of a hash function over |string_piece|:
// starting from 0, every element is folded in as result = result * 131 + elem,
// and the final value is returned. Because the expansion ends in a `return`
// statement, it must be the last thing in the enclosing function body.
// |StringPieceType| must provide const_iterator; |string_piece| must provide
// begin() and end().
// The last line deliberately carries no line continuation, so the macro can
// never absorb the source line that follows it.
#define HASH_STRING_PIECE(StringPieceType, string_piece)                \
  std::size_t result = 0;                                               \
  for (StringPieceType::const_iterator i = (string_piece).begin();      \
       i != (string_piece).end(); ++i)                                  \
    result = (result * 131) + *i;                                       \
  return result;
465 | |
namespace BUTIL_HASH_NAMESPACE {
#if defined(COMPILER_GCC)

// COMPILER_GCC builds: hashing is provided by specializing the hash functor
// template for each piece type.
template <>
struct hash<butil::StringPiece> {
  std::size_t operator()(const butil::StringPiece& piece) const {
    HASH_STRING_PIECE(butil::StringPiece, piece);
  }
};

template <>
struct hash<butil::StringPiece16> {
  std::size_t operator()(const butil::StringPiece16& piece16) const {
    HASH_STRING_PIECE(butil::StringPiece16, piece16);
  }
};

#elif defined(COMPILER_MSVC)

// COMPILER_MSVC builds: hashing is provided by hash_value() overloads, one
// per piece type.
inline size_t hash_value(const butil::StringPiece& piece) {
  HASH_STRING_PIECE(butil::StringPiece, piece);
}

inline size_t hash_value(const butil::StringPiece16& piece16) {
  HASH_STRING_PIECE(butil::StringPiece16, piece16);
}

#endif  // COMPILER

}  // namespace BUTIL_HASH_NAMESPACE
494 | |
495 | #endif // BUTIL_STRINGS_STRING_PIECE_H_ |
496 | |