1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4// Copied from strings/stringpiece.h with modifications
5//
6// A string-like object that points to a sized piece of memory.
7//
8// You can use StringPiece as a function or method parameter. A StringPiece
9// parameter can receive a double-quoted string literal argument, a "const
10// char*" argument, a string argument, or a StringPiece argument with no data
11// copying. Systematic use of StringPiece for arguments reduces data
12// copies and strlen() calls.
13//
14// Prefer passing StringPieces by value:
15// void MyFunction(StringPiece arg);
16// If circumstances require, you may also pass by const reference:
17// void MyFunction(const StringPiece& arg); // not preferred
18// Both of these have the same lifetime semantics. Passing by value
19// generates slightly smaller code. For more discussion, Googlers can see
20// the thread go/stringpiecebyvalue on c-users.
21//
// StringPiece16 is similar to StringPiece but for butil::string16 instead of
// std::string. We do not define as large a subset of the STL functions
// from basic_string as in StringPiece, but this can be changed if these
// functions (find, find_first_of, etc.) are found to be useful in this context.
26//
27
28#ifndef BUTIL_STRINGS_STRING_PIECE_H_
29#define BUTIL_STRINGS_STRING_PIECE_H_
30
#include <ctype.h>
#include <stddef.h>

#include <iosfwd>
#include <iterator>
#include <string>

#include "butil/base_export.h"
#include "butil/basictypes.h"
#include "butil/containers/hash_tables.h"
#include "butil/strings/string16.h"
40
41namespace butil {
42
43template <typename STRING_TYPE> class BasicStringPiece;
44typedef BasicStringPiece<std::string> StringPiece;
45typedef BasicStringPiece<string16> StringPiece16;
46
47// internal --------------------------------------------------------------------
48
49// Many of the StringPiece functions use different implementations for the
50// 8-bit and 16-bit versions, and we don't want lots of template expansions in
51// this (very common) header that will slow down compilation.
52//
53// So here we define overloaded functions called by the StringPiece template.
54// For those that share an implementation, the two versions will expand to a
55// template internal to the .cc file.
56namespace internal {
57
58BUTIL_EXPORT void CopyToString(const StringPiece& self, std::string* target);
59BUTIL_EXPORT void CopyToString(const StringPiece16& self, string16* target);
60
61BUTIL_EXPORT void AppendToString(const StringPiece& self, std::string* target);
62BUTIL_EXPORT void AppendToString(const StringPiece16& self, string16* target);
63
64BUTIL_EXPORT size_t copy(const StringPiece& self,
65 char* buf,
66 size_t n,
67 size_t pos);
68BUTIL_EXPORT size_t copy(const StringPiece16& self,
69 char16* buf,
70 size_t n,
71 size_t pos);
72
73BUTIL_EXPORT size_t find(const StringPiece& self,
74 const StringPiece& s,
75 size_t pos);
76BUTIL_EXPORT size_t find(const StringPiece16& self,
77 const StringPiece16& s,
78 size_t pos);
79BUTIL_EXPORT size_t find(const StringPiece& self,
80 char c,
81 size_t pos);
82BUTIL_EXPORT size_t find(const StringPiece16& self,
83 char16 c,
84 size_t pos);
85
86BUTIL_EXPORT size_t rfind(const StringPiece& self,
87 const StringPiece& s,
88 size_t pos);
89BUTIL_EXPORT size_t rfind(const StringPiece16& self,
90 const StringPiece16& s,
91 size_t pos);
92BUTIL_EXPORT size_t rfind(const StringPiece& self,
93 char c,
94 size_t pos);
95BUTIL_EXPORT size_t rfind(const StringPiece16& self,
96 char16 c,
97 size_t pos);
98
99BUTIL_EXPORT size_t find_first_of(const StringPiece& self,
100 const StringPiece& s,
101 size_t pos);
102BUTIL_EXPORT size_t find_first_of(const StringPiece16& self,
103 const StringPiece16& s,
104 size_t pos);
105
106BUTIL_EXPORT size_t find_first_not_of(const StringPiece& self,
107 const StringPiece& s,
108 size_t pos);
109BUTIL_EXPORT size_t find_first_not_of(const StringPiece16& self,
110 const StringPiece16& s,
111 size_t pos);
112BUTIL_EXPORT size_t find_first_not_of(const StringPiece& self,
113 char c,
114 size_t pos);
115BUTIL_EXPORT size_t find_first_not_of(const StringPiece16& self,
116 char16 c,
117 size_t pos);
118
119BUTIL_EXPORT size_t find_last_of(const StringPiece& self,
120 const StringPiece& s,
121 size_t pos);
122BUTIL_EXPORT size_t find_last_of(const StringPiece16& self,
123 const StringPiece16& s,
124 size_t pos);
125BUTIL_EXPORT size_t find_last_of(const StringPiece& self,
126 char c,
127 size_t pos);
128BUTIL_EXPORT size_t find_last_of(const StringPiece16& self,
129 char16 c,
130 size_t pos);
131
132BUTIL_EXPORT size_t find_last_not_of(const StringPiece& self,
133 const StringPiece& s,
134 size_t pos);
135BUTIL_EXPORT size_t find_last_not_of(const StringPiece16& self,
136 const StringPiece16& s,
137 size_t pos);
138BUTIL_EXPORT size_t find_last_not_of(const StringPiece16& self,
139 char16 c,
140 size_t pos);
141BUTIL_EXPORT size_t find_last_not_of(const StringPiece& self,
142 char c,
143 size_t pos);
144
145BUTIL_EXPORT StringPiece substr(const StringPiece& self,
146 size_t pos,
147 size_t n);
148BUTIL_EXPORT StringPiece16 substr(const StringPiece16& self,
149 size_t pos,
150 size_t n);
151
152} // namespace internal
153
154// BasicStringPiece ------------------------------------------------------------
155
156// Defines the types, methods, operators, and data members common to both
157// StringPiece and StringPiece16. Do not refer to this class directly, but
158// rather to BasicStringPiece, StringPiece, or StringPiece16.
159//
160// This is templatized by string class type rather than character type, so
161// BasicStringPiece<std::string> or BasicStringPiece<butil::string16>.
162template <typename STRING_TYPE> class BasicStringPiece {
163 public:
164 // Standard STL container boilerplate.
165 typedef size_t size_type;
166 typedef typename STRING_TYPE::value_type value_type;
167 typedef const value_type* pointer;
168 typedef const value_type& reference;
169 typedef const value_type& const_reference;
170 typedef ptrdiff_t difference_type;
171 typedef const value_type* const_iterator;
172 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
173
174 static const size_type npos;
175
176 public:
177 // We provide non-explicit singleton constructors so users can pass
178 // in a "const char*" or a "string" wherever a "StringPiece" is
179 // expected (likewise for char16, string16, StringPiece16).
180 BasicStringPiece() : ptr_(NULL), length_(0) {}
181 BasicStringPiece(const value_type* str)
182 : ptr_(str),
183 length_((str == NULL) ? 0 : STRING_TYPE::traits_type::length(str)) {}
184 BasicStringPiece(const STRING_TYPE& str)
185 : ptr_(str.data()), length_(str.size()) {}
186 BasicStringPiece(const value_type* offset, size_type len)
187 : ptr_(offset), length_(len) {}
188 BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
189 const typename STRING_TYPE::const_iterator& end)
190 : ptr_((end > begin) ? &(*begin) : NULL),
191 length_((end > begin) ? (size_type)(end - begin) : 0) {}
192
193 // data() may return a pointer to a buffer with embedded NULs, and the
194 // returned buffer may or may not be null terminated. Therefore it is
195 // typically a mistake to pass data() to a routine that expects a NUL
196 // terminated string.
197 const value_type* data() const { return ptr_; }
198 size_type size() const { return length_; }
199 size_type length() const { return length_; }
200 bool empty() const { return length_ == 0; }
201
202 void clear() {
203 ptr_ = NULL;
204 length_ = 0;
205 }
206 void set(const value_type* data, size_type len) {
207 ptr_ = data;
208 length_ = len;
209 }
210 void set(const value_type* str) {
211 ptr_ = str;
212 length_ = str ? STRING_TYPE::traits_type::length(str) : 0;
213 }
214
215 value_type operator[](size_type i) const { return ptr_[i]; }
216
217 void remove_prefix(size_type n) {
218 ptr_ += n;
219 length_ -= n;
220 }
221
222 void remove_suffix(size_type n) {
223 length_ -= n;
224 }
225
226 // Remove heading and trailing spaces.
227 void trim_spaces() {
228 size_t nsp = 0;
229 for (; nsp < size() && isspace(ptr_[nsp]); ++nsp) {}
230 remove_prefix(nsp);
231 nsp = 0;
232 for (; nsp < size() && isspace(ptr_[size()-1-nsp]); ++nsp) {}
233 remove_suffix(nsp);
234 }
235
236 int compare(const BasicStringPiece<STRING_TYPE>& x) const {
237 int r = wordmemcmp(
238 ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_));
239 if (r == 0) {
240 if (length_ < x.length_) r = -1;
241 else if (length_ > x.length_) r = +1;
242 }
243 return r;
244 }
245
246 STRING_TYPE as_string() const {
247 // std::string doesn't like to take a NULL pointer even with a 0 size.
248 return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
249 }
250
251 // Return the first/last character, UNDEFINED when StringPiece is empty.
252 char front() const { return *ptr_; }
253 char back() const { return *(ptr_ + length_ - 1); }
254 // Return the first/last character, 0 when StringPiece is empty.
255 char front_or_0() const { return length_ ? *ptr_ : '\0'; }
256 char back_or_0() const { return length_ ? *(ptr_ + length_ - 1) : '\0'; }
257
258 const_iterator begin() const { return ptr_; }
259 const_iterator end() const { return ptr_ + length_; }
260 const_reverse_iterator rbegin() const {
261 return const_reverse_iterator(ptr_ + length_);
262 }
263 const_reverse_iterator rend() const {
264 return const_reverse_iterator(ptr_);
265 }
266
267 size_type max_size() const { return length_; }
268 size_type capacity() const { return length_; }
269
270 static int wordmemcmp(const value_type* p,
271 const value_type* p2,
272 size_type N) {
273 return STRING_TYPE::traits_type::compare(p, p2, N);
274 }
275
276 // Sets the value of the given string target type to be the current string.
277 // This saves a temporary over doing |a = b.as_string()|
278 void CopyToString(STRING_TYPE* target) const {
279 internal::CopyToString(*this, target);
280 }
281
282 void AppendToString(STRING_TYPE* target) const {
283 internal::AppendToString(*this, target);
284 }
285
286 size_type copy(value_type* buf, size_type n, size_type pos = 0) const {
287 return internal::copy(*this, buf, n, pos);
288 }
289
290 // Does "this" start with "x"
291 bool starts_with(const BasicStringPiece& x) const {
292 return ((this->length_ >= x.length_) &&
293 (wordmemcmp(this->ptr_, x.ptr_, x.length_) == 0));
294 }
295
296 // Does "this" end with "x"
297 bool ends_with(const BasicStringPiece& x) const {
298 return ((this->length_ >= x.length_) &&
299 (wordmemcmp(this->ptr_ + (this->length_-x.length_),
300 x.ptr_, x.length_) == 0));
301 }
302
303 // find: Search for a character or substring at a given offset.
304 size_type find(const BasicStringPiece<STRING_TYPE>& s,
305 size_type pos = 0) const {
306 return internal::find(*this, s, pos);
307 }
308 size_type find(value_type c, size_type pos = 0) const {
309 return internal::find(*this, c, pos);
310 }
311
312 // rfind: Reverse find.
313 size_type rfind(const BasicStringPiece& s,
314 size_type pos = BasicStringPiece::npos) const {
315 return internal::rfind(*this, s, pos);
316 }
317 size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const {
318 return internal::rfind(*this, c, pos);
319 }
320
321 // find_first_of: Find the first occurence of one of a set of characters.
322 size_type find_first_of(const BasicStringPiece& s,
323 size_type pos = 0) const {
324 return internal::find_first_of(*this, s, pos);
325 }
326 size_type find_first_of(value_type c, size_type pos = 0) const {
327 return find(c, pos);
328 }
329
330 // find_first_not_of: Find the first occurence not of a set of characters.
331 size_type find_first_not_of(const BasicStringPiece& s,
332 size_type pos = 0) const {
333 return internal::find_first_not_of(*this, s, pos);
334 }
335 size_type find_first_not_of(value_type c, size_type pos = 0) const {
336 return internal::find_first_not_of(*this, c, pos);
337 }
338
339 // find_last_of: Find the last occurence of one of a set of characters.
340 size_type find_last_of(const BasicStringPiece& s,
341 size_type pos = BasicStringPiece::npos) const {
342 return internal::find_last_of(*this, s, pos);
343 }
344 size_type find_last_of(value_type c,
345 size_type pos = BasicStringPiece::npos) const {
346 return rfind(c, pos);
347 }
348
349 // find_last_not_of: Find the last occurence not of a set of characters.
350 size_type find_last_not_of(const BasicStringPiece& s,
351 size_type pos = BasicStringPiece::npos) const {
352 return internal::find_last_not_of(*this, s, pos);
353 }
354 size_type find_last_not_of(value_type c,
355 size_type pos = BasicStringPiece::npos) const {
356 return internal::find_last_not_of(*this, c, pos);
357 }
358
359 // substr.
360 BasicStringPiece substr(size_type pos,
361 size_type n = BasicStringPiece::npos) const {
362 return internal::substr(*this, pos, n);
363 }
364
365 protected:
366 const value_type* ptr_;
367 size_type length_;
368};
369
370template <typename STRING_TYPE>
371const typename BasicStringPiece<STRING_TYPE>::size_type
372BasicStringPiece<STRING_TYPE>::npos =
373 typename BasicStringPiece<STRING_TYPE>::size_type(-1);
374
375// MSVC doesn't like complex extern templates and DLLs.
376#if !defined(COMPILER_MSVC)
377extern template class BUTIL_EXPORT BasicStringPiece<std::string>;
378extern template class BUTIL_EXPORT BasicStringPiece<string16>;
379#endif
380
// StringPiece operators -------------------------------------------------------
382
383BUTIL_EXPORT bool operator==(const StringPiece& x, const StringPiece& y);
384
385inline bool operator!=(const StringPiece& x, const StringPiece& y) {
386 return !(x == y);
387}
388
389inline bool operator<(const StringPiece& x, const StringPiece& y) {
390 const int r = StringPiece::wordmemcmp(
391 x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size()));
392 return ((r < 0) || ((r == 0) && (x.size() < y.size())));
393}
394
395inline bool operator>(const StringPiece& x, const StringPiece& y) {
396 return y < x;
397}
398
399inline bool operator<=(const StringPiece& x, const StringPiece& y) {
400 return !(x > y);
401}
402
403inline bool operator>=(const StringPiece& x, const StringPiece& y) {
404 return !(x < y);
405}
406
407// StringPiece16 operators -----------------------------------------------------
408
409inline bool operator==(const StringPiece16& x, const StringPiece16& y) {
410 if (x.size() != y.size())
411 return false;
412
413 return StringPiece16::wordmemcmp(x.data(), y.data(), x.size()) == 0;
414}
415
416inline bool operator!=(const StringPiece16& x, const StringPiece16& y) {
417 return !(x == y);
418}
419
420inline bool operator<(const StringPiece16& x, const StringPiece16& y) {
421 const int r = StringPiece16::wordmemcmp(
422 x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size()));
423 return ((r < 0) || ((r == 0) && (x.size() < y.size())));
424}
425
426inline bool operator>(const StringPiece16& x, const StringPiece16& y) {
427 return y < x;
428}
429
430inline bool operator<=(const StringPiece16& x, const StringPiece16& y) {
431 return !(x > y);
432}
433
434inline bool operator>=(const StringPiece16& x, const StringPiece16& y) {
435 return !(x < y);
436}
437
438BUTIL_EXPORT std::ostream& operator<<(std::ostream& o,
439 const StringPiece& piece);
440
441// [ Ease getting first/last character of std::string before C++11 ]
442// return the first/last character, UNDEFINED when the string is empty.
inline char front_char(const std::string& s) {
  const std::string::size_type first_index = 0;
  return s[first_index];
}
inline char back_char(const std::string& s) {
  const std::string::size_type last_index = s.size() - 1;
  return s[last_index];
}
445// return the first/last character, 0 when the string is empty.
inline char front_char_or_0(const std::string& s) {
  if (s.empty()) {
    return '\0';
  }
  return s[0];
}
inline char back_char_or_0(const std::string& s) {
  if (s.empty()) {
    return '\0';
  }
  return s[s.size() - 1];
}
448
449} // namespace butil
450
451// Hashing ---------------------------------------------------------------------
452
453// We provide appropriate hash functions so StringPiece and StringPiece16 can
454// be used as keys in hash sets and maps.
455
456// This hash function is copied from butil/containers/hash_tables.h. We don't
457// use the ones already defined for string and string16 directly because it
458// would require the string constructors to be called, which we don't want.
// Expands to a complete function body: folds every character of
// |string_piece| into a running value (multiply by 131, add the character)
// and returns it. |StringPieceType| must provide const_iterator, and
// |string_piece| must provide begin()/end().
#define HASH_STRING_PIECE(StringPieceType, string_piece)                    \
  std::size_t hash_accum = 0;                                               \
  for (StringPieceType::const_iterator hash_it = (string_piece).begin();    \
       hash_it != (string_piece).end(); ++hash_it) {                        \
    hash_accum = (hash_accum * 131) + *hash_it;                             \
  }                                                                         \
  return hash_accum;
465
namespace BUTIL_HASH_NAMESPACE {
#if defined(COMPILER_GCC)

// Under COMPILER_GCC the hashes are supplied as hash<> specializations, one
// per piece type. Each body is the HASH_STRING_PIECE expansion.
template <>
struct hash<butil::StringPiece> {
  std::size_t operator()(const butil::StringPiece& sp) const {
    HASH_STRING_PIECE(butil::StringPiece, sp);
  }
};

template <>
struct hash<butil::StringPiece16> {
  std::size_t operator()(const butil::StringPiece16& sp16) const {
    HASH_STRING_PIECE(butil::StringPiece16, sp16);
  }
};

#elif defined(COMPILER_MSVC)

// Under COMPILER_MSVC the hashes are supplied as hash_value() overloads, one
// per piece type. Each body is the HASH_STRING_PIECE expansion.
inline size_t hash_value(const butil::StringPiece& sp) {
  HASH_STRING_PIECE(butil::StringPiece, sp);
}

inline size_t hash_value(const butil::StringPiece16& sp16) {
  HASH_STRING_PIECE(butil::StringPiece16, sp16);
}

#endif  // COMPILER

}  // namespace BUTIL_HASH_NAMESPACE
494
495#endif // BUTIL_STRINGS_STRING_PIECE_H_
496