1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // @author Mark Rabkin ([email protected]) |
18 | // @author Andrei Alexandrescu ([email protected]) |
19 | |
20 | #pragma once |
21 | |
22 | #include <folly/Portability.h> |
23 | #include <folly/hash/SpookyHashV2.h> |
24 | #include <folly/lang/Exception.h> |
25 | #include <folly/portability/Constexpr.h> |
26 | #include <folly/portability/String.h> |
27 | |
28 | #include <algorithm> |
29 | #include <array> |
30 | #include <cassert> |
31 | #include <climits> |
32 | #include <cstddef> |
33 | #include <cstring> |
34 | #include <iosfwd> |
35 | #include <iterator> |
36 | #include <stdexcept> |
37 | #include <string> |
38 | #include <type_traits> |
39 | |
40 | #if FOLLY_HAS_STRING_VIEW |
41 | #include <string_view> // @manual |
42 | #endif |
43 | |
44 | #include <folly/CpuId.h> |
45 | #include <folly/Likely.h> |
46 | #include <folly/Traits.h> |
47 | #include <folly/detail/RangeCommon.h> |
48 | #include <folly/detail/RangeSse42.h> |
49 | |
50 | // Ignore shadowing warnings within this file, so includers can use -Wshadow. |
51 | FOLLY_PUSH_WARNING |
52 | FOLLY_GNU_DISABLE_WARNING("-Wshadow" ) |
53 | |
54 | namespace folly { |
55 | |
56 | /** |
57 | * Ubiquitous helper template for knowing what's a string. |
58 | */ |
59 | template <class T> |
60 | struct IsSomeString : std::false_type {}; |
61 | |
62 | template <typename Alloc> |
63 | struct IsSomeString<std::basic_string<char, std::char_traits<char>, Alloc>> |
64 | : std::true_type {}; |
65 | |
66 | template <class Iter> |
67 | class Range; |
68 | |
69 | /** |
70 | * Finds the first occurrence of needle in haystack. The algorithm is on |
71 | * average faster than O(haystack.size() * needle.size()) but not as fast |
72 | * as Boyer-Moore. On the upside, it does not do any upfront |
73 | * preprocessing and does not allocate memory. |
74 | */ |
75 | template < |
76 | class Iter, |
77 | class Comp = std::equal_to<typename Range<Iter>::value_type>> |
78 | inline size_t |
79 | qfind(const Range<Iter>& haystack, const Range<Iter>& needle, Comp eq = Comp()); |
80 | |
81 | /** |
82 | * Finds the first occurrence of needle in haystack. The result is the |
83 | * offset reported to the beginning of haystack, or string::npos if |
84 | * needle wasn't found. |
85 | */ |
86 | template <class Iter> |
87 | size_t qfind( |
88 | const Range<Iter>& haystack, |
89 | const typename Range<Iter>::value_type& needle); |
90 | |
91 | /** |
92 | * Finds the last occurrence of needle in haystack. The result is the |
93 | * offset reported to the beginning of haystack, or string::npos if |
94 | * needle wasn't found. |
95 | */ |
96 | template <class Iter> |
97 | size_t rfind( |
98 | const Range<Iter>& haystack, |
99 | const typename Range<Iter>::value_type& needle); |
100 | |
101 | /** |
102 | * Finds the first occurrence of any element of needle in |
103 | * haystack. The algorithm is O(haystack.size() * needle.size()). |
104 | */ |
105 | template <class Iter> |
106 | inline size_t qfind_first_of( |
107 | const Range<Iter>& haystack, |
108 | const Range<Iter>& needle); |
109 | |
110 | /** |
111 | * Small internal helper - returns the value just before an iterator. |
112 | */ |
113 | namespace detail { |
114 | |
115 | /* |
116 | * Use IsCharPointer<T>::type to enable const char* or char*. |
117 | * Use IsCharPointer<T>::const_type to enable only const char*. |
118 | */ |
119 | template <class T> |
120 | struct IsCharPointer {}; |
121 | |
122 | template <> |
123 | struct IsCharPointer<char*> { |
124 | typedef int type; |
125 | }; |
126 | |
127 | template <> |
128 | struct IsCharPointer<const char*> { |
129 | typedef int const_type; |
130 | typedef int type; |
131 | }; |
132 | |
133 | } // namespace detail |
134 | |
135 | /** |
136 | * Range abstraction keeping a pair of iterators. We couldn't use |
137 | * boost's similar range abstraction because we need an API identical |
138 | * with the former StringPiece class, which is used by a lot of other |
139 | * code. This abstraction does fulfill the needs of boost's |
140 | * range-oriented algorithms though. |
141 | * |
142 | * (Keep memory lifetime in mind when using this class, since it |
143 | * doesn't manage the data it refers to - just like an iterator |
144 | * wouldn't.) |
145 | */ |
146 | template <class Iter> |
147 | class Range { |
148 | private: |
149 | template <typename Alloc> |
150 | using string = std::basic_string<char, std::char_traits<char>, Alloc>; |
151 | |
152 | public: |
153 | typedef std::size_t size_type; |
154 | typedef Iter iterator; |
155 | typedef Iter const_iterator; |
156 | typedef typename std::remove_reference< |
157 | typename std::iterator_traits<Iter>::reference>::type value_type; |
158 | using difference_type = typename std::iterator_traits<Iter>::difference_type; |
159 | typedef typename std::iterator_traits<Iter>::reference reference; |
160 | |
161 | /** |
162 | * For MutableStringPiece and MutableByteRange we define StringPiece |
163 | * and ByteRange as const_range_type (for everything else its just |
164 | * identity). We do that to enable operations such as find with |
165 | * args which are const. |
166 | */ |
167 | typedef typename std::conditional< |
168 | std::is_same<Iter, char*>::value || |
169 | std::is_same<Iter, unsigned char*>::value, |
170 | Range<const value_type*>, |
171 | Range<Iter>>::type const_range_type; |
172 | |
173 | typedef std::char_traits<typename std::remove_const<value_type>::type> |
174 | traits_type; |
175 | |
176 | static const size_type npos; |
177 | |
178 | // Works for all iterators |
179 | constexpr Range() : b_(), e_() {} |
180 | |
181 | constexpr Range(const Range&) = default; |
182 | constexpr Range(Range&&) = default; |
183 | |
184 | public: |
185 | // Works for all iterators |
186 | constexpr Range(Iter start, Iter end) : b_(start), e_(end) {} |
187 | |
188 | // Works only for random-access iterators |
189 | constexpr Range(Iter start, size_t size) : b_(start), e_(start + size) {} |
190 | |
191 | /* implicit */ Range(std::nullptr_t) = delete; |
192 | |
193 | constexpr /* implicit */ Range(Iter str) |
194 | : b_(str), e_(str + constexpr_strlen(str)) { |
195 | static_assert( |
196 | std::is_same<int, typename detail::IsCharPointer<Iter>::type>::value, |
197 | "This constructor is only available for character ranges" ); |
198 | } |
199 | |
200 | template < |
201 | class Alloc, |
202 | class T = Iter, |
203 | typename detail::IsCharPointer<T>::const_type = 0> |
204 | /* implicit */ Range(const string<Alloc>& str) |
205 | : b_(str.data()), e_(b_ + str.size()) {} |
206 | |
207 | template < |
208 | class Alloc, |
209 | class T = Iter, |
210 | typename detail::IsCharPointer<T>::const_type = 0> |
211 | Range(const string<Alloc>& str, typename string<Alloc>::size_type startFrom) { |
212 | if (UNLIKELY(startFrom > str.size())) { |
213 | throw_exception<std::out_of_range>("index out of range" ); |
214 | } |
215 | b_ = str.data() + startFrom; |
216 | e_ = str.data() + str.size(); |
217 | } |
218 | |
219 | template < |
220 | class Alloc, |
221 | class T = Iter, |
222 | typename detail::IsCharPointer<T>::const_type = 0> |
223 | Range( |
224 | const string<Alloc>& str, |
225 | typename string<Alloc>::size_type startFrom, |
226 | typename string<Alloc>::size_type size) { |
227 | if (UNLIKELY(startFrom > str.size())) { |
228 | throw_exception<std::out_of_range>("index out of range" ); |
229 | } |
230 | b_ = str.data() + startFrom; |
231 | if (str.size() - startFrom < size) { |
232 | e_ = str.data() + str.size(); |
233 | } else { |
234 | e_ = b_ + size; |
235 | } |
236 | } |
237 | |
238 | Range(const Range& other, size_type first, size_type length = npos) |
239 | : Range(other.subpiece(first, length)) {} |
240 | |
241 | template < |
242 | class Container, |
243 | class = typename std::enable_if< |
244 | std::is_same<Iter, typename Container::const_pointer>::value>::type, |
245 | class = decltype( |
246 | Iter(std::declval<Container const&>().data()), |
247 | Iter( |
248 | std::declval<Container const&>().data() + |
249 | std::declval<Container const&>().size()))> |
250 | /* implicit */ constexpr Range(Container const& container) |
251 | : b_(container.data()), e_(b_ + container.size()) {} |
252 | |
253 | template < |
254 | class Container, |
255 | class = typename std::enable_if< |
256 | std::is_same<Iter, typename Container::const_pointer>::value>::type, |
257 | class = decltype( |
258 | Iter(std::declval<Container const&>().data()), |
259 | Iter( |
260 | std::declval<Container const&>().data() + |
261 | std::declval<Container const&>().size()))> |
262 | Range(Container const& container, typename Container::size_type startFrom) { |
263 | auto const cdata = container.data(); |
264 | auto const csize = container.size(); |
265 | if (UNLIKELY(startFrom > csize)) { |
266 | throw_exception<std::out_of_range>("index out of range" ); |
267 | } |
268 | b_ = cdata + startFrom; |
269 | e_ = cdata + csize; |
270 | } |
271 | |
272 | template < |
273 | class Container, |
274 | class = typename std::enable_if< |
275 | std::is_same<Iter, typename Container::const_pointer>::value>::type, |
276 | class = decltype( |
277 | Iter(std::declval<Container const&>().data()), |
278 | Iter( |
279 | std::declval<Container const&>().data() + |
280 | std::declval<Container const&>().size()))> |
281 | Range( |
282 | Container const& container, |
283 | typename Container::size_type startFrom, |
284 | typename Container::size_type size) { |
285 | auto const cdata = container.data(); |
286 | auto const csize = container.size(); |
287 | if (UNLIKELY(startFrom > csize)) { |
288 | throw_exception<std::out_of_range>("index out of range" ); |
289 | } |
290 | b_ = cdata + startFrom; |
291 | if (csize - startFrom < size) { |
292 | e_ = cdata + csize; |
293 | } else { |
294 | e_ = b_ + size; |
295 | } |
296 | } |
297 | |
298 | // Allow implicit conversion from Range<const char*> (aka StringPiece) to |
299 | // Range<const unsigned char*> (aka ByteRange), as they're both frequently |
300 | // used to represent ranges of bytes. Allow explicit conversion in the other |
301 | // direction. |
302 | template < |
303 | class OtherIter, |
304 | typename std::enable_if< |
305 | (std::is_same<Iter, const unsigned char*>::value && |
306 | (std::is_same<OtherIter, const char*>::value || |
307 | std::is_same<OtherIter, char*>::value)), |
308 | int>::type = 0> |
309 | /* implicit */ Range(const Range<OtherIter>& other) |
310 | : b_(reinterpret_cast<const unsigned char*>(other.begin())), |
311 | e_(reinterpret_cast<const unsigned char*>(other.end())) {} |
312 | |
313 | template < |
314 | class OtherIter, |
315 | typename std::enable_if< |
316 | (std::is_same<Iter, unsigned char*>::value && |
317 | std::is_same<OtherIter, char*>::value), |
318 | int>::type = 0> |
319 | /* implicit */ Range(const Range<OtherIter>& other) |
320 | : b_(reinterpret_cast<unsigned char*>(other.begin())), |
321 | e_(reinterpret_cast<unsigned char*>(other.end())) {} |
322 | |
323 | template < |
324 | class OtherIter, |
325 | typename std::enable_if< |
326 | (std::is_same<Iter, const char*>::value && |
327 | (std::is_same<OtherIter, const unsigned char*>::value || |
328 | std::is_same<OtherIter, unsigned char*>::value)), |
329 | int>::type = 0> |
330 | explicit Range(const Range<OtherIter>& other) |
331 | : b_(reinterpret_cast<const char*>(other.begin())), |
332 | e_(reinterpret_cast<const char*>(other.end())) {} |
333 | |
334 | template < |
335 | class OtherIter, |
336 | typename std::enable_if< |
337 | (std::is_same<Iter, char*>::value && |
338 | std::is_same<OtherIter, unsigned char*>::value), |
339 | int>::type = 0> |
340 | explicit Range(const Range<OtherIter>& other) |
341 | : b_(reinterpret_cast<char*>(other.begin())), |
342 | e_(reinterpret_cast<char*>(other.end())) {} |
343 | |
344 | // Allow implicit conversion from Range<From> to Range<To> if From is |
345 | // implicitly convertible to To. |
346 | template < |
347 | class OtherIter, |
348 | typename std::enable_if< |
349 | (!std::is_same<Iter, OtherIter>::value && |
350 | std::is_convertible<OtherIter, Iter>::value), |
351 | int>::type = 0> |
352 | constexpr /* implicit */ Range(const Range<OtherIter>& other) |
353 | : b_(other.begin()), e_(other.end()) {} |
354 | |
355 | // Allow explicit conversion from Range<From> to Range<To> if From is |
356 | // explicitly convertible to To. |
357 | template < |
358 | class OtherIter, |
359 | typename std::enable_if< |
360 | (!std::is_same<Iter, OtherIter>::value && |
361 | !std::is_convertible<OtherIter, Iter>::value && |
362 | std::is_constructible<Iter, const OtherIter&>::value), |
363 | int>::type = 0> |
364 | constexpr explicit Range(const Range<OtherIter>& other) |
365 | : b_(other.begin()), e_(other.end()) {} |
366 | |
367 | /** |
368 | * Allow explicit construction of Range() from a std::array of a |
369 | * convertible type. |
370 | * |
371 | * For instance, this allows constructing StringPiece from a |
372 | * std::array<char, N> or a std::array<const char, N> |
373 | */ |
374 | template < |
375 | class T, |
376 | size_t N, |
377 | typename = typename std::enable_if< |
378 | std::is_convertible<const T*, Iter>::value>::type> |
379 | constexpr explicit Range(const std::array<T, N>& array) |
380 | : b_{array.empty() ? nullptr : &array.at(0)}, |
381 | e_{array.empty() ? nullptr : &array.at(0) + N} {} |
382 | template < |
383 | class T, |
384 | size_t N, |
385 | typename = |
386 | typename std::enable_if<std::is_convertible<T*, Iter>::value>::type> |
387 | constexpr explicit Range(std::array<T, N>& array) |
388 | : b_{array.empty() ? nullptr : &array.at(0)}, |
389 | e_{array.empty() ? nullptr : &array.at(0) + N} {} |
390 | |
391 | Range& operator=(const Range& rhs) & = default; |
392 | Range& operator=(Range&& rhs) & = default; |
393 | |
394 | template < |
395 | class Alloc, |
396 | class T = Iter, |
397 | typename detail::IsCharPointer<T>::const_type = 0> |
398 | Range& operator=(string<Alloc>&& rhs) = delete; |
399 | |
400 | void clear() { |
401 | b_ = Iter(); |
402 | e_ = Iter(); |
403 | } |
404 | |
405 | void assign(Iter start, Iter end) { |
406 | b_ = start; |
407 | e_ = end; |
408 | } |
409 | |
410 | void reset(Iter start, size_type size) { |
411 | b_ = start; |
412 | e_ = start + size; |
413 | } |
414 | |
415 | // Works only for Range<const char*> |
416 | template <typename Alloc> |
417 | void reset(const string<Alloc>& str) { |
418 | reset(str.data(), str.size()); |
419 | } |
420 | |
421 | constexpr size_type size() const { |
422 | #if __clang__ || !__GNUC__ || __GNUC__ >= 7 |
423 | assert(b_ <= e_); |
424 | #endif |
425 | return size_type(e_ - b_); |
426 | } |
427 | constexpr size_type walk_size() const { |
428 | return size_type(std::distance(b_, e_)); |
429 | } |
430 | constexpr bool empty() const { |
431 | return b_ == e_; |
432 | } |
433 | constexpr Iter data() const { |
434 | return b_; |
435 | } |
436 | constexpr Iter start() const { |
437 | return b_; |
438 | } |
439 | constexpr Iter begin() const { |
440 | return b_; |
441 | } |
442 | constexpr Iter end() const { |
443 | return e_; |
444 | } |
445 | constexpr Iter cbegin() const { |
446 | return b_; |
447 | } |
448 | constexpr Iter cend() const { |
449 | return e_; |
450 | } |
451 | value_type& front() { |
452 | assert(b_ < e_); |
453 | return *b_; |
454 | } |
455 | value_type& back() { |
456 | assert(b_ < e_); |
457 | return *std::prev(e_); |
458 | } |
459 | const value_type& front() const { |
460 | assert(b_ < e_); |
461 | return *b_; |
462 | } |
463 | const value_type& back() const { |
464 | assert(b_ < e_); |
465 | return *std::prev(e_); |
466 | } |
467 | |
468 | private: |
469 | // It would be nice to be able to implicit convert to any target type |
470 | // T for which either an (Iter, Iter) or (Iter, size_type) noexcept |
471 | // constructor was available, and explicitly convert to any target |
472 | // type for which those signatures were available but not noexcept. |
473 | // The problem is that this creates ambiguity when there is also a |
474 | // T constructor that takes a type U that is implicitly convertible |
475 | // from Range. |
476 | // |
477 | // To avoid ambiguity, we need to avoid having explicit operator T |
478 | // and implicit operator U coexist when T is constructible from U. |
479 | // U cannot be deduced when searching for operator T (and C++ won't |
480 | // perform an existential search for it), so we must limit the implicit |
481 | // target types to a finite set that we can enumerate. |
482 | // |
483 | // At the moment the set of implicit target types consists of just |
484 | // std::string_view (when it is available). |
485 | #if FOLLY_HAS_STRING_VIEW |
486 | struct NotStringView {}; |
487 | template <typename ValueType> |
488 | struct StringViewType |
489 | : std::conditional< |
490 | std::is_pod<std::remove_const_t<ValueType>>::value, |
491 | std::basic_string_view<std::remove_const_t<ValueType>>, |
492 | NotStringView> {}; |
493 | |
494 | template <typename Target> |
495 | struct IsConstructibleViaStringView |
496 | : Conjunction< |
497 | std::is_constructible< |
498 | _t<StringViewType<value_type>>, |
499 | Iter const&, |
500 | size_type>, |
501 | std::is_constructible<Target, _t<StringViewType<value_type>>>> {}; |
502 | #else |
503 | template <typename Target> |
504 | using IsConstructibleViaStringView = std::false_type; |
505 | #endif |
506 | |
507 | public: |
508 | /// explicit operator conversion to any compatible type |
509 | /// |
510 | /// A compatible type is one which is constructible with an iterator and a |
511 | /// size (preferred), or a pair of iterators (fallback), passed by const-ref. |
512 | /// |
513 | /// Participates in overload resolution precisely when the target type is |
514 | /// compatible. This allows std::is_constructible compile-time checks to work. |
515 | template < |
516 | typename Tgt, |
517 | std::enable_if_t< |
518 | std::is_constructible<Tgt, Iter const&, size_type>::value && |
519 | !IsConstructibleViaStringView<Tgt>::value, |
520 | int> = 0> |
521 | constexpr explicit operator Tgt() const noexcept( |
522 | std::is_nothrow_constructible<Tgt, Iter const&, size_type>::value) { |
523 | return Tgt(b_, walk_size()); |
524 | } |
525 | template < |
526 | typename Tgt, |
527 | std::enable_if_t< |
528 | !std::is_constructible<Tgt, Iter const&, size_type>::value && |
529 | std::is_constructible<Tgt, Iter const&, Iter const&>::value && |
530 | !IsConstructibleViaStringView<Tgt>::value, |
531 | int> = 0> |
532 | constexpr explicit operator Tgt() const noexcept( |
533 | std::is_nothrow_constructible<Tgt, Iter const&, Iter const&>::value) { |
534 | return Tgt(b_, e_); |
535 | } |
536 | |
537 | #if FOLLY_HAS_STRING_VIEW |
538 | /// implicit operator conversion to std::string_view |
539 | template < |
540 | typename Tgt, |
541 | typename ValueType = value_type, |
542 | std::enable_if_t< |
543 | StrictConjunction< |
544 | std::is_same<Tgt, _t<StringViewType<ValueType>>>, |
545 | std::is_constructible< |
546 | _t<StringViewType<ValueType>>, |
547 | Iter const&, |
548 | size_type>>::value, |
549 | int> = 0> |
550 | constexpr operator Tgt() const noexcept( |
551 | std::is_nothrow_constructible<Tgt, Iter const&, size_type>::value) { |
552 | return Tgt(b_, walk_size()); |
553 | } |
554 | #endif |
555 | |
556 | /// explicit non-operator conversion to any compatible type |
557 | /// |
558 | /// A compatible type is one which is constructible with an iterator and a |
559 | /// size (preferred), or a pair of iterators (fallback), passed by const-ref. |
560 | /// |
561 | /// Participates in overload resolution precisely when the target type is |
562 | /// compatible. This allows is_invocable compile-time checks to work. |
563 | /// |
564 | /// Provided in addition to the explicit operator conversion to permit passing |
565 | /// additional arguments to the target type constructor. A canonical example |
566 | /// of an additional argument might be an allocator, where the target type is |
567 | /// some specialization of std::vector or std::basic_string in a context which |
568 | /// requires a non-default-constructed allocator. |
569 | template <typename Tgt, typename... Args> |
570 | constexpr std::enable_if_t< |
571 | std::is_constructible<Tgt, Iter const&, size_type>::value, |
572 | Tgt> |
573 | to(Args&&... args) const noexcept( |
574 | std::is_nothrow_constructible<Tgt, Iter const&, size_type, Args&&...>:: |
575 | value) { |
576 | return Tgt(b_, walk_size(), static_cast<Args&&>(args)...); |
577 | } |
578 | template <typename Tgt, typename... Args> |
579 | constexpr std::enable_if_t< |
580 | !std::is_constructible<Tgt, Iter const&, size_type>::value && |
581 | std::is_constructible<Tgt, Iter const&, Iter const&>::value, |
582 | Tgt> |
583 | to(Args&&... args) const noexcept( |
584 | std::is_nothrow_constructible<Tgt, Iter const&, Iter const&, Args&&...>:: |
585 | value) { |
586 | return Tgt(b_, e_, static_cast<Args&&>(args)...); |
587 | } |
588 | |
589 | // Works only for Range<const char*> and Range<char*> |
590 | std::string str() const { |
591 | return to<std::string>(); |
592 | } |
593 | std::string toString() const { |
594 | return to<std::string>(); |
595 | } |
596 | |
597 | const_range_type castToConst() const { |
598 | return const_range_type(*this); |
599 | } |
600 | |
601 | int compare(const const_range_type& o) const { |
602 | const size_type tsize = this->size(); |
603 | const size_type osize = o.size(); |
604 | const size_type msize = std::min(tsize, osize); |
605 | int r = traits_type::compare(data(), o.data(), msize); |
606 | if (r == 0 && tsize != osize) { |
607 | // We check the signed bit of the subtraction and bit shift it |
608 | // to produce either 0 or 2. The subtraction yields the |
609 | // comparison values of either -1 or 1. |
610 | r = (static_cast<int>((osize - tsize) >> (CHAR_BIT * sizeof(size_t) - 1)) |
611 | << 1) - |
612 | 1; |
613 | } |
614 | return r; |
615 | } |
616 | |
617 | value_type& operator[](size_t i) { |
618 | assert(i < size()); |
619 | return b_[i]; |
620 | } |
621 | |
622 | const value_type& operator[](size_t i) const { |
623 | assert(i < size()); |
624 | return b_[i]; |
625 | } |
626 | |
627 | value_type& at(size_t i) { |
628 | if (i >= size()) { |
629 | throw_exception<std::out_of_range>("index out of range" ); |
630 | } |
631 | return b_[i]; |
632 | } |
633 | |
634 | const value_type& at(size_t i) const { |
635 | if (i >= size()) { |
636 | throw_exception<std::out_of_range>("index out of range" ); |
637 | } |
638 | return b_[i]; |
639 | } |
640 | |
641 | // Do NOT use this function, which was left behind for backwards |
642 | // compatibility. Use SpookyHashV2 instead -- it is faster, and produces |
643 | // a 64-bit hash, which means dramatically fewer collisions in large maps. |
644 | // (The above advice does not apply if you are targeting a 32-bit system.) |
645 | // |
646 | // Works only for Range<const char*> and Range<char*> |
647 | // |
648 | // |
649 | // ** WANT TO GET RID OF THIS LINT? ** |
650 | // |
651 | // A) Use a better hash function (*cough*folly::Hash*cough*), but |
652 | // only if you don't serialize data in a format that depends on |
653 | // this formula (ie the writer and reader assume this exact hash |
654 | // function is used). |
655 | // |
656 | // B) If you have to use this exact function then make your own hasher |
657 | // object and copy the body over (see thrift example: D3972362). |
658 | // https://github.com/facebook/fbthrift/commit/f8ed502e24ab4a32a9d5f266580 |
659 | [[deprecated( |
660 | "Replace with folly::Hash if the hash is not serialized" )]] uint32_t |
661 | hash() const { |
662 | // Taken from fbi/nstring.h: |
663 | // Quick and dirty bernstein hash...fine for short ascii strings |
664 | uint32_t hash = 5381; |
665 | for (size_t ix = 0; ix < size(); ix++) { |
666 | hash = ((hash << 5) + hash) + b_[ix]; |
667 | } |
668 | return hash; |
669 | } |
670 | |
671 | void advance(size_type n) { |
672 | if (UNLIKELY(n > size())) { |
673 | throw_exception<std::out_of_range>("index out of range" ); |
674 | } |
675 | b_ += n; |
676 | } |
677 | |
678 | void subtract(size_type n) { |
679 | if (UNLIKELY(n > size())) { |
680 | throw_exception<std::out_of_range>("index out of range" ); |
681 | } |
682 | e_ -= n; |
683 | } |
684 | |
685 | Range subpiece(size_type first, size_type length = npos) const { |
686 | if (UNLIKELY(first > size())) { |
687 | throw_exception<std::out_of_range>("index out of range" ); |
688 | } |
689 | |
690 | return Range(b_ + first, std::min(length, size() - first)); |
691 | } |
692 | |
693 | // unchecked versions |
694 | void uncheckedAdvance(size_type n) { |
695 | assert(n <= size()); |
696 | b_ += n; |
697 | } |
698 | |
699 | void uncheckedSubtract(size_type n) { |
700 | assert(n <= size()); |
701 | e_ -= n; |
702 | } |
703 | |
704 | Range uncheckedSubpiece(size_type first, size_type length = npos) const { |
705 | assert(first <= size()); |
706 | return Range(b_ + first, std::min(length, size() - first)); |
707 | } |
708 | |
709 | void pop_front() { |
710 | assert(b_ < e_); |
711 | ++b_; |
712 | } |
713 | |
714 | void pop_back() { |
715 | assert(b_ < e_); |
716 | --e_; |
717 | } |
718 | |
719 | // string work-alike functions |
720 | size_type find(const_range_type str) const { |
721 | return qfind(castToConst(), str); |
722 | } |
723 | |
724 | size_type find(const_range_type str, size_t pos) const { |
725 | if (pos > size()) { |
726 | return std::string::npos; |
727 | } |
728 | size_t ret = qfind(castToConst().subpiece(pos), str); |
729 | return ret == npos ? ret : ret + pos; |
730 | } |
731 | |
732 | size_type find(Iter s, size_t pos, size_t n) const { |
733 | if (pos > size()) { |
734 | return std::string::npos; |
735 | } |
736 | auto forFinding = castToConst(); |
737 | size_t ret = qfind( |
738 | pos ? forFinding.subpiece(pos) : forFinding, const_range_type(s, n)); |
739 | return ret == npos ? ret : ret + pos; |
740 | } |
741 | |
742 | // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor |
743 | size_type find(const Iter s) const { |
744 | return qfind(castToConst(), const_range_type(s)); |
745 | } |
746 | |
747 | // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor |
748 | size_type find(const Iter s, size_t pos) const { |
749 | if (pos > size()) { |
750 | return std::string::npos; |
751 | } |
752 | size_type ret = qfind(castToConst().subpiece(pos), const_range_type(s)); |
753 | return ret == npos ? ret : ret + pos; |
754 | } |
755 | |
756 | size_type find(value_type c) const { |
757 | return qfind(castToConst(), c); |
758 | } |
759 | |
760 | size_type rfind(value_type c) const { |
761 | return folly::rfind(castToConst(), c); |
762 | } |
763 | |
764 | size_type find(value_type c, size_t pos) const { |
765 | if (pos > size()) { |
766 | return std::string::npos; |
767 | } |
768 | size_type ret = qfind(castToConst().subpiece(pos), c); |
769 | return ret == npos ? ret : ret + pos; |
770 | } |
771 | |
772 | size_type find_first_of(const_range_type needles) const { |
773 | return qfind_first_of(castToConst(), needles); |
774 | } |
775 | |
776 | size_type find_first_of(const_range_type needles, size_t pos) const { |
777 | if (pos > size()) { |
778 | return std::string::npos; |
779 | } |
780 | size_type ret = qfind_first_of(castToConst().subpiece(pos), needles); |
781 | return ret == npos ? ret : ret + pos; |
782 | } |
783 | |
784 | // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor |
785 | size_type find_first_of(Iter needles) const { |
786 | return find_first_of(const_range_type(needles)); |
787 | } |
788 | |
789 | // Works only for Range<(const) (unsigned) char*> which have Range(Iter) ctor |
790 | size_type find_first_of(Iter needles, size_t pos) const { |
791 | return find_first_of(const_range_type(needles), pos); |
792 | } |
793 | |
794 | size_type find_first_of(Iter needles, size_t pos, size_t n) const { |
795 | return find_first_of(const_range_type(needles, n), pos); |
796 | } |
797 | |
798 | size_type find_first_of(value_type c) const { |
799 | return find(c); |
800 | } |
801 | |
802 | size_type find_first_of(value_type c, size_t pos) const { |
803 | return find(c, pos); |
804 | } |
805 | |
806 | /** |
807 | * Determine whether the range contains the given subrange or item. |
808 | * |
809 | * Note: Call find() directly if the index is needed. |
810 | */ |
811 | bool contains(const const_range_type& other) const { |
812 | return find(other) != std::string::npos; |
813 | } |
814 | |
815 | bool contains(const value_type& other) const { |
816 | return find(other) != std::string::npos; |
817 | } |
818 | |
819 | void swap(Range& rhs) { |
820 | std::swap(b_, rhs.b_); |
821 | std::swap(e_, rhs.e_); |
822 | } |
823 | |
824 | /** |
825 | * Does this Range start with another range? |
826 | */ |
827 | bool startsWith(const const_range_type& other) const { |
828 | return size() >= other.size() && |
829 | castToConst().subpiece(0, other.size()) == other; |
830 | } |
831 | bool startsWith(value_type c) const { |
832 | return !empty() && front() == c; |
833 | } |
834 | |
835 | template <class Comp> |
836 | bool startsWith(const const_range_type& other, Comp&& eq) const { |
837 | if (size() < other.size()) { |
838 | return false; |
839 | } |
840 | auto const trunc = subpiece(0, other.size()); |
841 | return std::equal( |
842 | trunc.begin(), trunc.end(), other.begin(), std::forward<Comp>(eq)); |
843 | } |
844 | |
845 | /** |
846 | * Does this Range end with another range? |
847 | */ |
848 | bool endsWith(const const_range_type& other) const { |
849 | return size() >= other.size() && |
850 | castToConst().subpiece(size() - other.size()) == other; |
851 | } |
852 | bool endsWith(value_type c) const { |
853 | return !empty() && back() == c; |
854 | } |
855 | |
856 | template <class Comp> |
857 | bool endsWith(const const_range_type& other, Comp&& eq) const { |
858 | if (size() < other.size()) { |
859 | return false; |
860 | } |
861 | auto const trunc = subpiece(size() - other.size()); |
862 | return std::equal( |
863 | trunc.begin(), trunc.end(), other.begin(), std::forward<Comp>(eq)); |
864 | } |
865 | |
866 | template <class Comp> |
867 | bool equals(const const_range_type& other, Comp&& eq) const { |
868 | return size() == other.size() && |
869 | std::equal(begin(), end(), other.begin(), std::forward<Comp>(eq)); |
870 | } |
871 | |
872 | /** |
873 | * Remove the items in [b, e), as long as this subrange is at the beginning |
874 | * or end of the Range. |
875 | * |
876 | * Required for boost::algorithm::trim() |
877 | */ |
878 | void erase(Iter b, Iter e) { |
879 | if (b == b_) { |
880 | b_ = e; |
881 | } else if (e == e_) { |
882 | e_ = b; |
883 | } else { |
884 | throw_exception<std::out_of_range>("index out of range" ); |
885 | } |
886 | } |
887 | |
888 | /** |
889 | * Remove the given prefix and return true if the range starts with the given |
890 | * prefix; return false otherwise. |
891 | */ |
892 | bool removePrefix(const const_range_type& prefix) { |
893 | return startsWith(prefix) && (b_ += prefix.size(), true); |
894 | } |
895 | bool removePrefix(value_type prefix) { |
896 | return startsWith(prefix) && (++b_, true); |
897 | } |
898 | |
899 | /** |
900 | * Remove the given suffix and return true if the range ends with the given |
901 | * suffix; return false otherwise. |
902 | */ |
903 | bool removeSuffix(const const_range_type& suffix) { |
904 | return endsWith(suffix) && (e_ -= suffix.size(), true); |
905 | } |
906 | bool removeSuffix(value_type suffix) { |
907 | return endsWith(suffix) && (--e_, true); |
908 | } |
909 | |
910 | /** |
911 | * Replaces the content of the range, starting at position 'pos', with |
912 | * contents of 'replacement'. Entire 'replacement' must fit into the |
913 | * range. Returns false if 'replacements' does not fit. Example use: |
914 | * |
915 | * char in[] = "buffer"; |
916 | * auto msp = MutablesStringPiece(input); |
917 | * EXPECT_TRUE(msp.replaceAt(2, "tt")); |
918 | * EXPECT_EQ(msp, "butter"); |
919 | * |
920 | * // not enough space |
921 | * EXPECT_FALSE(msp.replace(msp.size() - 1, "rr")); |
922 | * EXPECT_EQ(msp, "butter"); // unchanged |
923 | */ |
924 | bool replaceAt(size_t pos, const_range_type replacement) { |
925 | if (size() < pos + replacement.size()) { |
926 | return false; |
927 | } |
928 | |
929 | std::copy(replacement.begin(), replacement.end(), begin() + pos); |
930 | |
931 | return true; |
932 | } |
933 | |
934 | /** |
935 | * Replaces all occurences of 'source' with 'dest'. Returns number |
936 | * of replacements made. Source and dest have to have the same |
937 | * length. Throws if the lengths are different. If 'source' is a |
938 | * pattern that is overlapping with itself, we perform sequential |
939 | * replacement: "aaaaaaa".replaceAll("aa", "ba") --> "bababaa" |
940 | * |
941 | * Example use: |
942 | * |
943 | * char in[] = "buffer"; |
944 | * auto msp = MutablesStringPiece(input); |
945 | * EXPECT_EQ(msp.replaceAll("ff","tt"), 1); |
946 | * EXPECT_EQ(msp, "butter"); |
947 | */ |
948 | size_t replaceAll(const_range_type source, const_range_type dest) { |
949 | if (source.size() != dest.size()) { |
950 | throw_exception<std::invalid_argument>( |
951 | "replacement must have the same size as source" ); |
952 | } |
953 | |
954 | if (dest.empty()) { |
955 | return 0; |
956 | } |
957 | |
958 | size_t pos = 0; |
959 | size_t num_replaced = 0; |
960 | size_type found = std::string::npos; |
961 | while ((found = find(source, pos)) != std::string::npos) { |
962 | replaceAt(found, dest); |
963 | pos += source.size(); |
964 | ++num_replaced; |
965 | } |
966 | |
967 | return num_replaced; |
968 | } |
969 | |
970 | /** |
971 | * Splits this `Range` `[b, e)` in the position `i` dictated by the next |
972 | * occurence of `delimiter`. |
973 | * |
974 | * Returns a new `Range` `[b, i)` and adjusts this range to start right after |
975 | * the delimiter's position. This range will be empty if the delimiter is not |
976 | * found. If called on an empty `Range`, both this and the returned `Range` |
977 | * will be empty. |
978 | * |
979 | * Example: |
980 | * |
981 | * folly::StringPiece s("sample string for split_next"); |
982 | * auto p = s.split_step(' '); |
983 | * |
984 | * // prints "string for split_next" |
985 | * cout << s << endl; |
986 | * |
987 | * // prints "sample" |
988 | * cout << p << endl; |
989 | * |
990 | * Example 2: |
991 | * |
992 | * void tokenize(StringPiece s, char delimiter) { |
993 | * while (!s.empty()) { |
994 | * cout << s.split_step(delimiter); |
995 | * } |
996 | * } |
997 | * |
998 | * @author: Marcelo Juchem <marcelo@fb.com> |
999 | */ |
1000 | Range split_step(value_type delimiter) { |
1001 | auto i = std::find(b_, e_, delimiter); |
1002 | Range result(b_, i); |
1003 | |
1004 | b_ = i == e_ ? e_ : std::next(i); |
1005 | |
1006 | return result; |
1007 | } |
1008 | |
1009 | Range split_step(Range delimiter) { |
1010 | auto i = find(delimiter); |
1011 | Range result(b_, i == std::string::npos ? size() : i); |
1012 | |
1013 | b_ = result.end() == e_ |
1014 | ? e_ |
1015 | : std::next( |
1016 | result.end(), |
1017 | typename std::iterator_traits<Iter>::difference_type( |
1018 | delimiter.size())); |
1019 | |
1020 | return result; |
1021 | } |
1022 | |
1023 | /** |
1024 | * Convenience method that calls `split_step()` and passes the result to a |
1025 | * functor, returning whatever the functor does. Any additional arguments |
1026 | * `args` passed to this function are perfectly forwarded to the functor. |
1027 | * |
1028 | * Say you have a functor with this signature: |
1029 | * |
1030 | * Foo fn(Range r) { } |
1031 | * |
1032 | * `split_step()`'s return type will be `Foo`. It works just like: |
1033 | * |
1034 | * auto result = fn(myRange.split_step(' ')); |
1035 | * |
1036 | * A functor returning `void` is also supported. |
1037 | * |
1038 | * Example: |
1039 | * |
1040 | * void do_some_parsing(folly::StringPiece s) { |
1041 | * auto version = s.split_step(' ', [&](folly::StringPiece x) { |
1042 | * if (x.empty()) { |
1043 | * throw std::invalid_argument("empty string"); |
1044 | * } |
1045 | * return std::strtoull(x.begin(), x.end(), 16); |
1046 | * }); |
1047 | * |
1048 | * // ... |
1049 | * } |
1050 | * |
1051 | * struct Foo { |
1052 | * void parse(folly::StringPiece s) { |
1053 | * s.split_step(' ', parse_field, bar, 10); |
1054 | * s.split_step('\t', parse_field, baz, 20); |
1055 | * |
1056 | * auto const kludge = [](folly::StringPiece x, int &out, int def) { |
1057 | * if (x == "null") { |
1058 | * out = 0; |
1059 | * } else { |
1060 | * parse_field(x, out, def); |
1061 | * } |
1062 | * }; |
1063 | * |
1064 | * s.split_step('\t', kludge, gaz); |
1065 | * s.split_step(' ', kludge, foo); |
1066 | * } |
1067 | * |
1068 | * private: |
1069 | * int bar; |
1070 | * int baz; |
1071 | * int gaz; |
1072 | * int foo; |
1073 | * |
1074 | * static parse_field(folly::StringPiece s, int &out, int def) { |
1075 | * try { |
1076 | * out = folly::to<int>(s); |
1077 | * } catch (std::exception const &) { |
1078 | * value = def; |
1079 | * } |
1080 | * } |
1081 | * }; |
1082 | * |
1083 | * @author: Marcelo Juchem <marcelo@fb.com> |
1084 | */ |
1085 | template <typename TProcess, typename... Args> |
1086 | auto split_step(value_type delimiter, TProcess&& process, Args&&... args) |
1087 | -> decltype(process(std::declval<Range>(), std::forward<Args>(args)...)) { |
1088 | return process(split_step(delimiter), std::forward<Args>(args)...); |
1089 | } |
1090 | |
1091 | template <typename TProcess, typename... Args> |
1092 | auto split_step(Range delimiter, TProcess&& process, Args&&... args) |
1093 | -> decltype(process(std::declval<Range>(), std::forward<Args>(args)...)) { |
1094 | return process(split_step(delimiter), std::forward<Args>(args)...); |
1095 | } |
1096 | |
1097 | private: |
1098 | Iter b_, e_; |
1099 | }; |
1100 | |
1101 | template <class Iter> |
1102 | const typename Range<Iter>::size_type Range<Iter>::npos = std::string::npos; |
1103 | |
1104 | template <class Iter> |
1105 | void swap(Range<Iter>& lhs, Range<Iter>& rhs) { |
1106 | lhs.swap(rhs); |
1107 | } |
1108 | |
1109 | /** |
1110 | * Create a range from two iterators, with type deduction. |
1111 | */ |
1112 | template <class Iter> |
1113 | constexpr Range<Iter> range(Iter first, Iter last) { |
1114 | return Range<Iter>(first, last); |
1115 | } |
1116 | |
1117 | /* |
1118 | * Creates a range to reference the contents of a contiguous-storage container. |
1119 | */ |
1120 | // Use pointers for types with '.data()' member |
1121 | template <class Collection> |
1122 | constexpr auto range(Collection& v) -> Range<decltype(v.data())> { |
1123 | return Range<decltype(v.data())>(v.data(), v.data() + v.size()); |
1124 | } |
1125 | template <class Collection> |
1126 | constexpr auto range(Collection const& v) -> Range<decltype(v.data())> { |
1127 | return Range<decltype(v.data())>(v.data(), v.data() + v.size()); |
1128 | } |
1129 | template <class Collection> |
1130 | constexpr auto crange(Collection const& v) -> Range<decltype(v.data())> { |
1131 | return Range<decltype(v.data())>(v.data(), v.data() + v.size()); |
1132 | } |
1133 | |
1134 | template <class T, size_t n> |
1135 | constexpr Range<T*> range(T (&array)[n]) { |
1136 | return Range<T*>(array, array + n); |
1137 | } |
1138 | template <class T, size_t n> |
1139 | constexpr Range<T const*> range(T const (&array)[n]) { |
1140 | return Range<T const*>(array, array + n); |
1141 | } |
1142 | template <class T, size_t n> |
1143 | constexpr Range<T const*> crange(T const (&array)[n]) { |
1144 | return Range<T const*>(array, array + n); |
1145 | } |
1146 | |
1147 | template <class T, size_t n> |
1148 | constexpr Range<T*> range(std::array<T, n>& array) { |
1149 | return Range<T*>{array}; |
1150 | } |
1151 | template <class T, size_t n> |
1152 | constexpr Range<T const*> range(std::array<T, n> const& array) { |
1153 | return Range<T const*>{array}; |
1154 | } |
1155 | template <class T, size_t n> |
1156 | constexpr Range<T const*> crange(std::array<T, n> const& array) { |
1157 | return Range<T const*>{array}; |
1158 | } |
1159 | |
1160 | typedef Range<const char*> StringPiece; |
1161 | typedef Range<char*> MutableStringPiece; |
1162 | typedef Range<const unsigned char*> ByteRange; |
1163 | typedef Range<unsigned char*> MutableByteRange; |
1164 | |
1165 | template <class C> |
1166 | std::basic_ostream<C>& operator<<( |
1167 | std::basic_ostream<C>& os, |
1168 | Range<C const*> piece) { |
1169 | using StreamSize = decltype(os.width()); |
1170 | os.write(piece.start(), static_cast<StreamSize>(piece.size())); |
1171 | return os; |
1172 | } |
1173 | |
1174 | template <class C> |
1175 | std::basic_ostream<C>& operator<<(std::basic_ostream<C>& os, Range<C*> piece) { |
1176 | using StreamSize = decltype(os.width()); |
1177 | os.write(piece.start(), static_cast<StreamSize>(piece.size())); |
1178 | return os; |
1179 | } |
1180 | |
1181 | /** |
1182 | * Templated comparison operators |
1183 | */ |
1184 | |
1185 | template <class Iter> |
1186 | inline bool operator==(const Range<Iter>& lhs, const Range<Iter>& rhs) { |
1187 | return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; |
1188 | } |
1189 | |
1190 | template <class Iter> |
1191 | inline bool operator!=(const Range<Iter>& lhs, const Range<Iter>& rhs) { |
1192 | return !(operator==(lhs, rhs)); |
1193 | } |
1194 | |
1195 | template <class Iter> |
1196 | inline bool operator<(const Range<Iter>& lhs, const Range<Iter>& rhs) { |
1197 | return lhs.compare(rhs) < 0; |
1198 | } |
1199 | |
1200 | template <class Iter> |
1201 | inline bool operator<=(const Range<Iter>& lhs, const Range<Iter>& rhs) { |
1202 | return lhs.compare(rhs) <= 0; |
1203 | } |
1204 | |
1205 | template <class Iter> |
1206 | inline bool operator>(const Range<Iter>& lhs, const Range<Iter>& rhs) { |
1207 | return lhs.compare(rhs) > 0; |
1208 | } |
1209 | |
1210 | template <class Iter> |
1211 | inline bool operator>=(const Range<Iter>& lhs, const Range<Iter>& rhs) { |
1212 | return lhs.compare(rhs) >= 0; |
1213 | } |
1214 | |
1215 | /** |
1216 | * Specializations of comparison operators for StringPiece |
1217 | */ |
1218 | |
1219 | namespace detail { |
1220 | |
1221 | template <class A, class B> |
1222 | struct ComparableAsStringPiece { |
1223 | enum { |
1224 | value = (std::is_convertible<A, StringPiece>::value && |
1225 | std::is_same<B, StringPiece>::value) || |
1226 | (std::is_convertible<B, StringPiece>::value && |
1227 | std::is_same<A, StringPiece>::value) |
1228 | }; |
1229 | }; |
1230 | |
1231 | } // namespace detail |
1232 | |
1233 | /** |
1234 | * operator== through conversion for Range<const char*> |
1235 | */ |
1236 | template <class T, class U> |
1237 | std::enable_if_t<detail::ComparableAsStringPiece<T, U>::value, bool> operator==( |
1238 | const T& lhs, |
1239 | const U& rhs) { |
1240 | return StringPiece(lhs) == StringPiece(rhs); |
1241 | } |
1242 | |
1243 | /** |
1244 | * operator!= through conversion for Range<const char*> |
1245 | */ |
1246 | template <class T, class U> |
1247 | std::enable_if_t<detail::ComparableAsStringPiece<T, U>::value, bool> operator!=( |
1248 | const T& lhs, |
1249 | const U& rhs) { |
1250 | return StringPiece(lhs) != StringPiece(rhs); |
1251 | } |
1252 | |
1253 | /** |
1254 | * operator< through conversion for Range<const char*> |
1255 | */ |
1256 | template <class T, class U> |
1257 | std::enable_if_t<detail::ComparableAsStringPiece<T, U>::value, bool> operator<( |
1258 | const T& lhs, |
1259 | const U& rhs) { |
1260 | return StringPiece(lhs) < StringPiece(rhs); |
1261 | } |
1262 | |
1263 | /** |
1264 | * operator> through conversion for Range<const char*> |
1265 | */ |
1266 | template <class T, class U> |
1267 | std::enable_if_t<detail::ComparableAsStringPiece<T, U>::value, bool> operator>( |
1268 | const T& lhs, |
1269 | const U& rhs) { |
1270 | return StringPiece(lhs) > StringPiece(rhs); |
1271 | } |
1272 | |
1273 | /** |
1274 | * operator< through conversion for Range<const char*> |
1275 | */ |
1276 | template <class T, class U> |
1277 | std::enable_if_t<detail::ComparableAsStringPiece<T, U>::value, bool> operator<=( |
1278 | const T& lhs, |
1279 | const U& rhs) { |
1280 | return StringPiece(lhs) <= StringPiece(rhs); |
1281 | } |
1282 | |
1283 | /** |
1284 | * operator> through conversion for Range<const char*> |
1285 | */ |
1286 | template <class T, class U> |
1287 | std::enable_if_t<detail::ComparableAsStringPiece<T, U>::value, bool> operator>=( |
1288 | const T& lhs, |
1289 | const U& rhs) { |
1290 | return StringPiece(lhs) >= StringPiece(rhs); |
1291 | } |
1292 | |
1293 | /** |
1294 | * Finds substrings faster than brute force by borrowing from Boyer-Moore |
1295 | */ |
1296 | template <class Iter, class Comp> |
1297 | size_t qfind(const Range<Iter>& haystack, const Range<Iter>& needle, Comp eq) { |
1298 | // Don't use std::search, use a Boyer-Moore-like trick by comparing |
1299 | // the last characters first |
1300 | auto const nsize = needle.size(); |
1301 | if (haystack.size() < nsize) { |
1302 | return std::string::npos; |
1303 | } |
1304 | if (!nsize) { |
1305 | return 0; |
1306 | } |
1307 | auto const nsize_1 = nsize - 1; |
1308 | auto const lastNeedle = needle[nsize_1]; |
1309 | |
1310 | // Boyer-Moore skip value for the last char in the needle. Zero is |
1311 | // not a valid value; skip will be computed the first time it's |
1312 | // needed. |
1313 | std::string::size_type skip = 0; |
1314 | |
1315 | auto i = haystack.begin(); |
1316 | auto iEnd = haystack.end() - nsize_1; |
1317 | |
1318 | while (i < iEnd) { |
1319 | // Boyer-Moore: match the last element in the needle |
1320 | while (!eq(i[nsize_1], lastNeedle)) { |
1321 | if (++i == iEnd) { |
1322 | // not found |
1323 | return std::string::npos; |
1324 | } |
1325 | } |
1326 | // Here we know that the last char matches |
1327 | // Continue in pedestrian mode |
1328 | for (size_t j = 0;;) { |
1329 | assert(j < nsize); |
1330 | if (!eq(i[j], needle[j])) { |
1331 | // Not found, we can skip |
1332 | // Compute the skip value lazily |
1333 | if (skip == 0) { |
1334 | skip = 1; |
1335 | while (skip <= nsize_1 && !eq(needle[nsize_1 - skip], lastNeedle)) { |
1336 | ++skip; |
1337 | } |
1338 | } |
1339 | i += skip; |
1340 | break; |
1341 | } |
1342 | // Check if done searching |
1343 | if (++j == nsize) { |
1344 | // Yay |
1345 | return size_t(i - haystack.begin()); |
1346 | } |
1347 | } |
1348 | } |
1349 | return std::string::npos; |
1350 | } |
1351 | |
1352 | namespace detail { |
1353 | |
1354 | inline size_t qfind_first_byte_of( |
1355 | const StringPiece haystack, |
1356 | const StringPiece needles) { |
1357 | static auto const qfind_first_byte_of_fn = folly::CpuId().sse42() |
1358 | ? qfind_first_byte_of_sse42 |
1359 | : qfind_first_byte_of_nosse; |
1360 | return qfind_first_byte_of_fn(haystack, needles); |
1361 | } |
1362 | |
1363 | } // namespace detail |
1364 | |
1365 | template <class Iter, class Comp> |
1366 | size_t qfind_first_of( |
1367 | const Range<Iter>& haystack, |
1368 | const Range<Iter>& needles, |
1369 | Comp eq) { |
1370 | auto ret = std::find_first_of( |
1371 | haystack.begin(), haystack.end(), needles.begin(), needles.end(), eq); |
1372 | return ret == haystack.end() ? std::string::npos : ret - haystack.begin(); |
1373 | } |
1374 | |
1375 | struct AsciiCaseSensitive { |
1376 | bool operator()(char lhs, char rhs) const { |
1377 | return lhs == rhs; |
1378 | } |
1379 | }; |
1380 | |
1381 | /** |
1382 | * Check if two ascii characters are case insensitive equal. |
1383 | * The difference between the lower/upper case characters are the 6-th bit. |
1384 | * We also check they are alpha chars, in case of xor = 32. |
1385 | */ |
1386 | struct AsciiCaseInsensitive { |
1387 | bool operator()(char lhs, char rhs) const { |
1388 | char k = lhs ^ rhs; |
1389 | if (k == 0) { |
1390 | return true; |
1391 | } |
1392 | if (k != 32) { |
1393 | return false; |
1394 | } |
1395 | k = lhs | rhs; |
1396 | return (k >= 'a' && k <= 'z'); |
1397 | } |
1398 | }; |
1399 | |
1400 | template <class Iter> |
1401 | size_t qfind( |
1402 | const Range<Iter>& haystack, |
1403 | const typename Range<Iter>::value_type& needle) { |
1404 | auto pos = std::find(haystack.begin(), haystack.end(), needle); |
1405 | return pos == haystack.end() ? std::string::npos : pos - haystack.data(); |
1406 | } |
1407 | |
1408 | template <class Iter> |
1409 | size_t rfind( |
1410 | const Range<Iter>& haystack, |
1411 | const typename Range<Iter>::value_type& needle) { |
1412 | for (auto i = haystack.size(); i-- > 0;) { |
1413 | if (haystack[i] == needle) { |
1414 | return i; |
1415 | } |
1416 | } |
1417 | return std::string::npos; |
1418 | } |
1419 | |
1420 | // specialization for StringPiece |
1421 | template <> |
1422 | inline size_t qfind(const Range<const char*>& haystack, const char& needle) { |
1423 | // memchr expects a not-null pointer, early return if the range is empty. |
1424 | if (haystack.empty()) { |
1425 | return std::string::npos; |
1426 | } |
1427 | auto pos = static_cast<const char*>( |
1428 | ::memchr(haystack.data(), needle, haystack.size())); |
1429 | return pos == nullptr ? std::string::npos : pos - haystack.data(); |
1430 | } |
1431 | |
1432 | template <> |
1433 | inline size_t rfind(const Range<const char*>& haystack, const char& needle) { |
1434 | // memchr expects a not-null pointer, early return if the range is empty. |
1435 | if (haystack.empty()) { |
1436 | return std::string::npos; |
1437 | } |
1438 | auto pos = static_cast<const char*>( |
1439 | ::memrchr(haystack.data(), needle, haystack.size())); |
1440 | return pos == nullptr ? std::string::npos : pos - haystack.data(); |
1441 | } |
1442 | |
1443 | // specialization for ByteRange |
1444 | template <> |
1445 | inline size_t qfind( |
1446 | const Range<const unsigned char*>& haystack, |
1447 | const unsigned char& needle) { |
1448 | // memchr expects a not-null pointer, early return if the range is empty. |
1449 | if (haystack.empty()) { |
1450 | return std::string::npos; |
1451 | } |
1452 | auto pos = static_cast<const unsigned char*>( |
1453 | ::memchr(haystack.data(), needle, haystack.size())); |
1454 | return pos == nullptr ? std::string::npos : pos - haystack.data(); |
1455 | } |
1456 | |
1457 | template <> |
1458 | inline size_t rfind( |
1459 | const Range<const unsigned char*>& haystack, |
1460 | const unsigned char& needle) { |
1461 | // memchr expects a not-null pointer, early return if the range is empty. |
1462 | if (haystack.empty()) { |
1463 | return std::string::npos; |
1464 | } |
1465 | auto pos = static_cast<const unsigned char*>( |
1466 | ::memrchr(haystack.data(), needle, haystack.size())); |
1467 | return pos == nullptr ? std::string::npos : pos - haystack.data(); |
1468 | } |
1469 | |
1470 | template <class Iter> |
1471 | size_t qfind_first_of(const Range<Iter>& haystack, const Range<Iter>& needles) { |
1472 | return qfind_first_of(haystack, needles, AsciiCaseSensitive()); |
1473 | } |
1474 | |
1475 | // specialization for StringPiece |
1476 | template <> |
1477 | inline size_t qfind_first_of( |
1478 | const Range<const char*>& haystack, |
1479 | const Range<const char*>& needles) { |
1480 | return detail::qfind_first_byte_of(haystack, needles); |
1481 | } |
1482 | |
1483 | // specialization for ByteRange |
1484 | template <> |
1485 | inline size_t qfind_first_of( |
1486 | const Range<const unsigned char*>& haystack, |
1487 | const Range<const unsigned char*>& needles) { |
1488 | return detail::qfind_first_byte_of( |
1489 | StringPiece(haystack), StringPiece(needles)); |
1490 | } |
1491 | |
1492 | template <class Key, class Enable> |
1493 | struct hasher; |
1494 | |
1495 | template <class T> |
1496 | struct hasher< |
1497 | folly::Range<T*>, |
1498 | std::enable_if_t<std::is_integral<T>::value, void>> { |
1499 | using folly_is_avalanching = std::true_type; |
1500 | |
1501 | size_t operator()(folly::Range<T*> r) const { |
1502 | // std::is_integral<T> is too restrictive, but is sufficient to |
1503 | // guarantee we can just hash all of the underlying bytes to get a |
1504 | // suitable hash of T. Something like absl::is_uniquely_represented<T> |
1505 | // would be better. std::is_pod is not enough, because POD types |
1506 | // can contain pointers and padding. Also, floating point numbers |
1507 | // may be == without being bit-identical. |
1508 | return hash::SpookyHashV2::Hash64(r.begin(), r.size() * sizeof(T), 0); |
1509 | } |
1510 | }; |
1511 | |
1512 | /** |
1513 | * _sp is a user-defined literal suffix to make an appropriate Range |
1514 | * specialization from a literal string. |
1515 | * |
1516 | * Modeled after C++17's `sv` suffix. |
1517 | */ |
1518 | inline namespace literals { |
1519 | inline namespace string_piece_literals { |
1520 | constexpr Range<char const*> operator"" _sp( |
1521 | char const* str, |
1522 | size_t len) noexcept { |
1523 | return Range<char const*>(str, len); |
1524 | } |
1525 | |
1526 | constexpr Range<char16_t const*> operator"" _sp( |
1527 | char16_t const* str, |
1528 | size_t len) noexcept { |
1529 | return Range<char16_t const*>(str, len); |
1530 | } |
1531 | |
1532 | constexpr Range<char32_t const*> operator"" _sp( |
1533 | char32_t const* str, |
1534 | size_t len) noexcept { |
1535 | return Range<char32_t const*>(str, len); |
1536 | } |
1537 | |
1538 | constexpr Range<wchar_t const*> operator"" _sp( |
1539 | wchar_t const* str, |
1540 | size_t len) noexcept { |
1541 | return Range<wchar_t const*>(str, len); |
1542 | } |
1543 | } // namespace string_piece_literals |
1544 | } // namespace literals |
1545 | |
1546 | } // namespace folly |
1547 | |
1548 | FOLLY_POP_WARNING |
1549 | |
1550 | FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(folly::Range) |
1551 | |