1//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_ADT_STRINGREF_H
11#define LLVM_ADT_STRINGREF_H
12
13#include "llvm/ADT/STLExtras.h"
14#include "llvm/ADT/iterator_range.h"
15#include "llvm/Support/Compiler.h"
16#include <algorithm>
17#include <cassert>
18#include <cstddef>
19#include <cstring>
20#include <limits>
21#include <string>
22#include <type_traits>
23#include <utility>
24
25namespace llvm {
26
27 class APInt;
28 class hash_code;
29 template <typename T> class SmallVectorImpl;
30 class StringRef;
31
32 /// Helper functions for StringRef::getAsInteger.
33 bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
34 unsigned long long &Result);
35
36 bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
37
38 bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
39 unsigned long long &Result);
40 bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
41
42 /// StringRef - Represent a constant reference to a string, i.e. a character
43 /// array and a length, which need not be null terminated.
44 ///
45 /// This class does not own the string data, it is expected to be used in
46 /// situations where the character data resides in some other buffer, whose
47 /// lifetime extends past that of the StringRef. For this reason, it is not in
48 /// general safe to store a StringRef.
49 class StringRef {
50 public:
51 static const size_t npos = ~size_t(0);
52
53 using iterator = const char *;
54 using const_iterator = const char *;
55 using size_type = size_t;
56
57 private:
58 /// The start of the string, in an external buffer.
59 const char *Data = nullptr;
60
61 /// The length of the string.
62 size_t Length = 0;
63
64 // Workaround memcmp issue with null pointers (undefined behavior)
65 // by providing a specialized version
66 LLVM_ATTRIBUTE_ALWAYS_INLINE
67 static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
68 if (Length == 0) { return 0; }
69 return ::memcmp(Lhs,Rhs,Length);
70 }
71
72 public:
73 /// @name Constructors
74 /// @{
75
76 /// Construct an empty string ref.
77 /*implicit*/ StringRef() = default;
78
79 /// Disable conversion from nullptr. This prevents things like
80 /// if (S == nullptr)
81 StringRef(std::nullptr_t) = delete;
82
83 /// Construct a string ref from a cstring.
84 LLVM_ATTRIBUTE_ALWAYS_INLINE
85 /*implicit*/ StringRef(const char *Str)
86 : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
87
88 /// Construct a string ref from a pointer and length.
89 LLVM_ATTRIBUTE_ALWAYS_INLINE
90 /*implicit*/ constexpr StringRef(const char *data, size_t length)
91 : Data(data), Length(length) {}
92
93 /// Construct a string ref from an std::string.
94 LLVM_ATTRIBUTE_ALWAYS_INLINE
95 /*implicit*/ StringRef(const std::string &Str)
96 : Data(Str.data()), Length(Str.length()) {}
97
98 static StringRef withNullAsEmpty(const char *data) {
99 return StringRef(data ? data : "");
100 }
101
102 /// @}
103 /// @name Iterators
104 /// @{
105
106 iterator begin() const { return Data; }
107
108 iterator end() const { return Data + Length; }
109
110 const unsigned char *bytes_begin() const {
111 return reinterpret_cast<const unsigned char *>(begin());
112 }
113 const unsigned char *bytes_end() const {
114 return reinterpret_cast<const unsigned char *>(end());
115 }
116 iterator_range<const unsigned char *> bytes() const {
117 return make_range(bytes_begin(), bytes_end());
118 }
119
120 /// @}
121 /// @name String Operations
122 /// @{
123
124 /// data - Get a pointer to the start of the string (which may not be null
125 /// terminated).
126 LLVM_NODISCARD
127 LLVM_ATTRIBUTE_ALWAYS_INLINE
128 const char *data() const { return Data; }
129
130 /// empty - Check if the string is empty.
131 LLVM_NODISCARD
132 LLVM_ATTRIBUTE_ALWAYS_INLINE
133 bool empty() const { return Length == 0; }
134
135 /// size - Get the string size.
136 LLVM_NODISCARD
137 LLVM_ATTRIBUTE_ALWAYS_INLINE
138 size_t size() const { return Length; }
139
140 /// front - Get the first character in the string.
141 LLVM_NODISCARD
142 char front() const {
143 assert(!empty());
144 return Data[0];
145 }
146
147 /// back - Get the last character in the string.
148 LLVM_NODISCARD
149 char back() const {
150 assert(!empty());
151 return Data[Length-1];
152 }
153
154 // copy - Allocate copy in Allocator and return StringRef to it.
155 template <typename Allocator>
156 LLVM_NODISCARD StringRef copy(Allocator &A) const {
157 // Don't request a length 0 copy from the allocator.
158 if (empty())
159 return StringRef();
160 char *S = A.template Allocate<char>(Length);
161 std::copy(begin(), end(), S);
162 return StringRef(S, Length);
163 }
164
165 /// equals - Check for string equality, this is more efficient than
166 /// compare() when the relative ordering of inequal strings isn't needed.
167 LLVM_NODISCARD
168 LLVM_ATTRIBUTE_ALWAYS_INLINE
169 bool equals(StringRef RHS) const {
170 return (Length == RHS.Length &&
171 compareMemory(Data, RHS.Data, RHS.Length) == 0);
172 }
173
174 /// equals_lower - Check for string equality, ignoring case.
175 LLVM_NODISCARD
176 bool equals_lower(StringRef RHS) const {
177 return Length == RHS.Length && compare_lower(RHS) == 0;
178 }
179
180 /// compare - Compare two strings; the result is -1, 0, or 1 if this string
181 /// is lexicographically less than, equal to, or greater than the \p RHS.
182 LLVM_NODISCARD
183 LLVM_ATTRIBUTE_ALWAYS_INLINE
184 int compare(StringRef RHS) const {
185 // Check the prefix for a mismatch.
186 if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
187 return Res < 0 ? -1 : 1;
188
189 // Otherwise the prefixes match, so we only need to check the lengths.
190 if (Length == RHS.Length)
191 return 0;
192 return Length < RHS.Length ? -1 : 1;
193 }
194
195 /// compare_lower - Compare two strings, ignoring case.
196 LLVM_NODISCARD
197 int compare_lower(StringRef RHS) const;
198
199 /// compare_numeric - Compare two strings, treating sequences of digits as
200 /// numbers.
201 LLVM_NODISCARD
202 int compare_numeric(StringRef RHS) const;
203
204 /// Determine the edit distance between this string and another
205 /// string.
206 ///
207 /// \param Other the string to compare this string against.
208 ///
209 /// \param AllowReplacements whether to allow character
210 /// replacements (change one character into another) as a single
211 /// operation, rather than as two operations (an insertion and a
212 /// removal).
213 ///
214 /// \param MaxEditDistance If non-zero, the maximum edit distance that
215 /// this routine is allowed to compute. If the edit distance will exceed
216 /// that maximum, returns \c MaxEditDistance+1.
217 ///
218 /// \returns the minimum number of character insertions, removals,
219 /// or (if \p AllowReplacements is \c true) replacements needed to
220 /// transform one of the given strings into the other. If zero,
221 /// the strings are identical.
222 LLVM_NODISCARD
223 unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
224 unsigned MaxEditDistance = 0) const;
225
226 /// str - Get the contents as an std::string.
227 LLVM_NODISCARD
228 std::string str() const {
229 if (!Data) return std::string();
230 return std::string(Data, Length);
231 }
232
233 /// @}
234 /// @name Operator Overloads
235 /// @{
236
237 LLVM_NODISCARD
238 char operator[](size_t Index) const {
239 assert(Index < Length && "Invalid index!");
240 return Data[Index];
241 }
242
243 /// Disallow accidental assignment from a temporary std::string.
244 ///
245 /// The declaration here is extra complicated so that `stringRef = {}`
246 /// and `stringRef = "abc"` continue to select the move assignment operator.
247 template <typename T>
248 typename std::enable_if<std::is_same<T, std::string>::value,
249 StringRef>::type &
250 operator=(T &&Str) = delete;
251
252 /// @}
253 /// @name Type Conversions
254 /// @{
255
256 operator std::string() const {
257 return str();
258 }
259
260 /// @}
261 /// @name String Predicates
262 /// @{
263
264 /// Check if this string starts with the given \p Prefix.
265 LLVM_NODISCARD
266 LLVM_ATTRIBUTE_ALWAYS_INLINE
267 bool startswith(StringRef Prefix) const {
268 return Length >= Prefix.Length &&
269 compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
270 }
271
272 /// Check if this string starts with the given \p Prefix, ignoring case.
273 LLVM_NODISCARD
274 bool startswith_lower(StringRef Prefix) const;
275
276 /// Check if this string ends with the given \p Suffix.
277 LLVM_NODISCARD
278 LLVM_ATTRIBUTE_ALWAYS_INLINE
279 bool endswith(StringRef Suffix) const {
280 return Length >= Suffix.Length &&
281 compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
282 }
283
284 /// Check if this string ends with the given \p Suffix, ignoring case.
285 LLVM_NODISCARD
286 bool endswith_lower(StringRef Suffix) const;
287
288 /// @}
289 /// @name String Searching
290 /// @{
291
292 /// Search for the first character \p C in the string.
293 ///
294 /// \returns The index of the first occurrence of \p C, or npos if not
295 /// found.
296 LLVM_NODISCARD
297 LLVM_ATTRIBUTE_ALWAYS_INLINE
298 size_t find(char C, size_t From = 0) const {
299 size_t FindBegin = std::min(From, Length);
300 if (FindBegin < Length) { // Avoid calling memchr with nullptr.
301 // Just forward to memchr, which is faster than a hand-rolled loop.
302 if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
303 return static_cast<const char *>(P) - Data;
304 }
305 return npos;
306 }
307
308 /// Search for the first character \p C in the string, ignoring case.
309 ///
310 /// \returns The index of the first occurrence of \p C, or npos if not
311 /// found.
312 LLVM_NODISCARD
313 size_t find_lower(char C, size_t From = 0) const;
314
315 /// Search for the first character satisfying the predicate \p F
316 ///
317 /// \returns The index of the first character satisfying \p F starting from
318 /// \p From, or npos if not found.
319 LLVM_NODISCARD
320 LLVM_ATTRIBUTE_ALWAYS_INLINE
321 size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
322 StringRef S = drop_front(From);
323 while (!S.empty()) {
324 if (F(S.front()))
325 return size() - S.size();
326 S = S.drop_front();
327 }
328 return npos;
329 }
330
331 /// Search for the first character not satisfying the predicate \p F
332 ///
333 /// \returns The index of the first character not satisfying \p F starting
334 /// from \p From, or npos if not found.
335 LLVM_NODISCARD
336 LLVM_ATTRIBUTE_ALWAYS_INLINE
337 size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
338 return find_if([F](char c) { return !F(c); }, From);
339 }
340
341 /// Search for the first string \p Str in the string.
342 ///
343 /// \returns The index of the first occurrence of \p Str, or npos if not
344 /// found.
345 LLVM_NODISCARD
346 size_t find(StringRef Str, size_t From = 0) const;
347
348 /// Search for the first string \p Str in the string, ignoring case.
349 ///
350 /// \returns The index of the first occurrence of \p Str, or npos if not
351 /// found.
352 LLVM_NODISCARD
353 size_t find_lower(StringRef Str, size_t From = 0) const;
354
355 /// Search for the last character \p C in the string.
356 ///
357 /// \returns The index of the last occurrence of \p C, or npos if not
358 /// found.
359 LLVM_NODISCARD
360 size_t rfind(char C, size_t From = npos) const {
361 From = std::min(From, Length);
362 size_t i = From;
363 while (i != 0) {
364 --i;
365 if (Data[i] == C)
366 return i;
367 }
368 return npos;
369 }
370
371 /// Search for the last character \p C in the string, ignoring case.
372 ///
373 /// \returns The index of the last occurrence of \p C, or npos if not
374 /// found.
375 LLVM_NODISCARD
376 size_t rfind_lower(char C, size_t From = npos) const;
377
378 /// Search for the last string \p Str in the string.
379 ///
380 /// \returns The index of the last occurrence of \p Str, or npos if not
381 /// found.
382 LLVM_NODISCARD
383 size_t rfind(StringRef Str) const;
384
385 /// Search for the last string \p Str in the string, ignoring case.
386 ///
387 /// \returns The index of the last occurrence of \p Str, or npos if not
388 /// found.
389 LLVM_NODISCARD
390 size_t rfind_lower(StringRef Str) const;
391
392 /// Find the first character in the string that is \p C, or npos if not
393 /// found. Same as find.
394 LLVM_NODISCARD
395 size_t find_first_of(char C, size_t From = 0) const {
396 return find(C, From);
397 }
398
399 /// Find the first character in the string that is in \p Chars, or npos if
400 /// not found.
401 ///
402 /// Complexity: O(size() + Chars.size())
403 LLVM_NODISCARD
404 size_t find_first_of(StringRef Chars, size_t From = 0) const;
405
406 /// Find the first character in the string that is not \p C or npos if not
407 /// found.
408 LLVM_NODISCARD
409 size_t find_first_not_of(char C, size_t From = 0) const;
410
411 /// Find the first character in the string that is not in the string
412 /// \p Chars, or npos if not found.
413 ///
414 /// Complexity: O(size() + Chars.size())
415 LLVM_NODISCARD
416 size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
417
418 /// Find the last character in the string that is \p C, or npos if not
419 /// found.
420 LLVM_NODISCARD
421 size_t find_last_of(char C, size_t From = npos) const {
422 return rfind(C, From);
423 }
424
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
427 ///
428 /// Complexity: O(size() + Chars.size())
429 LLVM_NODISCARD
430 size_t find_last_of(StringRef Chars, size_t From = npos) const;
431
432 /// Find the last character in the string that is not \p C, or npos if not
433 /// found.
434 LLVM_NODISCARD
435 size_t find_last_not_of(char C, size_t From = npos) const;
436
437 /// Find the last character in the string that is not in \p Chars, or
438 /// npos if not found.
439 ///
440 /// Complexity: O(size() + Chars.size())
441 LLVM_NODISCARD
442 size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
443
444 /// Return true if the given string is a substring of *this, and false
445 /// otherwise.
446 LLVM_NODISCARD
447 LLVM_ATTRIBUTE_ALWAYS_INLINE
448 bool contains(StringRef Other) const { return find(Other) != npos; }
449
450 /// Return true if the given character is contained in *this, and false
451 /// otherwise.
452 LLVM_NODISCARD
453 LLVM_ATTRIBUTE_ALWAYS_INLINE
454 bool contains(char C) const { return find_first_of(C) != npos; }
455
456 /// Return true if the given string is a substring of *this, and false
457 /// otherwise.
458 LLVM_NODISCARD
459 LLVM_ATTRIBUTE_ALWAYS_INLINE
460 bool contains_lower(StringRef Other) const {
461 return find_lower(Other) != npos;
462 }
463
464 /// Return true if the given character is contained in *this, and false
465 /// otherwise.
466 LLVM_NODISCARD
467 LLVM_ATTRIBUTE_ALWAYS_INLINE
468 bool contains_lower(char C) const { return find_lower(C) != npos; }
469
470 /// @}
471 /// @name Helpful Algorithms
472 /// @{
473
474 /// Return the number of occurrences of \p C in the string.
475 LLVM_NODISCARD
476 size_t count(char C) const {
477 size_t Count = 0;
478 for (size_t i = 0, e = Length; i != e; ++i)
479 if (Data[i] == C)
480 ++Count;
481 return Count;
482 }
483
484 /// Return the number of non-overlapped occurrences of \p Str in
485 /// the string.
486 size_t count(StringRef Str) const;
487
488 /// Parse the current string as an integer of the specified radix. If
489 /// \p Radix is specified as zero, this does radix autosensing using
490 /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
491 ///
492 /// If the string is invalid or if only a subset of the string is valid,
493 /// this returns true to signify the error. The string is considered
494 /// erroneous if empty or if it overflows T.
495 template <typename T>
496 typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
497 getAsInteger(unsigned Radix, T &Result) const {
498 long long LLVal;
499 if (getAsSignedInteger(*this, Radix, LLVal) ||
500 static_cast<T>(LLVal) != LLVal)
501 return true;
502 Result = LLVal;
503 return false;
504 }
505
506 template <typename T>
507 typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
508 getAsInteger(unsigned Radix, T &Result) const {
509 unsigned long long ULLVal;
510 // The additional cast to unsigned long long is required to avoid the
511 // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
512 // 'unsigned __int64' when instantiating getAsInteger with T = bool.
513 if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
514 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
515 return true;
516 Result = ULLVal;
517 return false;
518 }
519
520 /// Parse the current string as an integer of the specified radix. If
521 /// \p Radix is specified as zero, this does radix autosensing using
522 /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
523 ///
524 /// If the string does not begin with a number of the specified radix,
525 /// this returns true to signify the error. The string is considered
526 /// erroneous if empty or if it overflows T.
527 /// The portion of the string representing the discovered numeric value
528 /// is removed from the beginning of the string.
529 template <typename T>
530 typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
531 consumeInteger(unsigned Radix, T &Result) {
532 long long LLVal;
533 if (consumeSignedInteger(*this, Radix, LLVal) ||
534 static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
535 return true;
536 Result = LLVal;
537 return false;
538 }
539
540 template <typename T>
541 typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
542 consumeInteger(unsigned Radix, T &Result) {
543 unsigned long long ULLVal;
544 if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
545 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
546 return true;
547 Result = ULLVal;
548 return false;
549 }
550
551 /// Parse the current string as an integer of the specified \p Radix, or of
552 /// an autosensed radix if the \p Radix given is 0. The current value in
553 /// \p Result is discarded, and the storage is changed to be wide enough to
554 /// store the parsed integer.
555 ///
556 /// \returns true if the string does not solely consist of a valid
557 /// non-empty number in the appropriate base.
558 ///
559 /// APInt::fromString is superficially similar but assumes the
560 /// string is well-formed in the given radix.
561 bool getAsInteger(unsigned Radix, APInt &Result) const;
562
563 /// Parse the current string as an IEEE double-precision floating
564 /// point value. The string must be a well-formed double.
565 ///
566 /// If \p AllowInexact is false, the function will fail if the string
567 /// cannot be represented exactly. Otherwise, the function only fails
568 /// in case of an overflow or underflow.
569 bool getAsDouble(double &Result, bool AllowInexact = true) const;
570
571 /// @}
572 /// @name String Operations
573 /// @{
574
    /// Convert the given ASCII string to lowercase.
576 LLVM_NODISCARD
577 std::string lower() const;
578
579 /// Convert the given ASCII string to uppercase.
580 LLVM_NODISCARD
581 std::string upper() const;
582
583 /// @}
584 /// @name Substring Operations
585 /// @{
586
587 /// Return a reference to the substring from [Start, Start + N).
588 ///
589 /// \param Start The index of the starting character in the substring; if
590 /// the index is npos or greater than the length of the string then the
591 /// empty substring will be returned.
592 ///
593 /// \param N The number of characters to included in the substring. If N
594 /// exceeds the number of characters remaining in the string, the string
595 /// suffix (starting with \p Start) will be returned.
596 LLVM_NODISCARD
597 LLVM_ATTRIBUTE_ALWAYS_INLINE
598 StringRef substr(size_t Start, size_t N = npos) const {
599 Start = std::min(Start, Length);
600 return StringRef(Data + Start, std::min(N, Length - Start));
601 }
602
603 /// Return a StringRef equal to 'this' but with only the first \p N
604 /// elements remaining. If \p N is greater than the length of the
605 /// string, the entire string is returned.
606 LLVM_NODISCARD
607 LLVM_ATTRIBUTE_ALWAYS_INLINE
608 StringRef take_front(size_t N = 1) const {
609 if (N >= size())
610 return *this;
611 return drop_back(size() - N);
612 }
613
614 /// Return a StringRef equal to 'this' but with only the last \p N
615 /// elements remaining. If \p N is greater than the length of the
616 /// string, the entire string is returned.
617 LLVM_NODISCARD
618 LLVM_ATTRIBUTE_ALWAYS_INLINE
619 StringRef take_back(size_t N = 1) const {
620 if (N >= size())
621 return *this;
622 return drop_front(size() - N);
623 }
624
625 /// Return the longest prefix of 'this' such that every character
626 /// in the prefix satisfies the given predicate.
627 LLVM_NODISCARD
628 LLVM_ATTRIBUTE_ALWAYS_INLINE
629 StringRef take_while(function_ref<bool(char)> F) const {
630 return substr(0, find_if_not(F));
631 }
632
633 /// Return the longest prefix of 'this' such that no character in
634 /// the prefix satisfies the given predicate.
635 LLVM_NODISCARD
636 LLVM_ATTRIBUTE_ALWAYS_INLINE
637 StringRef take_until(function_ref<bool(char)> F) const {
638 return substr(0, find_if(F));
639 }
640
641 /// Return a StringRef equal to 'this' but with the first \p N elements
642 /// dropped.
643 LLVM_NODISCARD
644 LLVM_ATTRIBUTE_ALWAYS_INLINE
645 StringRef drop_front(size_t N = 1) const {
646 assert(size() >= N && "Dropping more elements than exist");
647 return substr(N);
648 }
649
650 /// Return a StringRef equal to 'this' but with the last \p N elements
651 /// dropped.
652 LLVM_NODISCARD
653 LLVM_ATTRIBUTE_ALWAYS_INLINE
654 StringRef drop_back(size_t N = 1) const {
655 assert(size() >= N && "Dropping more elements than exist");
656 return substr(0, size()-N);
657 }
658
659 /// Return a StringRef equal to 'this', but with all characters satisfying
660 /// the given predicate dropped from the beginning of the string.
661 LLVM_NODISCARD
662 LLVM_ATTRIBUTE_ALWAYS_INLINE
663 StringRef drop_while(function_ref<bool(char)> F) const {
664 return substr(find_if_not(F));
665 }
666
667 /// Return a StringRef equal to 'this', but with all characters not
668 /// satisfying the given predicate dropped from the beginning of the string.
669 LLVM_NODISCARD
670 LLVM_ATTRIBUTE_ALWAYS_INLINE
671 StringRef drop_until(function_ref<bool(char)> F) const {
672 return substr(find_if(F));
673 }
674
675 /// Returns true if this StringRef has the given prefix and removes that
676 /// prefix.
677 LLVM_ATTRIBUTE_ALWAYS_INLINE
678 bool consume_front(StringRef Prefix) {
679 if (!startswith(Prefix))
680 return false;
681
682 *this = drop_front(Prefix.size());
683 return true;
684 }
685
686 /// Returns true if this StringRef has the given suffix and removes that
687 /// suffix.
688 LLVM_ATTRIBUTE_ALWAYS_INLINE
689 bool consume_back(StringRef Suffix) {
690 if (!endswith(Suffix))
691 return false;
692
693 *this = drop_back(Suffix.size());
694 return true;
695 }
696
697 /// Return a reference to the substring from [Start, End).
698 ///
699 /// \param Start The index of the starting character in the substring; if
700 /// the index is npos or greater than the length of the string then the
701 /// empty substring will be returned.
702 ///
703 /// \param End The index following the last character to include in the
704 /// substring. If this is npos or exceeds the number of characters
705 /// remaining in the string, the string suffix (starting with \p Start)
706 /// will be returned. If this is less than \p Start, an empty string will
707 /// be returned.
708 LLVM_NODISCARD
709 LLVM_ATTRIBUTE_ALWAYS_INLINE
710 StringRef slice(size_t Start, size_t End) const {
711 Start = std::min(Start, Length);
712 End = std::min(std::max(Start, End), Length);
713 return StringRef(Data + Start, End - Start);
714 }
715
716 /// Split into two substrings around the first occurrence of a separator
717 /// character.
718 ///
719 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
720 /// such that (*this == LHS + Separator + RHS) is true and RHS is
721 /// maximal. If \p Separator is not in the string, then the result is a
722 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
723 ///
724 /// \param Separator The character to split on.
725 /// \returns The split substrings.
726 LLVM_NODISCARD
727 std::pair<StringRef, StringRef> split(char Separator) const {
728 return split(StringRef(&Separator, 1));
729 }
730
731 /// Split into two substrings around the first occurrence of a separator
732 /// string.
733 ///
734 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
735 /// such that (*this == LHS + Separator + RHS) is true and RHS is
736 /// maximal. If \p Separator is not in the string, then the result is a
737 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
738 ///
739 /// \param Separator - The string to split on.
740 /// \return - The split substrings.
741 LLVM_NODISCARD
742 std::pair<StringRef, StringRef> split(StringRef Separator) const {
743 size_t Idx = find(Separator);
744 if (Idx == npos)
745 return std::make_pair(*this, StringRef());
746 return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
747 }
748
749 /// Split into two substrings around the last occurrence of a separator
750 /// string.
751 ///
752 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
753 /// such that (*this == LHS + Separator + RHS) is true and RHS is
754 /// minimal. If \p Separator is not in the string, then the result is a
755 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
756 ///
757 /// \param Separator - The string to split on.
758 /// \return - The split substrings.
759 LLVM_NODISCARD
760 std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
761 size_t Idx = rfind(Separator);
762 if (Idx == npos)
763 return std::make_pair(*this, StringRef());
764 return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
765 }
766
767 /// Split into substrings around the occurrences of a separator string.
768 ///
769 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
770 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
771 /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
776 ///
777 /// \param A - Where to put the substrings.
778 /// \param Separator - The string to split on.
779 /// \param MaxSplit - The maximum number of times the string is split.
780 /// \param KeepEmpty - True if empty substring should be added.
781 void split(SmallVectorImpl<StringRef> &A,
782 StringRef Separator, int MaxSplit = -1,
783 bool KeepEmpty = true) const;
784
785 /// Split into substrings around the occurrences of a separator character.
786 ///
787 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
788 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
789 /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
794 ///
795 /// \param A - Where to put the substrings.
796 /// \param Separator - The string to split on.
797 /// \param MaxSplit - The maximum number of times the string is split.
798 /// \param KeepEmpty - True if empty substring should be added.
799 void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
800 bool KeepEmpty = true) const;
801
802 /// Split into two substrings around the last occurrence of a separator
803 /// character.
804 ///
805 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
806 /// such that (*this == LHS + Separator + RHS) is true and RHS is
807 /// minimal. If \p Separator is not in the string, then the result is a
808 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
809 ///
810 /// \param Separator - The character to split on.
811 /// \return - The split substrings.
812 LLVM_NODISCARD
813 std::pair<StringRef, StringRef> rsplit(char Separator) const {
814 return rsplit(StringRef(&Separator, 1));
815 }
816
817 /// Return string with consecutive \p Char characters starting from the
818 /// the left removed.
819 LLVM_NODISCARD
820 StringRef ltrim(char Char) const {
821 return drop_front(std::min(Length, find_first_not_of(Char)));
822 }
823
824 /// Return string with consecutive characters in \p Chars starting from
825 /// the left removed.
826 LLVM_NODISCARD
827 StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
828 return drop_front(std::min(Length, find_first_not_of(Chars)));
829 }
830
831 /// Return string with consecutive \p Char characters starting from the
832 /// right removed.
833 LLVM_NODISCARD
834 StringRef rtrim(char Char) const {
835 return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
836 }
837
838 /// Return string with consecutive characters in \p Chars starting from
839 /// the right removed.
840 LLVM_NODISCARD
841 StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
842 return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
843 }
844
845 /// Return string with consecutive \p Char characters starting from the
846 /// left and right removed.
847 LLVM_NODISCARD
848 StringRef trim(char Char) const {
849 return ltrim(Char).rtrim(Char);
850 }
851
852 /// Return string with consecutive characters in \p Chars starting from
853 /// the left and right removed.
854 LLVM_NODISCARD
855 StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
856 return ltrim(Chars).rtrim(Chars);
857 }
858
859 /// @}
860 };
861
862 /// A wrapper around a string literal that serves as a proxy for constructing
863 /// global tables of StringRefs with the length computed at compile time.
864 /// In order to avoid the invocation of a global constructor, StringLiteral
865 /// should *only* be used in a constexpr context, as such:
866 ///
867 /// constexpr StringLiteral S("test");
868 ///
869 class StringLiteral : public StringRef {
870 private:
871 constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
872 }
873
874 public:
875 template <size_t N>
876 constexpr StringLiteral(const char (&Str)[N])
877#if defined(__clang__) && __has_attribute(enable_if)
878#pragma clang diagnostic push
879#pragma clang diagnostic ignored "-Wgcc-compat"
880 __attribute((enable_if(__builtin_strlen(Str) == N - 1,
881 "invalid string literal")))
882#pragma clang diagnostic pop
883#endif
884 : StringRef(Str, N - 1) {
885 }
886
887 // Explicit construction for strings like "foo\0bar".
888 template <size_t N>
889 static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
890 return StringLiteral(Str, N - 1);
891 }
892 };
893
894 /// @name StringRef Comparison Operators
895 /// @{
896
897 LLVM_ATTRIBUTE_ALWAYS_INLINE
898 inline bool operator==(StringRef LHS, StringRef RHS) {
899 return LHS.equals(RHS);
900 }
901
902 LLVM_ATTRIBUTE_ALWAYS_INLINE
903 inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
904
905 inline bool operator<(StringRef LHS, StringRef RHS) {
906 return LHS.compare(RHS) == -1;
907 }
908
909 inline bool operator<=(StringRef LHS, StringRef RHS) {
910 return LHS.compare(RHS) != 1;
911 }
912
913 inline bool operator>(StringRef LHS, StringRef RHS) {
914 return LHS.compare(RHS) == 1;
915 }
916
917 inline bool operator>=(StringRef LHS, StringRef RHS) {
918 return LHS.compare(RHS) != -1;
919 }
920
921 inline std::string &operator+=(std::string &buffer, StringRef string) {
922 return buffer.append(string.data(), string.size());
923 }
924
925 /// @}
926
927 /// Compute a hash_code for a StringRef.
928 LLVM_NODISCARD
929 hash_code hash_value(StringRef S);
930
931 // StringRefs can be treated like a POD type.
932 template <typename T> struct isPodLike;
933 template <> struct isPodLike<StringRef> { static const bool value = true; };
934
935} // end namespace llvm
936
937#endif // LLVM_ADT_STRINGREF_H
938