1//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_STRINGREF_H
10#define LLVM_ADT_STRINGREF_H
11
12#include "llvm/ADT/STLExtras.h"
13#include "llvm/ADT/iterator_range.h"
14#include "llvm/Support/Compiler.h"
15#include <algorithm>
16#include <cassert>
17#include <cstddef>
18#include <cstring>
19#include <limits>
20#include <string>
21#include <type_traits>
22#include <utility>
23
24// Declare the __builtin_strlen intrinsic for MSVC so it can be used in
25// constexpr context.
26#if defined(_MSC_VER)
27extern "C" size_t __builtin_strlen(const char *);
28#endif
29
30namespace llvm {
31
32  class APInt;
33  class hash_code;
34  template <typename T> class SmallVectorImpl;
35  class StringRef;
36
37  /// Helper functions for StringRef::getAsInteger.
38  bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
39                            unsigned long long &Result);
40
41  bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
42
43  bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
44                              unsigned long long &Result);
45  bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
46
47  /// StringRef - Represent a constant reference to a string, i.e. a character
48  /// array and a length, which need not be null terminated.
49  ///
50  /// This class does not own the string data, it is expected to be used in
51  /// situations where the character data resides in some other buffer, whose
52  /// lifetime extends past that of the StringRef. For this reason, it is not in
53  /// general safe to store a StringRef.
54  class StringRef {
55  public:
56    static const size_t npos = ~size_t(0);
57
58    using iterator = const char *;
59    using const_iterator = const char *;
60    using size_type = size_t;
61
62  private:
63    /// The start of the string, in an external buffer.
64    const char *Data = nullptr;
65
66    /// The length of the string.
67    size_t Length = 0;
68
69    // Workaround memcmp issue with null pointers (undefined behavior)
70    // by providing a specialized version
71    static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
72      if (Length == 0) { return 0; }
73      return ::memcmp(Lhs,Rhs,Length);
74    }
75
76    // Constexpr version of std::strlen.
77    static constexpr size_t strLen(const char *Str) {
78#if __cplusplus > 201402L
79      return std::char_traits<char>::length(Str);
80#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
81    (defined(_MSC_VER) && _MSC_VER >= 1916)
82      return __builtin_strlen(Str);
83#else
84      const char *Begin = Str;
85      while (*Str != '\0')
86        ++Str;
87      return Str - Begin;
88#endif
89    }
90
91  public:
92    /// @name Constructors
93    /// @{
94
95    /// Construct an empty string ref.
96    /*implicit*/ StringRef() = default;
97
98    /// Disable conversion from nullptr.  This prevents things like
99    /// if (S == nullptr)
100    StringRef(std::nullptr_t) = delete;
101
102    /// Construct a string ref from a cstring.
103    /*implicit*/ constexpr StringRef(const char *Str)
104        : Data(Str), Length(Str ? strLen(Str) : 0) {}
105
106    /// Construct a string ref from a pointer and length.
107    /*implicit*/ constexpr StringRef(const char *data, size_t length)
108        : Data(data), Length(length) {}
109
110    /// Construct a string ref from an std::string.
111    /*implicit*/ StringRef(const std::string &Str)
112      : Data(Str.data()), Length(Str.length()) {}
113
114    static StringRef withNullAsEmpty(const char *data) {
115      return StringRef(data ? data : "");
116    }
117
118    /// @}
119    /// @name Iterators
120    /// @{
121
122    iterator begin() const { return Data; }
123
124    iterator end() const { return Data + Length; }
125
126    const unsigned char *bytes_begin() const {
127      return reinterpret_cast<const unsigned char *>(begin());
128    }
129    const unsigned char *bytes_end() const {
130      return reinterpret_cast<const unsigned char *>(end());
131    }
132    iterator_range<const unsigned char *> bytes() const {
133      return make_range(bytes_begin(), bytes_end());
134    }
135
136    /// @}
137    /// @name String Operations
138    /// @{
139
140    /// data - Get a pointer to the start of the string (which may not be null
141    /// terminated).
142    LLVM_NODISCARD
143    const char *data() const { return Data; }
144
145    /// empty - Check if the string is empty.
146    LLVM_NODISCARD
147    bool empty() const { return Length == 0; }
148
149    /// size - Get the string size.
150    LLVM_NODISCARD
151    size_t size() const { return Length; }
152
153    /// front - Get the first character in the string.
154    LLVM_NODISCARD
155    char front() const {
156      assert(!empty());
157      return Data[0];
158    }
159
160    /// back - Get the last character in the string.
161    LLVM_NODISCARD
162    char back() const {
163      assert(!empty());
164      return Data[Length-1];
165    }
166
167    // copy - Allocate copy in Allocator and return StringRef to it.
168    template <typename Allocator>
169    LLVM_NODISCARD StringRef copy(Allocator &A) const {
170      // Don't request a length 0 copy from the allocator.
171      if (empty())
172        return StringRef();
173      char *S = A.template Allocate<char>(Length);
174      std::copy(begin(), end(), S);
175      return StringRef(S, Length);
176    }
177
178    /// equals - Check for string equality, this is more efficient than
179    /// compare() when the relative ordering of inequal strings isn't needed.
180    LLVM_NODISCARD
181    bool equals(StringRef RHS) const {
182      return (Length == RHS.Length &&
183              compareMemory(Data, RHS.Data, RHS.Length) == 0);
184    }
185
186    /// equals_lower - Check for string equality, ignoring case.
187    LLVM_NODISCARD
188    bool equals_lower(StringRef RHS) const {
189      return Length == RHS.Length && compare_lower(RHS) == 0;
190    }
191
192    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
193    /// is lexicographically less than, equal to, or greater than the \p RHS.
194    LLVM_NODISCARD
195    int compare(StringRef RHS) const {
196      // Check the prefix for a mismatch.
197      if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
198        return Res < 0 ? -1 : 1;
199
200      // Otherwise the prefixes match, so we only need to check the lengths.
201      if (Length == RHS.Length)
202        return 0;
203      return Length < RHS.Length ? -1 : 1;
204    }
205
206    /// compare_lower - Compare two strings, ignoring case.
207    LLVM_NODISCARD
208    int compare_lower(StringRef RHS) const;
209
210    /// compare_numeric - Compare two strings, treating sequences of digits as
211    /// numbers.
212    LLVM_NODISCARD
213    int compare_numeric(StringRef RHS) const;
214
215    /// Determine the edit distance between this string and another
216    /// string.
217    ///
218    /// \param Other the string to compare this string against.
219    ///
220    /// \param AllowReplacements whether to allow character
221    /// replacements (change one character into another) as a single
222    /// operation, rather than as two operations (an insertion and a
223    /// removal).
224    ///
225    /// \param MaxEditDistance If non-zero, the maximum edit distance that
226    /// this routine is allowed to compute. If the edit distance will exceed
227    /// that maximum, returns \c MaxEditDistance+1.
228    ///
229    /// \returns the minimum number of character insertions, removals,
230    /// or (if \p AllowReplacements is \c true) replacements needed to
231    /// transform one of the given strings into the other. If zero,
232    /// the strings are identical.
233    LLVM_NODISCARD
234    unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
235                           unsigned MaxEditDistance = 0) const;
236
237    /// str - Get the contents as an std::string.
238    LLVM_NODISCARD
239    std::string str() const {
240      if (!Data) return std::string();
241      return std::string(Data, Length);
242    }
243
244    /// @}
245    /// @name Operator Overloads
246    /// @{
247
248    LLVM_NODISCARD
249    char operator[](size_t Index) const {
250      assert(Index < Length && "Invalid index!");
251      return Data[Index];
252    }
253
    /// Disallow accidental assignment from a temporary std::string.
    ///
    /// The operator is deleted, and it is a template enabled only when T is
    /// exactly std::string. The declaration is this complicated so that
    /// `stringRef = {}` and `stringRef = "abc"` continue to select the move
    /// assignment operator.
    template <typename T>
    typename std::enable_if<std::is_same<T, std::string>::value,
                            StringRef>::type &
    operator=(T &&Str) = delete;
262
263    /// @}
264    /// @name Type Conversions
265    /// @{
266
267    operator std::string() const {
268      return str();
269    }
270
271    /// @}
272    /// @name String Predicates
273    /// @{
274
275    /// Check if this string starts with the given \p Prefix.
276    LLVM_NODISCARD
277    bool startswith(StringRef Prefix) const {
278      return Length >= Prefix.Length &&
279             compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
280    }
281
282    /// Check if this string starts with the given \p Prefix, ignoring case.
283    LLVM_NODISCARD
284    bool startswith_lower(StringRef Prefix) const;
285
286    /// Check if this string ends with the given \p Suffix.
287    LLVM_NODISCARD
288    bool endswith(StringRef Suffix) const {
289      return Length >= Suffix.Length &&
290        compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
291    }
292
293    /// Check if this string ends with the given \p Suffix, ignoring case.
294    LLVM_NODISCARD
295    bool endswith_lower(StringRef Suffix) const;
296
297    /// @}
298    /// @name String Searching
299    /// @{
300
301    /// Search for the first character \p C in the string.
302    ///
303    /// \returns The index of the first occurrence of \p C, or npos if not
304    /// found.
305    LLVM_NODISCARD
306    size_t find(char C, size_t From = 0) const {
307      size_t FindBegin = std::min(From, Length);
308      if (FindBegin < Length) { // Avoid calling memchr with nullptr.
309        // Just forward to memchr, which is faster than a hand-rolled loop.
310        if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
311          return static_cast<const char *>(P) - Data;
312      }
313      return npos;
314    }
315
316    /// Search for the first character \p C in the string, ignoring case.
317    ///
318    /// \returns The index of the first occurrence of \p C, or npos if not
319    /// found.
320    LLVM_NODISCARD
321    size_t find_lower(char C, size_t From = 0) const;
322
323    /// Search for the first character satisfying the predicate \p F
324    ///
325    /// \returns The index of the first character satisfying \p F starting from
326    /// \p From, or npos if not found.
327    LLVM_NODISCARD
328    size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
329      StringRef S = drop_front(From);
330      while (!S.empty()) {
331        if (F(S.front()))
332          return size() - S.size();
333        S = S.drop_front();
334      }
335      return npos;
336    }
337
338    /// Search for the first character not satisfying the predicate \p F
339    ///
340    /// \returns The index of the first character not satisfying \p F starting
341    /// from \p From, or npos if not found.
342    LLVM_NODISCARD
343    size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
344      return find_if([F](char c) { return !F(c); }, From);
345    }
346
347    /// Search for the first string \p Str in the string.
348    ///
349    /// \returns The index of the first occurrence of \p Str, or npos if not
350    /// found.
351    LLVM_NODISCARD
352    size_t find(StringRef Str, size_t From = 0) const;
353
354    /// Search for the first string \p Str in the string, ignoring case.
355    ///
356    /// \returns The index of the first occurrence of \p Str, or npos if not
357    /// found.
358    LLVM_NODISCARD
359    size_t find_lower(StringRef Str, size_t From = 0) const;
360
361    /// Search for the last character \p C in the string.
362    ///
363    /// \returns The index of the last occurrence of \p C, or npos if not
364    /// found.
365    LLVM_NODISCARD
366    size_t rfind(char C, size_t From = npos) const {
367      From = std::min(From, Length);
368      size_t i = From;
369      while (i != 0) {
370        --i;
371        if (Data[i] == C)
372          return i;
373      }
374      return npos;
375    }
376
377    /// Search for the last character \p C in the string, ignoring case.
378    ///
379    /// \returns The index of the last occurrence of \p C, or npos if not
380    /// found.
381    LLVM_NODISCARD
382    size_t rfind_lower(char C, size_t From = npos) const;
383
384    /// Search for the last string \p Str in the string.
385    ///
386    /// \returns The index of the last occurrence of \p Str, or npos if not
387    /// found.
388    LLVM_NODISCARD
389    size_t rfind(StringRef Str) const;
390
391    /// Search for the last string \p Str in the string, ignoring case.
392    ///
393    /// \returns The index of the last occurrence of \p Str, or npos if not
394    /// found.
395    LLVM_NODISCARD
396    size_t rfind_lower(StringRef Str) const;
397
398    /// Find the first character in the string that is \p C, or npos if not
399    /// found. Same as find.
400    LLVM_NODISCARD
401    size_t find_first_of(char C, size_t From = 0) const {
402      return find(C, From);
403    }
404
405    /// Find the first character in the string that is in \p Chars, or npos if
406    /// not found.
407    ///
408    /// Complexity: O(size() + Chars.size())
409    LLVM_NODISCARD
410    size_t find_first_of(StringRef Chars, size_t From = 0) const;
411
412    /// Find the first character in the string that is not \p C or npos if not
413    /// found.
414    LLVM_NODISCARD
415    size_t find_first_not_of(char C, size_t From = 0) const;
416
417    /// Find the first character in the string that is not in the string
418    /// \p Chars, or npos if not found.
419    ///
420    /// Complexity: O(size() + Chars.size())
421    LLVM_NODISCARD
422    size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
423
424    /// Find the last character in the string that is \p C, or npos if not
425    /// found.
426    LLVM_NODISCARD
427    size_t find_last_of(char C, size_t From = npos) const {
428      return rfind(C, From);
429    }
430
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
433    ///
434    /// Complexity: O(size() + Chars.size())
435    LLVM_NODISCARD
436    size_t find_last_of(StringRef Chars, size_t From = npos) const;
437
438    /// Find the last character in the string that is not \p C, or npos if not
439    /// found.
440    LLVM_NODISCARD
441    size_t find_last_not_of(char C, size_t From = npos) const;
442
443    /// Find the last character in the string that is not in \p Chars, or
444    /// npos if not found.
445    ///
446    /// Complexity: O(size() + Chars.size())
447    LLVM_NODISCARD
448    size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
449
450    /// Return true if the given string is a substring of *this, and false
451    /// otherwise.
452    LLVM_NODISCARD
453    bool contains(StringRef Other) const { return find(Other) != npos; }
454
455    /// Return true if the given character is contained in *this, and false
456    /// otherwise.
457    LLVM_NODISCARD
458    bool contains(char C) const { return find_first_of(C) != npos; }
459
460    /// Return true if the given string is a substring of *this, and false
461    /// otherwise.
462    LLVM_NODISCARD
463    bool contains_lower(StringRef Other) const {
464      return find_lower(Other) != npos;
465    }
466
467    /// Return true if the given character is contained in *this, and false
468    /// otherwise.
469    LLVM_NODISCARD
470    bool contains_lower(char C) const { return find_lower(C) != npos; }
471
472    /// @}
473    /// @name Helpful Algorithms
474    /// @{
475
476    /// Return the number of occurrences of \p C in the string.
477    LLVM_NODISCARD
478    size_t count(char C) const {
479      size_t Count = 0;
480      for (size_t i = 0, e = Length; i != e; ++i)
481        if (Data[i] == C)
482          ++Count;
483      return Count;
484    }
485
486    /// Return the number of non-overlapped occurrences of \p Str in
487    /// the string.
488    size_t count(StringRef Str) const;
489
490    /// Parse the current string as an integer of the specified radix.  If
491    /// \p Radix is specified as zero, this does radix autosensing using
492    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
493    ///
494    /// If the string is invalid or if only a subset of the string is valid,
495    /// this returns true to signify the error.  The string is considered
496    /// erroneous if empty or if it overflows T.
497    template <typename T>
498    typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
499    getAsInteger(unsigned Radix, T &Result) const {
500      long long LLVal;
501      if (getAsSignedInteger(*this, Radix, LLVal) ||
502            static_cast<T>(LLVal) != LLVal)
503        return true;
504      Result = LLVal;
505      return false;
506    }
507
508    template <typename T>
509    typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
510    getAsInteger(unsigned Radix, T &Result) const {
511      unsigned long long ULLVal;
512      // The additional cast to unsigned long long is required to avoid the
513      // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
514      // 'unsigned __int64' when instantiating getAsInteger with T = bool.
515      if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
516          static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
517        return true;
518      Result = ULLVal;
519      return false;
520    }
521
522    /// Parse the current string as an integer of the specified radix.  If
523    /// \p Radix is specified as zero, this does radix autosensing using
524    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
525    ///
526    /// If the string does not begin with a number of the specified radix,
527    /// this returns true to signify the error. The string is considered
528    /// erroneous if empty or if it overflows T.
529    /// The portion of the string representing the discovered numeric value
530    /// is removed from the beginning of the string.
531    template <typename T>
532    typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
533    consumeInteger(unsigned Radix, T &Result) {
534      long long LLVal;
535      if (consumeSignedInteger(*this, Radix, LLVal) ||
536          static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
537        return true;
538      Result = LLVal;
539      return false;
540    }
541
542    template <typename T>
543    typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
544    consumeInteger(unsigned Radix, T &Result) {
545      unsigned long long ULLVal;
546      if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
547          static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
548        return true;
549      Result = ULLVal;
550      return false;
551    }
552
553    /// Parse the current string as an integer of the specified \p Radix, or of
554    /// an autosensed radix if the \p Radix given is 0.  The current value in
555    /// \p Result is discarded, and the storage is changed to be wide enough to
556    /// store the parsed integer.
557    ///
558    /// \returns true if the string does not solely consist of a valid
559    /// non-empty number in the appropriate base.
560    ///
561    /// APInt::fromString is superficially similar but assumes the
562    /// string is well-formed in the given radix.
563    bool getAsInteger(unsigned Radix, APInt &Result) const;
564
565    /// Parse the current string as an IEEE double-precision floating
566    /// point value.  The string must be a well-formed double.
567    ///
568    /// If \p AllowInexact is false, the function will fail if the string
569    /// cannot be represented exactly.  Otherwise, the function only fails
570    /// in case of an overflow or underflow, or an invalid floating point
571    /// representation.
572    bool getAsDouble(double &Result, bool AllowInexact = true) const;
573
574    /// @}
575    /// @name String Operations
576    /// @{
577
    /// Convert the given ASCII string to lowercase.
579    LLVM_NODISCARD
580    std::string lower() const;
581
582    /// Convert the given ASCII string to uppercase.
583    LLVM_NODISCARD
584    std::string upper() const;
585
586    /// @}
587    /// @name Substring Operations
588    /// @{
589
590    /// Return a reference to the substring from [Start, Start + N).
591    ///
592    /// \param Start The index of the starting character in the substring; if
593    /// the index is npos or greater than the length of the string then the
594    /// empty substring will be returned.
595    ///
    /// \param N The number of characters to include in the substring. If N
597    /// exceeds the number of characters remaining in the string, the string
598    /// suffix (starting with \p Start) will be returned.
599    LLVM_NODISCARD
600    StringRef substr(size_t Start, size_t N = npos) const {
601      Start = std::min(Start, Length);
602      return StringRef(Data + Start, std::min(N, Length - Start));
603    }
604
605    /// Return a StringRef equal to 'this' but with only the first \p N
606    /// elements remaining.  If \p N is greater than the length of the
607    /// string, the entire string is returned.
608    LLVM_NODISCARD
609    StringRef take_front(size_t N = 1) const {
610      if (N >= size())
611        return *this;
612      return drop_back(size() - N);
613    }
614
615    /// Return a StringRef equal to 'this' but with only the last \p N
616    /// elements remaining.  If \p N is greater than the length of the
617    /// string, the entire string is returned.
618    LLVM_NODISCARD
619    StringRef take_back(size_t N = 1) const {
620      if (N >= size())
621        return *this;
622      return drop_front(size() - N);
623    }
624
625    /// Return the longest prefix of 'this' such that every character
626    /// in the prefix satisfies the given predicate.
627    LLVM_NODISCARD
628    StringRef take_while(function_ref<bool(char)> F) const {
629      return substr(0, find_if_not(F));
630    }
631
632    /// Return the longest prefix of 'this' such that no character in
633    /// the prefix satisfies the given predicate.
634    LLVM_NODISCARD
635    StringRef take_until(function_ref<bool(char)> F) const {
636      return substr(0, find_if(F));
637    }
638
639    /// Return a StringRef equal to 'this' but with the first \p N elements
640    /// dropped.
641    LLVM_NODISCARD
642    StringRef drop_front(size_t N = 1) const {
643      assert(size() >= N && "Dropping more elements than exist");
644      return substr(N);
645    }
646
647    /// Return a StringRef equal to 'this' but with the last \p N elements
648    /// dropped.
649    LLVM_NODISCARD
650    StringRef drop_back(size_t N = 1) const {
651      assert(size() >= N && "Dropping more elements than exist");
652      return substr(0, size()-N);
653    }
654
655    /// Return a StringRef equal to 'this', but with all characters satisfying
656    /// the given predicate dropped from the beginning of the string.
657    LLVM_NODISCARD
658    StringRef drop_while(function_ref<bool(char)> F) const {
659      return substr(find_if_not(F));
660    }
661
662    /// Return a StringRef equal to 'this', but with all characters not
663    /// satisfying the given predicate dropped from the beginning of the string.
664    LLVM_NODISCARD
665    StringRef drop_until(function_ref<bool(char)> F) const {
666      return substr(find_if(F));
667    }
668
669    /// Returns true if this StringRef has the given prefix and removes that
670    /// prefix.
671    bool consume_front(StringRef Prefix) {
672      if (!startswith(Prefix))
673        return false;
674
675      *this = drop_front(Prefix.size());
676      return true;
677    }
678
679    /// Returns true if this StringRef has the given suffix and removes that
680    /// suffix.
681    bool consume_back(StringRef Suffix) {
682      if (!endswith(Suffix))
683        return false;
684
685      *this = drop_back(Suffix.size());
686      return true;
687    }
688
689    /// Return a reference to the substring from [Start, End).
690    ///
691    /// \param Start The index of the starting character in the substring; if
692    /// the index is npos or greater than the length of the string then the
693    /// empty substring will be returned.
694    ///
695    /// \param End The index following the last character to include in the
696    /// substring. If this is npos or exceeds the number of characters
697    /// remaining in the string, the string suffix (starting with \p Start)
698    /// will be returned. If this is less than \p Start, an empty string will
699    /// be returned.
700    LLVM_NODISCARD
701    StringRef slice(size_t Start, size_t End) const {
702      Start = std::min(Start, Length);
703      End = std::min(std::max(Start, End), Length);
704      return StringRef(Data + Start, End - Start);
705    }
706
707    /// Split into two substrings around the first occurrence of a separator
708    /// character.
709    ///
710    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
711    /// such that (*this == LHS + Separator + RHS) is true and RHS is
712    /// maximal. If \p Separator is not in the string, then the result is a
713    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
714    ///
715    /// \param Separator The character to split on.
716    /// \returns The split substrings.
717    LLVM_NODISCARD
718    std::pair<StringRef, StringRef> split(char Separator) const {
719      return split(StringRef(&Separator, 1));
720    }
721
722    /// Split into two substrings around the first occurrence of a separator
723    /// string.
724    ///
725    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
726    /// such that (*this == LHS + Separator + RHS) is true and RHS is
727    /// maximal. If \p Separator is not in the string, then the result is a
728    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
729    ///
730    /// \param Separator - The string to split on.
731    /// \return - The split substrings.
732    LLVM_NODISCARD
733    std::pair<StringRef, StringRef> split(StringRef Separator) const {
734      size_t Idx = find(Separator);
735      if (Idx == npos)
736        return std::make_pair(*this, StringRef());
737      return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
738    }
739
740    /// Split into two substrings around the last occurrence of a separator
741    /// string.
742    ///
743    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
744    /// such that (*this == LHS + Separator + RHS) is true and RHS is
745    /// minimal. If \p Separator is not in the string, then the result is a
746    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
747    ///
748    /// \param Separator - The string to split on.
749    /// \return - The split substrings.
750    LLVM_NODISCARD
751    std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
752      size_t Idx = rfind(Separator);
753      if (Idx == npos)
754        return std::make_pair(*this, StringRef());
755      return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
756    }
757
758    /// Split into substrings around the occurrences of a separator string.
759    ///
760    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
761    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
762    /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
767    ///
768    /// \param A - Where to put the substrings.
769    /// \param Separator - The string to split on.
770    /// \param MaxSplit - The maximum number of times the string is split.
771    /// \param KeepEmpty - True if empty substring should be added.
772    void split(SmallVectorImpl<StringRef> &A,
773               StringRef Separator, int MaxSplit = -1,
774               bool KeepEmpty = true) const;
775
776    /// Split into substrings around the occurrences of a separator character.
777    ///
778    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
779    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
780    /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
785    ///
786    /// \param A - Where to put the substrings.
787    /// \param Separator - The string to split on.
788    /// \param MaxSplit - The maximum number of times the string is split.
789    /// \param KeepEmpty - True if empty substring should be added.
790    void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
791               bool KeepEmpty = true) const;
792
793    /// Split into two substrings around the last occurrence of a separator
794    /// character.
795    ///
796    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
797    /// such that (*this == LHS + Separator + RHS) is true and RHS is
798    /// minimal. If \p Separator is not in the string, then the result is a
799    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
800    ///
801    /// \param Separator - The character to split on.
802    /// \return - The split substrings.
803    LLVM_NODISCARD
804    std::pair<StringRef, StringRef> rsplit(char Separator) const {
805      return rsplit(StringRef(&Separator, 1));
806    }
807
808    /// Return string with consecutive \p Char characters starting from the
809    /// the left removed.
810    LLVM_NODISCARD
811    StringRef ltrim(char Char) const {
812      return drop_front(std::min(Length, find_first_not_of(Char)));
813    }
814
815    /// Return string with consecutive characters in \p Chars starting from
816    /// the left removed.
817    LLVM_NODISCARD
818    StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
819      return drop_front(std::min(Length, find_first_not_of(Chars)));
820    }
821
822    /// Return string with consecutive \p Char characters starting from the
823    /// right removed.
824    LLVM_NODISCARD
825    StringRef rtrim(char Char) const {
826      return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
827    }
828
829    /// Return string with consecutive characters in \p Chars starting from
830    /// the right removed.
831    LLVM_NODISCARD
832    StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
833      return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
834    }
835
836    /// Return string with consecutive \p Char characters starting from the
837    /// left and right removed.
838    LLVM_NODISCARD
839    StringRef trim(char Char) const {
840      return ltrim(Char).rtrim(Char);
841    }
842
843    /// Return string with consecutive characters in \p Chars starting from
844    /// the left and right removed.
845    LLVM_NODISCARD
846    StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
847      return ltrim(Chars).rtrim(Chars);
848    }
849
850    /// @}
851  };
852
853  /// A wrapper around a string literal that serves as a proxy for constructing
854  /// global tables of StringRefs with the length computed at compile time.
855  /// In order to avoid the invocation of a global constructor, StringLiteral
856  /// should *only* be used in a constexpr context, as such:
857  ///
858  /// constexpr StringLiteral S("test");
859  ///
860  class StringLiteral : public StringRef {
861  private:
862    constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
863    }
864
865  public:
866    template <size_t N>
867    constexpr StringLiteral(const char (&Str)[N])
868#if defined(__clang__) && __has_attribute(enable_if)
869#pragma clang diagnostic push
870#pragma clang diagnostic ignored "-Wgcc-compat"
871        __attribute((enable_if(__builtin_strlen(Str) == N - 1,
872                               "invalid string literal")))
873#pragma clang diagnostic pop
874#endif
875        : StringRef(Str, N - 1) {
876    }
877
878    // Explicit construction for strings like "foo\0bar".
879    template <size_t N>
880    static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
881      return StringLiteral(Str, N - 1);
882    }
883  };
884
885  /// @name StringRef Comparison Operators
886  /// @{
887
888  inline bool operator==(StringRef LHS, StringRef RHS) {
889    return LHS.equals(RHS);
890  }
891
892  inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
893
894  inline bool operator<(StringRef LHS, StringRef RHS) {
895    return LHS.compare(RHS) == -1;
896  }
897
898  inline bool operator<=(StringRef LHS, StringRef RHS) {
899    return LHS.compare(RHS) != 1;
900  }
901
902  inline bool operator>(StringRef LHS, StringRef RHS) {
903    return LHS.compare(RHS) == 1;
904  }
905
906  inline bool operator>=(StringRef LHS, StringRef RHS) {
907    return LHS.compare(RHS) != -1;
908  }
909
910  inline std::string &operator+=(std::string &buffer, StringRef string) {
911    return buffer.append(string.data(), string.size());
912  }
913
914  /// @}
915
916  /// Compute a hash_code for a StringRef.
917  LLVM_NODISCARD
918  hash_code hash_value(StringRef S);
919
920} // end namespace llvm
921
922#endif // LLVM_ADT_STRINGREF_H
923