1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_TSL_PLATFORM_STR_UTIL_H_ |
17 | #define TENSORFLOW_TSL_PLATFORM_STR_UTIL_H_ |
18 | |
19 | #include <cstdint> |
20 | #include <string> |
21 | #include <vector> |
22 | |
23 | #include "absl/strings/str_join.h" |
24 | #include "absl/strings/str_split.h" |
25 | #include "tensorflow/tsl/platform/macros.h" |
26 | #include "tensorflow/tsl/platform/stringpiece.h" |
27 | #include "tensorflow/tsl/platform/types.h" |
28 | |
29 | // Basic string utility routines |
30 | namespace tsl { |
31 | namespace str_util { |
32 | |
// Returns a copy of 'src' in which unprintable characters have been
// escaped using C-style escape sequences.
std::string CEscape(StringPiece src);
36 | |
// Copies "source" to "*dest", rewriting C-style escape sequences --
// '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
//
// Errors: Stores the description of the first encountered error in
// "*error". To disable error reporting, pass nullptr for "error".
// NOTE(review): the bool result presumably reports success (true) versus
// failure (false) -- confirm against the implementation.
//
// NOTE: Does not support \u or \U!
bool CUnescape(StringPiece source, std::string* dest, std::string* error);
45 | |
// Removes any trailing whitespace from "*s", modifying the string in place.
void StripTrailingWhitespace(std::string* s);
48 | |
// Removes leading ascii_isspace() characters from "*text".
// Returns the number of characters removed.
size_t RemoveLeadingWhitespace(StringPiece* text);
52 | |
// Removes trailing ascii_isspace() characters from "*text".
// Returns the number of characters removed.
size_t RemoveTrailingWhitespace(StringPiece* text);
56 | |
// Removes both leading and trailing ascii_isspace() characters from "*text".
// Returns the number of characters removed.
size_t RemoveWhitespaceContext(StringPiece* text);
60 | |
// Consumes a leading unsigned integer value. If any digits were
// found, stores the value of the leading unsigned number in "*val",
// advances "*s" past the consumed number, and returns true.
// Returns false if no leading digits were found or if overflow occurred.
bool ConsumeLeadingDigits(StringPiece* s, uint64_t* val);
66 | |
// Consumes a leading token composed of non-whitespace characters only.
// If "*s" starts with one or more non-whitespace characters, stores them in
// "*val", advances "*s" past them, and returns true. Otherwise returns false.
bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);
71 | |
// If "*s" starts with "expected", removes that prefix from "*s" and returns
// true. Otherwise, returns false.
bool ConsumePrefix(StringPiece* s, StringPiece expected);
75 | |
// If "*s" ends with "expected", removes that suffix from "*s" and returns
// true. Otherwise, returns false.
bool ConsumeSuffix(StringPiece* s, StringPiece expected);
79 | |
// If "s" starts with "expected", returns a view into "s" that begins right
// after "expected"; "s" itself is taken by value and so is left unchanged.
// Otherwise, returns the original "s".
// The result must be used (TF_MUST_USE_RESULT).
TF_MUST_USE_RESULT StringPiece StripPrefix(StringPiece s, StringPiece expected);
84 | |
// If "s" ends with "expected", returns a view into "s" that stops right
// before "expected"; "s" itself is taken by value and so is left unchanged.
// Otherwise, returns the original "s".
// The result must be used (TF_MUST_USE_RESULT).
TF_MUST_USE_RESULT StringPiece StripSuffix(StringPiece s, StringPiece expected);
89 | |
// Returns a lower-cased copy of "s".
std::string Lowercase(StringPiece s);
92 | |
// Returns an upper-cased copy of "s".
std::string Uppercase(StringPiece s);
95 | |
// Capitalizes the first character of each word in "*s", modifying the string
// in place. "delimiters" is the set of characters treated as word boundaries.
void TitlecaseString(std::string* s, StringPiece delimiters);
99 | |
// Returns a copy of "s" in which the first occurrence (if replace_all is
// false) or all occurrences (if replace_all is true) of "oldsub" have been
// replaced with "newsub". "s" itself is not modified.
std::string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
                          bool replace_all);
104 | |
105 | // Join functionality |
106 | template <typename T> |
107 | std::string Join(const T& s, const char* sep) { |
108 | return absl::StrJoin(s, sep); |
109 | } |
110 | |
111 | // A variant of Join where for each element of "s", f(&dest_string, elem) |
112 | // is invoked (f is often constructed with a lambda of the form: |
113 | // [](string* result, ElemType elem) |
114 | template <typename T, typename Formatter> |
115 | std::string Join(const T& s, const char* sep, Formatter f) { |
116 | return absl::StrJoin(s, sep, f); |
117 | } |
118 | |
119 | struct AllowEmpty { |
120 | bool operator()(StringPiece sp) const { return true; } |
121 | }; |
122 | struct SkipEmpty { |
123 | bool operator()(StringPiece sp) const { return !sp.empty(); } |
124 | }; |
125 | struct SkipWhitespace { |
126 | bool operator()(StringPiece sp) const { |
127 | return !absl::StripTrailingAsciiWhitespace(sp).empty(); |
128 | } |
129 | }; |
130 | |
131 | // Split strings using any of the supplied delimiters. For example: |
132 | // Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}. |
133 | inline std::vector<string> Split(StringPiece text, StringPiece delims) { |
134 | return text.empty() ? std::vector<string>() |
135 | : absl::StrSplit(text, absl::ByAnyChar(delims)); |
136 | } |
137 | |
138 | template <typename Predicate> |
139 | std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) { |
140 | return text.empty() ? std::vector<string>() |
141 | : absl::StrSplit(text, absl::ByAnyChar(delims), p); |
142 | } |
143 | |
144 | inline std::vector<string> Split(StringPiece text, char delim) { |
145 | return text.empty() ? std::vector<string>() : absl::StrSplit(text, delim); |
146 | } |
147 | |
148 | template <typename Predicate> |
149 | std::vector<string> Split(StringPiece text, char delim, Predicate p) { |
150 | return text.empty() ? std::vector<string>() : absl::StrSplit(text, delim, p); |
151 | } |
152 | |
// StartsWith()
//
// Returns true if the given string `text` begins with `prefix`, and false
// otherwise.
bool StartsWith(StringPiece text, StringPiece prefix);
157 | |
// EndsWith()
//
// Returns true if the given string `text` ends with `suffix`, and false
// otherwise.
bool EndsWith(StringPiece text, StringPiece suffix);
162 | |
// StrContains()
//
// Returns true if the given string `haystack` contains the substring
// `needle`, and false otherwise.
bool StrContains(StringPiece haystack, StringPiece needle);
167 | |
// Returns the length of the given null-terminated byte string 'str', or
// 'string_max_len' if the null character was not found in the first
// 'string_max_len' bytes of 'str'.
size_t Strnlen(const char* str, const size_t string_max_len);
172 | |
// ----- NON STANDARD, TF SPECIFIC METHOD -----
// Converts "^2ILoveYou!" to "i_love_you_". More specifically:
// - converts all non-alphanumeric characters to underscores
// - replaces each capital letter with '_' followed by that letter in lower
//   case; the '_' is left out for the very first character and when there
//   is already an '_' before the letter
// - skips leading non-alpha characters
// This method is useful for producing strings matching "[a-z][a-z0-9_]*"
// as required by OpDef.ArgDef.name. The resulting string is either empty or
// matches this regex.
std::string ArgDefCase(StringPiece s);
184 | |
185 | } // namespace str_util |
186 | } // namespace tsl |
187 | |
188 | #endif // TENSORFLOW_TSL_PLATFORM_STR_UTIL_H_ |
189 | |