1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/tsl/platform/str_util.h" |
17 | |
18 | #include <cctype> |
19 | #include <cstdint> |
20 | #include <string> |
21 | #include <vector> |
22 | |
23 | #include "absl/strings/ascii.h" |
24 | #include "absl/strings/escaping.h" |
25 | #include "absl/strings/match.h" |
26 | #include "absl/strings/strip.h" |
27 | #include "tensorflow/tsl/platform/logging.h" |
28 | #include "tensorflow/tsl/platform/stringpiece.h" |
29 | |
30 | namespace tsl { |
31 | namespace str_util { |
32 | |
33 | string CEscape(StringPiece src) { return absl::CEscape(src); } |
34 | |
35 | bool CUnescape(StringPiece source, string* dest, string* error) { |
36 | return absl::CUnescape(source, dest, error); |
37 | } |
38 | |
39 | void StripTrailingWhitespace(string* s) { |
40 | absl::StripTrailingAsciiWhitespace(s); |
41 | } |
42 | |
43 | size_t RemoveLeadingWhitespace(StringPiece* text) { |
44 | absl::string_view new_text = absl::StripLeadingAsciiWhitespace(*text); |
45 | size_t count = text->size() - new_text.size(); |
46 | *text = new_text; |
47 | return count; |
48 | } |
49 | |
50 | size_t RemoveTrailingWhitespace(StringPiece* text) { |
51 | absl::string_view new_text = absl::StripTrailingAsciiWhitespace(*text); |
52 | size_t count = text->size() - new_text.size(); |
53 | *text = new_text; |
54 | return count; |
55 | } |
56 | |
57 | size_t RemoveWhitespaceContext(StringPiece* text) { |
58 | absl::string_view new_text = absl::StripAsciiWhitespace(*text); |
59 | size_t count = text->size() - new_text.size(); |
60 | *text = new_text; |
61 | return count; |
62 | } |
63 | |
64 | bool ConsumeLeadingDigits(StringPiece* s, uint64_t* val) { |
65 | const char* p = s->data(); |
66 | const char* limit = p + s->size(); |
67 | uint64_t v = 0; |
68 | while (p < limit) { |
69 | const char c = *p; |
70 | if (c < '0' || c > '9') break; |
71 | uint64_t new_v = (v * 10) + (c - '0'); |
72 | if (new_v / 8 < v) { |
73 | // Overflow occurred |
74 | return false; |
75 | } |
76 | v = new_v; |
77 | p++; |
78 | } |
79 | if (p > s->data()) { |
80 | // Consume some digits |
81 | s->remove_prefix(p - s->data()); |
82 | *val = v; |
83 | return true; |
84 | } else { |
85 | return false; |
86 | } |
87 | } |
88 | |
89 | bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val) { |
90 | const char* p = s->data(); |
91 | const char* limit = p + s->size(); |
92 | while (p < limit) { |
93 | const char c = *p; |
94 | if (isspace(c)) break; |
95 | p++; |
96 | } |
97 | const size_t n = p - s->data(); |
98 | if (n > 0) { |
99 | *val = StringPiece(s->data(), n); |
100 | s->remove_prefix(n); |
101 | return true; |
102 | } else { |
103 | *val = StringPiece(); |
104 | return false; |
105 | } |
106 | } |
107 | |
108 | bool ConsumePrefix(StringPiece* s, StringPiece expected) { |
109 | return absl::ConsumePrefix(s, expected); |
110 | } |
111 | |
112 | bool ConsumeSuffix(StringPiece* s, StringPiece expected) { |
113 | return absl::ConsumeSuffix(s, expected); |
114 | } |
115 | |
116 | StringPiece StripPrefix(StringPiece s, StringPiece expected) { |
117 | return absl::StripPrefix(s, expected); |
118 | } |
119 | |
120 | StringPiece StripSuffix(StringPiece s, StringPiece expected) { |
121 | return absl::StripSuffix(s, expected); |
122 | } |
123 | |
124 | // Return lower-cased version of s. |
125 | string Lowercase(StringPiece s) { return absl::AsciiStrToLower(s); } |
126 | |
127 | // Return upper-cased version of s. |
128 | string Uppercase(StringPiece s) { return absl::AsciiStrToUpper(s); } |
129 | |
130 | void TitlecaseString(string* s, StringPiece delimiters) { |
131 | bool upper = true; |
132 | for (string::iterator ss = s->begin(); ss != s->end(); ++ss) { |
133 | if (upper) { |
134 | *ss = toupper(*ss); |
135 | } |
136 | upper = (delimiters.find(*ss) != StringPiece::npos); |
137 | } |
138 | } |
139 | |
140 | string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub, |
141 | bool replace_all) { |
142 | // TODO(jlebar): We could avoid having to shift data around in the string if |
143 | // we had a StringPiece::find() overload that searched for a StringPiece. |
144 | string res(s); |
145 | size_t pos = 0; |
146 | while ((pos = res.find(oldsub.data(), pos, oldsub.size())) != string::npos) { |
147 | res.replace(pos, oldsub.size(), newsub.data(), newsub.size()); |
148 | pos += newsub.size(); |
149 | if (oldsub.empty()) { |
150 | pos++; // Match at the beginning of the text and after every byte |
151 | } |
152 | if (!replace_all) { |
153 | break; |
154 | } |
155 | } |
156 | return res; |
157 | } |
158 | |
159 | bool StartsWith(StringPiece text, StringPiece prefix) { |
160 | return absl::StartsWith(text, prefix); |
161 | } |
162 | |
163 | bool EndsWith(StringPiece text, StringPiece suffix) { |
164 | return absl::EndsWith(text, suffix); |
165 | } |
166 | |
167 | bool StrContains(StringPiece haystack, StringPiece needle) { |
168 | return absl::StrContains(haystack, needle); |
169 | } |
170 | |
// Returns the index of the first '\0' within the first `string_max_len` bytes
// of `str`, or `string_max_len` if none of those bytes is '\0'. No byte at or
// beyond index `string_max_len` is read.
size_t Strnlen(const char* str, const size_t string_max_len) {
  for (size_t i = 0; i < string_max_len; ++i) {
    if (str[i] == '\0') {
      return i;
    }
  }
  return string_max_len;
}
178 | |
179 | string ArgDefCase(StringPiece s) { |
180 | const size_t n = s.size(); |
181 | |
182 | // Compute the size of resulting string. |
183 | // Number of extra underscores we will need to add. |
184 | size_t = 0; |
185 | // Number of non-alpha chars in the beginning to skip. |
186 | size_t to_skip = 0; |
187 | for (size_t i = 0; i < n; ++i) { |
188 | // If we are skipping and current letter is non-alpha, skip it as well |
189 | if (i == to_skip && !isalpha(s[i])) { |
190 | ++to_skip; |
191 | continue; |
192 | } |
193 | |
194 | // If we are here, we are not skipping any more. |
195 | // If this letter is upper case, not the very first char in the |
196 | // resulting string, and previous letter isn't replaced with an underscore, |
197 | // we will need to insert an underscore. |
198 | if (isupper(s[i]) && i != to_skip && i > 0 && isalnum(s[i - 1])) { |
199 | ++extra_us; |
200 | } |
201 | } |
202 | |
203 | // Initialize result with all '_'s. There is no string |
204 | // constructor that does not initialize memory. |
205 | string result(n + extra_us - to_skip, '_'); |
206 | // i - index into s |
207 | // j - index into result |
208 | for (size_t i = to_skip, j = 0; i < n; ++i, ++j) { |
209 | DCHECK_LT(j, result.size()); |
210 | char c = s[i]; |
211 | // If c is not alphanumeric, we don't need to do anything |
212 | // since there is already an underscore in its place. |
213 | if (isalnum(c)) { |
214 | if (isupper(c)) { |
215 | // If current char is upper case, we might need to insert an |
216 | // underscore. |
217 | if (i != to_skip) { |
218 | DCHECK_GT(j, 0); |
219 | if (result[j - 1] != '_') ++j; |
220 | } |
221 | result[j] = tolower(c); |
222 | } else { |
223 | result[j] = c; |
224 | } |
225 | } |
226 | } |
227 | |
228 | return result; |
229 | } |
230 | |
231 | } // namespace str_util |
232 | } // namespace tsl |
233 | |