1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_TSL_PLATFORM_STR_UTIL_H_
17#define TENSORFLOW_TSL_PLATFORM_STR_UTIL_H_
18
19#include <cstdint>
20#include <string>
21#include <vector>
22
23#include "absl/strings/str_join.h"
24#include "absl/strings/str_split.h"
25#include "tensorflow/tsl/platform/macros.h"
26#include "tensorflow/tsl/platform/stringpiece.h"
27#include "tensorflow/tsl/platform/types.h"
28
29// Basic string utility routines
30namespace tsl {
31namespace str_util {
32
// Returns a copy of 'src' in which unprintable characters have been
// replaced by C-style escape sequences.
//
// 'src' is taken by value as a StringPiece; the escaped text is handed back
// as a std::string.
std::string CEscape(StringPiece src);
36
// Copies "source" to "*dest", rewriting C-style escape sequences --
// '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
//
// Errors: Stores the description of the first encountered error in
// "*error". To disable error reporting, pass nullptr for "error".
//
// NOTE(review): the bool result presumably signals success (true) versus
// failure (false) -- confirm against the implementation.
//
// NOTE: Does not support \u or \U!
bool CUnescape(StringPiece source, std::string* dest, std::string* error);
45
// Removes any trailing whitespace from "*s". The string is edited through
// the pointer; nothing is returned.
void StripTrailingWhitespace(std::string* s);
48
// Removes leading ascii_isspace() characters from "*text".
// Returns the number of characters removed.
size_t RemoveLeadingWhitespace(StringPiece* text);
52
// Removes trailing ascii_isspace() characters from "*text".
// Returns the number of characters removed.
size_t RemoveTrailingWhitespace(StringPiece* text);
56
// Removes both leading and trailing ascii_isspace() characters from "*text".
// Returns the number of characters removed.
// NOTE(review): presumably the combined count from both ends -- confirm
// against the implementation.
size_t RemoveWhitespaceContext(StringPiece* text);
60
// Consumes a leading unsigned integer value. If "*s" starts with one or
// more digits, stores the value of that leading number in "*val", advances
// "*s" past the consumed digits, and returns true.
//
// Returns false if no leading digits were found or if overflow occurred.
// NOTE(review): whether "*s" and "*val" are left untouched on failure is
// not stated here -- confirm against the implementation.
bool ConsumeLeadingDigits(StringPiece* s, uint64_t* val);
66
// Consumes a leading token made up solely of non-whitespace characters.
// If "*s" starts with at least one non-whitespace character, stores that run
// of characters in "*val", advances "*s" past it, and returns true.
// Otherwise returns false.
bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);
71
// If "*s" starts with "expected", removes that prefix from "*s" and returns
// true. Otherwise, returns false.
bool ConsumePrefix(StringPiece* s, StringPiece expected);
75
// If "*s" ends with "expected", removes that suffix from "*s" and returns
// true. Otherwise, returns false.
bool ConsumeSuffix(StringPiece* s, StringPiece expected);
79
// Non-mutating counterpart of prefix removal: "s" is passed by value and is
// never changed.
//
// If "s" starts with "expected", returns a view into "s" that begins right
// after "expected". Otherwise, returns the original "s".
//
// The result must be used (TF_MUST_USE_RESULT), since the function's only
// effect is its return value.
TF_MUST_USE_RESULT StringPiece StripPrefix(StringPiece s, StringPiece expected);
84
// Non-mutating counterpart of suffix removal: "s" is passed by value and is
// never changed.
//
// If "s" ends with "expected", returns a view into "s" that stops right
// before "expected". Otherwise, returns the original "s".
//
// The result must be used (TF_MUST_USE_RESULT), since the function's only
// effect is its return value.
TF_MUST_USE_RESULT StringPiece StripSuffix(StringPiece s, StringPiece expected);
89
// Returns a lower-cased copy of "s" as a std::string.
// NOTE(review): presumably an ASCII-only case mapping -- confirm against the
// implementation.
std::string Lowercase(StringPiece s);
92
// Returns an upper-cased copy of "s" as a std::string.
// NOTE(review): presumably an ASCII-only case mapping -- confirm against the
// implementation.
std::string Uppercase(StringPiece s);
95
// Capitalizes the first character of each word in "*s". The string is
// edited through the pointer; nothing is returned.
//
// "delimiters" is the set of characters that can be used as word
// boundaries.
void TitlecaseString(std::string* s, StringPiece delimiters);
99
// Replaces occurrences of "oldsub" in "s" with "newsub" and returns the
// result as a std::string:
//   - replace_all == false: only the first occurrence is replaced;
//   - replace_all == true:  all occurrences are replaced.
std::string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
                          bool replace_all);
104
105// Join functionality
106template <typename T>
107std::string Join(const T& s, const char* sep) {
108 return absl::StrJoin(s, sep);
109}
110
111// A variant of Join where for each element of "s", f(&dest_string, elem)
112// is invoked (f is often constructed with a lambda of the form:
113// [](string* result, ElemType elem)
114template <typename T, typename Formatter>
115std::string Join(const T& s, const char* sep, Formatter f) {
116 return absl::StrJoin(s, sep, f);
117}
118
119struct AllowEmpty {
120 bool operator()(StringPiece sp) const { return true; }
121};
122struct SkipEmpty {
123 bool operator()(StringPiece sp) const { return !sp.empty(); }
124};
125struct SkipWhitespace {
126 bool operator()(StringPiece sp) const {
127 return !absl::StripTrailingAsciiWhitespace(sp).empty();
128 }
129};
130
131// Split strings using any of the supplied delimiters. For example:
132// Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}.
133inline std::vector<string> Split(StringPiece text, StringPiece delims) {
134 return text.empty() ? std::vector<string>()
135 : absl::StrSplit(text, absl::ByAnyChar(delims));
136}
137
138template <typename Predicate>
139std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
140 return text.empty() ? std::vector<string>()
141 : absl::StrSplit(text, absl::ByAnyChar(delims), p);
142}
143
144inline std::vector<string> Split(StringPiece text, char delim) {
145 return text.empty() ? std::vector<string>() : absl::StrSplit(text, delim);
146}
147
148template <typename Predicate>
149std::vector<string> Split(StringPiece text, char delim, Predicate p) {
150 return text.empty() ? std::vector<string>() : absl::StrSplit(text, delim, p);
151}
152
// StartsWith()
//
// Returns whether the given string `text` begins with `prefix`. Both
// arguments are passed by value as StringPiece.
bool StartsWith(StringPiece text, StringPiece prefix);
157
// EndsWith()
//
// Returns whether the given string `text` ends with `suffix`. Both
// arguments are passed by value as StringPiece.
bool EndsWith(StringPiece text, StringPiece suffix);
162
// StrContains()
//
// Returns whether the given string `haystack` contains the substring
// `needle`. Both arguments are passed by value as StringPiece.
bool StrContains(StringPiece haystack, StringPiece needle);
167
// Returns the length of the given null-terminated byte string 'str'.
// Returns 'string_max_len' if the null character was not found in the first
// 'string_max_len' bytes of 'str'.
// NOTE(review): per the contract above, presumably at most 'string_max_len'
// bytes of 'str' are examined -- confirm against the implementation.
size_t Strnlen(const char* str, const size_t string_max_len);
172
// ----- NON STANDARD, TF SPECIFIC METHOD -----
// Converts "^2ILoveYou!" to "i_love_you_". More specifically:
// - converts all non-alphanumeric characters to underscores
// - replaces each occurrence of a capital letter (except the very
//   first character and if there is already an '_' before it) with '_'
//   followed by this letter in lower case
// - skips leading non-alpha characters
// This method is useful for producing strings matching "[a-z][a-z0-9_]*"
// as required by OpDef.ArgDef.name. The resulting string is either empty or
// matches this regex.
//
// The converted name is returned as a std::string; "s" is passed by value.
std::string ArgDefCase(StringPiece s);
184
185} // namespace str_util
186} // namespace tsl
187
188#endif // TENSORFLOW_TSL_PLATFORM_STR_UTIL_H_
189