1// This file is MACHINE GENERATED! Do not edit.
2
3#ifndef TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_
4#define TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_
5
6// This file is MACHINE GENERATED! Do not edit.
7
8#include "tensorflow/cc/framework/ops.h"
9#include "tensorflow/cc/framework/scope.h"
10#include "tensorflow/core/framework/tensor.h"
11#include "tensorflow/core/framework/tensor_shape.h"
12#include "tensorflow/core/framework/types.h"
13#include "tensorflow/core/lib/gtl/array_slice.h"
14
15namespace tensorflow {
16namespace ops {
17namespace internal {
18// NOTE: This namespace has internal TensorFlow details that
19// are not part of TensorFlow's public API.
20
21/// @defgroup string_ops_internal String Ops Internal
22/// @{
23
24/// Check if the input matches the regex pattern.
25///
26/// The input is a string tensor of any shape. The pattern is the
27/// regular expression to be matched with every element of the input tensor.
28/// The boolean values (True or False) of the output tensor indicate
29/// if the input matches the regex pattern provided.
30///
31/// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
32///
33/// Args:
34/// * scope: A Scope object
35/// * input: A string tensor of the text to be processed.
36/// * pattern: The regular expression to match the input.
37///
38/// Returns:
39/// * `Output`: A bool tensor with the same shape as `input`.
40class StaticRegexFullMatch {
41 public:
42 StaticRegexFullMatch(const ::tensorflow::Scope& scope, ::tensorflow::Input
43 input, StringPiece pattern);
44 operator ::tensorflow::Output() const { return output; }
45 operator ::tensorflow::Input() const { return output; }
46 ::tensorflow::Node* node() const { return output.node(); }
47
48 Operation operation;
49 ::tensorflow::Output output;
50};
51
52/// Replaces the match of pattern in input with rewrite.
53///
54/// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
55///
56/// Args:
57/// * scope: A Scope object
58/// * input: The text to be processed.
59/// * pattern: The regular expression to match the input.
60/// * rewrite: The rewrite to be applied to the matched expression.
61///
62/// Optional attributes (see `Attrs`):
63/// * replace_global: If True, the replacement is global, otherwise the replacement
64/// is done only on the first match.
65///
66/// Returns:
67/// * `Output`: The text after applying pattern and rewrite.
68class StaticRegexReplace {
69 public:
70 /// Optional attribute setters for StaticRegexReplace
71 struct Attrs {
72 /// If True, the replacement is global, otherwise the replacement
73 /// is done only on the first match.
74 ///
75 /// Defaults to true
76 TF_MUST_USE_RESULT Attrs ReplaceGlobal(bool x) {
77 Attrs ret = *this;
78 ret.replace_global_ = x;
79 return ret;
80 }
81
82 bool replace_global_ = true;
83 };
84 StaticRegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
85 StringPiece pattern, StringPiece rewrite);
86 StaticRegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
87 StringPiece pattern, StringPiece rewrite, const
88 StaticRegexReplace::Attrs& attrs);
89 operator ::tensorflow::Output() const { return output; }
90 operator ::tensorflow::Input() const { return output; }
91 ::tensorflow::Node* node() const { return output.node(); }
92
93 static Attrs ReplaceGlobal(bool x) {
94 return Attrs().ReplaceGlobal(x);
95 }
96
97 Operation operation;
98 ::tensorflow::Output output;
99};
100
101/// Decodes each string in `input` into a sequence of Unicode code points.
102///
103/// The character codepoints for all strings are returned using a single vector
104/// `char_values`, with strings expanded to characters in row-major order.
105///
106/// The `row_splits` tensor indicates where the codepoints for
107/// each input string begin and end within the `char_values` tensor.
108/// In particular, the values for the `i`th
109/// string (in row-major order) are stored in the slice
110/// `[row_splits[i]:row_splits[i+1]]`. Thus:
111///
112/// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
113/// character in the `i`th string (in row-major order).
114/// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
115/// string (in row-major order).
116///
117/// Args:
118/// * scope: A Scope object
119/// * input: The text to be decoded. Can have any shape. Note that the output is flattened
120/// to a vector of char values.
121/// * input_encoding: Text encoding of the input strings. This is any of the encodings supported
122/// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
123///
124/// Optional attributes (see `Attrs`):
125/// * errors: Error handling policy when there is invalid formatting found in the input.
126/// The value of 'strict' will cause the operation to produce a InvalidArgument
127/// error on any invalid input formatting. A value of 'replace' (the default) will
128/// cause the operation to replace any invalid formatting in the input with the
129/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
130/// skip any invalid formatting in the input and produce no corresponding output
131/// character.
132/// * replacement_char: The replacement character codepoint to be used in place of any invalid
133/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
134/// be used. The default value is the default unicode replacement character is
135/// 0xFFFD or U+65533.)
136/// * replace_control_characters: Whether to replace the C0 control characters (00-1F) with the
137/// `replacement_char`. Default is false.
138///
139/// Returns:
140/// * `Output` row_splits: A 1D int32 tensor containing the row splits.
141/// * `Output` char_values: A 1D int32 Tensor containing the decoded codepoints.
142class UnicodeDecode {
143 public:
144 /// Optional attribute setters for UnicodeDecode
145 struct Attrs {
146 /// Error handling policy when there is invalid formatting found in the input.
147 /// The value of 'strict' will cause the operation to produce a InvalidArgument
148 /// error on any invalid input formatting. A value of 'replace' (the default) will
149 /// cause the operation to replace any invalid formatting in the input with the
150 /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
151 /// skip any invalid formatting in the input and produce no corresponding output
152 /// character.
153 ///
154 /// Defaults to "replace"
155 TF_MUST_USE_RESULT Attrs Errors(StringPiece x) {
156 Attrs ret = *this;
157 ret.errors_ = x;
158 return ret;
159 }
160
161 /// The replacement character codepoint to be used in place of any invalid
162 /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
163 /// be used. The default value is the default unicode replacement character is
164 /// 0xFFFD or U+65533.)
165 ///
166 /// Defaults to 65533
167 TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) {
168 Attrs ret = *this;
169 ret.replacement_char_ = x;
170 return ret;
171 }
172
173 /// Whether to replace the C0 control characters (00-1F) with the
174 /// `replacement_char`. Default is false.
175 ///
176 /// Defaults to false
177 TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) {
178 Attrs ret = *this;
179 ret.replace_control_characters_ = x;
180 return ret;
181 }
182
183 /// Defaults to DT_INT64
184 TF_MUST_USE_RESULT Attrs Tsplits(DataType x) {
185 Attrs ret = *this;
186 ret.Tsplits_ = x;
187 return ret;
188 }
189
190 StringPiece errors_ = "replace";
191 int64 replacement_char_ = 65533;
192 bool replace_control_characters_ = false;
193 DataType Tsplits_ = DT_INT64;
194 };
195 UnicodeDecode(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
196 StringPiece input_encoding);
197 UnicodeDecode(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
198 StringPiece input_encoding, const UnicodeDecode::Attrs& attrs);
199
200 static Attrs Errors(StringPiece x) {
201 return Attrs().Errors(x);
202 }
203 static Attrs ReplacementChar(int64 x) {
204 return Attrs().ReplacementChar(x);
205 }
206 static Attrs ReplaceControlCharacters(bool x) {
207 return Attrs().ReplaceControlCharacters(x);
208 }
209 static Attrs Tsplits(DataType x) {
210 return Attrs().Tsplits(x);
211 }
212
213 Operation operation;
214 ::tensorflow::Output row_splits;
215 ::tensorflow::Output char_values;
216};
217
218/// Decodes each string in `input` into a sequence of Unicode code points.
219///
220/// The character codepoints for all strings are returned using a single vector
221/// `char_values`, with strings expanded to characters in row-major order.
222/// Similarly, the character start byte offsets are returned using a single vector
223/// `char_to_byte_starts`, with strings expanded in row-major order.
224///
225/// The `row_splits` tensor indicates where the codepoints and start offsets for
226/// each input string begin and end within the `char_values` and
227/// `char_to_byte_starts` tensors. In particular, the values for the `i`th
228/// string (in row-major order) are stored in the slice
229/// `[row_splits[i]:row_splits[i+1]]`. Thus:
230///
231/// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
232/// character in the `i`th string (in row-major order).
233/// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th
234/// character in the `i`th string (in row-major order).
235/// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
236/// string (in row-major order).
237///
238/// Args:
239/// * scope: A Scope object
240/// * input: The text to be decoded. Can have any shape. Note that the output is flattened
241/// to a vector of char values.
242/// * input_encoding: Text encoding of the input strings. This is any of the encodings supported
243/// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
244///
245/// Optional attributes (see `Attrs`):
246/// * errors: Error handling policy when there is invalid formatting found in the input.
247/// The value of 'strict' will cause the operation to produce a InvalidArgument
248/// error on any invalid input formatting. A value of 'replace' (the default) will
249/// cause the operation to replace any invalid formatting in the input with the
250/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
251/// skip any invalid formatting in the input and produce no corresponding output
252/// character.
253/// * replacement_char: The replacement character codepoint to be used in place of any invalid
254/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
255/// be used. The default value is the default unicode replacement character is
256/// 0xFFFD or U+65533.)
257/// * replace_control_characters: Whether to replace the C0 control characters (00-1F) with the
258/// `replacement_char`. Default is false.
259///
260/// Returns:
261/// * `Output` row_splits: A 1D int32 tensor containing the row splits.
262/// * `Output` char_values: A 1D int32 Tensor containing the decoded codepoints.
263/// * `Output` char_to_byte_starts: A 1D int32 Tensor containing the byte index in the input string where each
264/// character in `char_values` starts.
265class UnicodeDecodeWithOffsets {
266 public:
267 /// Optional attribute setters for UnicodeDecodeWithOffsets
268 struct Attrs {
269 /// Error handling policy when there is invalid formatting found in the input.
270 /// The value of 'strict' will cause the operation to produce a InvalidArgument
271 /// error on any invalid input formatting. A value of 'replace' (the default) will
272 /// cause the operation to replace any invalid formatting in the input with the
273 /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
274 /// skip any invalid formatting in the input and produce no corresponding output
275 /// character.
276 ///
277 /// Defaults to "replace"
278 TF_MUST_USE_RESULT Attrs Errors(StringPiece x) {
279 Attrs ret = *this;
280 ret.errors_ = x;
281 return ret;
282 }
283
284 /// The replacement character codepoint to be used in place of any invalid
285 /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
286 /// be used. The default value is the default unicode replacement character is
287 /// 0xFFFD or U+65533.)
288 ///
289 /// Defaults to 65533
290 TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) {
291 Attrs ret = *this;
292 ret.replacement_char_ = x;
293 return ret;
294 }
295
296 /// Whether to replace the C0 control characters (00-1F) with the
297 /// `replacement_char`. Default is false.
298 ///
299 /// Defaults to false
300 TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) {
301 Attrs ret = *this;
302 ret.replace_control_characters_ = x;
303 return ret;
304 }
305
306 /// Defaults to DT_INT64
307 TF_MUST_USE_RESULT Attrs Tsplits(DataType x) {
308 Attrs ret = *this;
309 ret.Tsplits_ = x;
310 return ret;
311 }
312
313 StringPiece errors_ = "replace";
314 int64 replacement_char_ = 65533;
315 bool replace_control_characters_ = false;
316 DataType Tsplits_ = DT_INT64;
317 };
318 UnicodeDecodeWithOffsets(const ::tensorflow::Scope& scope, ::tensorflow::Input
319 input, StringPiece input_encoding);
320 UnicodeDecodeWithOffsets(const ::tensorflow::Scope& scope, ::tensorflow::Input
321 input, StringPiece input_encoding, const
322 UnicodeDecodeWithOffsets::Attrs& attrs);
323
324 static Attrs Errors(StringPiece x) {
325 return Attrs().Errors(x);
326 }
327 static Attrs ReplacementChar(int64 x) {
328 return Attrs().ReplacementChar(x);
329 }
330 static Attrs ReplaceControlCharacters(bool x) {
331 return Attrs().ReplaceControlCharacters(x);
332 }
333 static Attrs Tsplits(DataType x) {
334 return Attrs().Tsplits(x);
335 }
336
337 Operation operation;
338 ::tensorflow::Output row_splits;
339 ::tensorflow::Output char_values;
340 ::tensorflow::Output char_to_byte_starts;
341};
342
343/// Encode a tensor of ints into unicode strings.
344///
345/// Returns a vector of strings, where `output[i]` is constructed by encoding the
346/// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]`
347/// using `output_encoding`.
348///
349/// ---
350///
351/// Example:
352///
353/// ```
354/// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100]
355/// input_splits = [0, 5, 10]
356/// output_encoding = 'UTF-8'
357///
358/// output = ['Hello', 'World']
359/// ```
360///
361/// Args:
362/// * scope: A Scope object
363/// * input_values: A 1D tensor containing the unicode codepoints that should be encoded.
364/// * input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings.
365/// In particular, `output[i]` is constructed by encoding the codepoints in the
366/// slice `input_values[input_splits[i]:input_splits[i+1]]`.
367/// * output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8",
368/// "UTF-16-BE", and "UTF-32-BE"`.
369///
370/// Optional attributes (see `Attrs`):
371/// * errors: Error handling policy when there is invalid formatting found in the input.
372/// The value of 'strict' will cause the operation to produce a InvalidArgument
373/// error on any invalid input formatting. A value of 'replace' (the default) will
374/// cause the operation to replace any invalid formatting in the input with the
375/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
376/// skip any invalid formatting in the input and produce no corresponding output
377/// character.
378/// * replacement_char: The replacement character codepoint to be used in place of any invalid
379/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
380/// be used. The default value is the default unicode replacement character is
381/// 0xFFFD (U+65533).
382///
383/// Returns:
384/// * `Output`: The 1-D Tensor of strings encoded from the provided unicode codepoints.
385class UnicodeEncode {
386 public:
387 /// Optional attribute setters for UnicodeEncode
388 struct Attrs {
389 /// Error handling policy when there is invalid formatting found in the input.
390 /// The value of 'strict' will cause the operation to produce a InvalidArgument
391 /// error on any invalid input formatting. A value of 'replace' (the default) will
392 /// cause the operation to replace any invalid formatting in the input with the
393 /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
394 /// skip any invalid formatting in the input and produce no corresponding output
395 /// character.
396 ///
397 /// Defaults to "replace"
398 TF_MUST_USE_RESULT Attrs Errors(StringPiece x) {
399 Attrs ret = *this;
400 ret.errors_ = x;
401 return ret;
402 }
403
404 /// The replacement character codepoint to be used in place of any invalid
405 /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
406 /// be used. The default value is the default unicode replacement character is
407 /// 0xFFFD (U+65533).
408 ///
409 /// Defaults to 65533
410 TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) {
411 Attrs ret = *this;
412 ret.replacement_char_ = x;
413 return ret;
414 }
415
416 StringPiece errors_ = "replace";
417 int64 replacement_char_ = 65533;
418 };
419 UnicodeEncode(const ::tensorflow::Scope& scope, ::tensorflow::Input
420 input_values, ::tensorflow::Input input_splits, StringPiece
421 output_encoding);
422 UnicodeEncode(const ::tensorflow::Scope& scope, ::tensorflow::Input
423 input_values, ::tensorflow::Input input_splits, StringPiece
424 output_encoding, const UnicodeEncode::Attrs& attrs);
425 operator ::tensorflow::Output() const { return output; }
426 operator ::tensorflow::Input() const { return output; }
427 ::tensorflow::Node* node() const { return output.node(); }
428
429 static Attrs Errors(StringPiece x) {
430 return Attrs().Errors(x);
431 }
432 static Attrs ReplacementChar(int64 x) {
433 return Attrs().ReplacementChar(x);
434 }
435
436 Operation operation;
437 ::tensorflow::Output output;
438};
439
440/// TODO: add doc.
441///
442/// Args:
443/// * scope: A Scope object
444///
445/// Returns:
446/// * `Output`: The output tensor.
447class UnsortedSegmentJoin {
448 public:
449 /// Optional attribute setters for UnsortedSegmentJoin
450 struct Attrs {
451 /// Defaults to ""
452 TF_MUST_USE_RESULT Attrs Separator(StringPiece x) {
453 Attrs ret = *this;
454 ret.separator_ = x;
455 return ret;
456 }
457
458 StringPiece separator_ = "";
459 };
460 UnsortedSegmentJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input
461 inputs, ::tensorflow::Input segment_ids,
462 ::tensorflow::Input num_segments);
463 UnsortedSegmentJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input
464 inputs, ::tensorflow::Input segment_ids,
465 ::tensorflow::Input num_segments, const
466 UnsortedSegmentJoin::Attrs& attrs);
467 operator ::tensorflow::Output() const { return output; }
468 operator ::tensorflow::Input() const { return output; }
469 ::tensorflow::Node* node() const { return output.node(); }
470
471 static Attrs Separator(StringPiece x) {
472 return Attrs().Separator(x);
473 }
474
475 Operation operation;
476 ::tensorflow::Output output;
477};
/// @}

479} // namespace internal
480} // namespace ops
481} // namespace tensorflow
482
483#endif // TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_
484