1 | // This file is MACHINE GENERATED! Do not edit. |
2 | |
3 | #ifndef TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_ |
4 | #define TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_ |
5 | |
6 | // This file is MACHINE GENERATED! Do not edit. |
7 | |
8 | #include "tensorflow/cc/framework/ops.h" |
9 | #include "tensorflow/cc/framework/scope.h" |
10 | #include "tensorflow/core/framework/tensor.h" |
11 | #include "tensorflow/core/framework/tensor_shape.h" |
12 | #include "tensorflow/core/framework/types.h" |
13 | #include "tensorflow/core/lib/gtl/array_slice.h" |
14 | |
15 | namespace tensorflow { |
16 | namespace ops { |
17 | namespace internal { |
18 | // NOTE: This namespace has internal TensorFlow details that |
19 | // are not part of TensorFlow's public API. |
20 | |
21 | /// @defgroup string_ops_internal String Ops Internal |
22 | /// @{ |
23 | |
24 | /// Check if the input matches the regex pattern. |
25 | /// |
26 | /// The input is a string tensor of any shape. The pattern is the |
27 | /// regular expression to be matched with every element of the input tensor. |
28 | /// The boolean values (True or False) of the output tensor indicate |
29 | /// if the input matches the regex pattern provided. |
30 | /// |
31 | /// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) |
32 | /// |
33 | /// Args: |
34 | /// * scope: A Scope object |
35 | /// * input: A string tensor of the text to be processed. |
36 | /// * pattern: The regular expression to match the input. |
37 | /// |
38 | /// Returns: |
39 | /// * `Output`: A bool tensor with the same shape as `input`. |
40 | class StaticRegexFullMatch { |
41 | public: |
42 | StaticRegexFullMatch(const ::tensorflow::Scope& scope, ::tensorflow::Input |
43 | input, StringPiece pattern); |
44 | operator ::tensorflow::Output() const { return output; } |
45 | operator ::tensorflow::Input() const { return output; } |
46 | ::tensorflow::Node* node() const { return output.node(); } |
47 | |
48 | Operation operation; |
49 | ::tensorflow::Output output; |
50 | }; |
51 | |
52 | /// Replaces the match of pattern in input with rewrite. |
53 | /// |
54 | /// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) |
55 | /// |
56 | /// Args: |
57 | /// * scope: A Scope object |
58 | /// * input: The text to be processed. |
59 | /// * pattern: The regular expression to match the input. |
60 | /// * rewrite: The rewrite to be applied to the matched expression. |
61 | /// |
62 | /// Optional attributes (see `Attrs`): |
63 | /// * replace_global: If True, the replacement is global, otherwise the replacement |
64 | /// is done only on the first match. |
65 | /// |
66 | /// Returns: |
67 | /// * `Output`: The text after applying pattern and rewrite. |
68 | class StaticRegexReplace { |
69 | public: |
70 | /// Optional attribute setters for StaticRegexReplace |
71 | struct Attrs { |
72 | /// If True, the replacement is global, otherwise the replacement |
73 | /// is done only on the first match. |
74 | /// |
75 | /// Defaults to true |
76 | TF_MUST_USE_RESULT Attrs ReplaceGlobal(bool x) { |
77 | Attrs ret = *this; |
78 | ret.replace_global_ = x; |
79 | return ret; |
80 | } |
81 | |
82 | bool replace_global_ = true; |
83 | }; |
84 | StaticRegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
85 | StringPiece pattern, StringPiece rewrite); |
86 | StaticRegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
87 | StringPiece pattern, StringPiece rewrite, const |
88 | StaticRegexReplace::Attrs& attrs); |
89 | operator ::tensorflow::Output() const { return output; } |
90 | operator ::tensorflow::Input() const { return output; } |
91 | ::tensorflow::Node* node() const { return output.node(); } |
92 | |
93 | static Attrs ReplaceGlobal(bool x) { |
94 | return Attrs().ReplaceGlobal(x); |
95 | } |
96 | |
97 | Operation operation; |
98 | ::tensorflow::Output output; |
99 | }; |
100 | |
101 | /// Decodes each string in `input` into a sequence of Unicode code points. |
102 | /// |
103 | /// The character codepoints for all strings are returned using a single vector |
104 | /// `char_values`, with strings expanded to characters in row-major order. |
105 | /// |
106 | /// The `row_splits` tensor indicates where the codepoints for |
107 | /// each input string begin and end within the `char_values` tensor. |
108 | /// In particular, the values for the `i`th |
109 | /// string (in row-major order) are stored in the slice |
110 | /// `[row_splits[i]:row_splits[i+1]]`. Thus: |
111 | /// |
112 | /// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th |
113 | /// character in the `i`th string (in row-major order). |
114 | /// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th |
115 | /// string (in row-major order). |
116 | /// |
117 | /// Args: |
118 | /// * scope: A Scope object |
119 | /// * input: The text to be decoded. Can have any shape. Note that the output is flattened |
120 | /// to a vector of char values. |
121 | /// * input_encoding: Text encoding of the input strings. This is any of the encodings supported |
122 | /// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. |
123 | /// |
124 | /// Optional attributes (see `Attrs`): |
125 | /// * errors: Error handling policy when there is invalid formatting found in the input. |
/// The value of 'strict' will cause the operation to produce an InvalidArgument
127 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
128 | /// cause the operation to replace any invalid formatting in the input with the |
129 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
130 | /// skip any invalid formatting in the input and produce no corresponding output |
131 | /// character. |
132 | /// * replacement_char: The replacement character codepoint to be used in place of any invalid |
133 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
/// be used. The default value is the default unicode replacement character,
/// 0xFFFD (decimal 65533).
136 | /// * replace_control_characters: Whether to replace the C0 control characters (00-1F) with the |
137 | /// `replacement_char`. Default is false. |
138 | /// |
139 | /// Returns: |
140 | /// * `Output` row_splits: A 1D int32 tensor containing the row splits. |
141 | /// * `Output` char_values: A 1D int32 Tensor containing the decoded codepoints. |
142 | class UnicodeDecode { |
143 | public: |
144 | /// Optional attribute setters for UnicodeDecode |
145 | struct Attrs { |
146 | /// Error handling policy when there is invalid formatting found in the input. |
147 | /// The value of 'strict' will cause the operation to produce a InvalidArgument |
148 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
149 | /// cause the operation to replace any invalid formatting in the input with the |
150 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
151 | /// skip any invalid formatting in the input and produce no corresponding output |
152 | /// character. |
153 | /// |
154 | /// Defaults to "replace" |
155 | TF_MUST_USE_RESULT Attrs Errors(StringPiece x) { |
156 | Attrs ret = *this; |
157 | ret.errors_ = x; |
158 | return ret; |
159 | } |
160 | |
161 | /// The replacement character codepoint to be used in place of any invalid |
162 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
163 | /// be used. The default value is the default unicode replacement character is |
164 | /// 0xFFFD or U+65533.) |
165 | /// |
166 | /// Defaults to 65533 |
167 | TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) { |
168 | Attrs ret = *this; |
169 | ret.replacement_char_ = x; |
170 | return ret; |
171 | } |
172 | |
173 | /// Whether to replace the C0 control characters (00-1F) with the |
174 | /// `replacement_char`. Default is false. |
175 | /// |
176 | /// Defaults to false |
177 | TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) { |
178 | Attrs ret = *this; |
179 | ret.replace_control_characters_ = x; |
180 | return ret; |
181 | } |
182 | |
183 | /// Defaults to DT_INT64 |
184 | TF_MUST_USE_RESULT Attrs Tsplits(DataType x) { |
185 | Attrs ret = *this; |
186 | ret.Tsplits_ = x; |
187 | return ret; |
188 | } |
189 | |
190 | StringPiece errors_ = "replace" ; |
191 | int64 replacement_char_ = 65533; |
192 | bool replace_control_characters_ = false; |
193 | DataType Tsplits_ = DT_INT64; |
194 | }; |
195 | UnicodeDecode(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
196 | StringPiece input_encoding); |
197 | UnicodeDecode(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
198 | StringPiece input_encoding, const UnicodeDecode::Attrs& attrs); |
199 | |
200 | static Attrs Errors(StringPiece x) { |
201 | return Attrs().Errors(x); |
202 | } |
203 | static Attrs ReplacementChar(int64 x) { |
204 | return Attrs().ReplacementChar(x); |
205 | } |
206 | static Attrs ReplaceControlCharacters(bool x) { |
207 | return Attrs().ReplaceControlCharacters(x); |
208 | } |
209 | static Attrs Tsplits(DataType x) { |
210 | return Attrs().Tsplits(x); |
211 | } |
212 | |
213 | Operation operation; |
214 | ::tensorflow::Output row_splits; |
215 | ::tensorflow::Output char_values; |
216 | }; |
217 | |
218 | /// Decodes each string in `input` into a sequence of Unicode code points. |
219 | /// |
220 | /// The character codepoints for all strings are returned using a single vector |
221 | /// `char_values`, with strings expanded to characters in row-major order. |
222 | /// Similarly, the character start byte offsets are returned using a single vector |
223 | /// `char_to_byte_starts`, with strings expanded in row-major order. |
224 | /// |
225 | /// The `row_splits` tensor indicates where the codepoints and start offsets for |
226 | /// each input string begin and end within the `char_values` and |
227 | /// `char_to_byte_starts` tensors. In particular, the values for the `i`th |
228 | /// string (in row-major order) are stored in the slice |
229 | /// `[row_splits[i]:row_splits[i+1]]`. Thus: |
230 | /// |
231 | /// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th |
232 | /// character in the `i`th string (in row-major order). |
/// * `char_to_byte_starts[row_splits[i]+j]` is the start byte offset for the `j`th
234 | /// character in the `i`th string (in row-major order). |
235 | /// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th |
236 | /// string (in row-major order). |
237 | /// |
238 | /// Args: |
239 | /// * scope: A Scope object |
240 | /// * input: The text to be decoded. Can have any shape. Note that the output is flattened |
241 | /// to a vector of char values. |
242 | /// * input_encoding: Text encoding of the input strings. This is any of the encodings supported |
243 | /// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. |
244 | /// |
245 | /// Optional attributes (see `Attrs`): |
246 | /// * errors: Error handling policy when there is invalid formatting found in the input. |
/// The value of 'strict' will cause the operation to produce an InvalidArgument
248 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
249 | /// cause the operation to replace any invalid formatting in the input with the |
250 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
251 | /// skip any invalid formatting in the input and produce no corresponding output |
252 | /// character. |
253 | /// * replacement_char: The replacement character codepoint to be used in place of any invalid |
254 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
/// be used. The default value is the default unicode replacement character,
/// 0xFFFD (decimal 65533).
257 | /// * replace_control_characters: Whether to replace the C0 control characters (00-1F) with the |
258 | /// `replacement_char`. Default is false. |
259 | /// |
260 | /// Returns: |
261 | /// * `Output` row_splits: A 1D int32 tensor containing the row splits. |
262 | /// * `Output` char_values: A 1D int32 Tensor containing the decoded codepoints. |
263 | /// * `Output` char_to_byte_starts: A 1D int32 Tensor containing the byte index in the input string where each |
264 | /// character in `char_values` starts. |
265 | class UnicodeDecodeWithOffsets { |
266 | public: |
267 | /// Optional attribute setters for UnicodeDecodeWithOffsets |
268 | struct Attrs { |
269 | /// Error handling policy when there is invalid formatting found in the input. |
270 | /// The value of 'strict' will cause the operation to produce a InvalidArgument |
271 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
272 | /// cause the operation to replace any invalid formatting in the input with the |
273 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
274 | /// skip any invalid formatting in the input and produce no corresponding output |
275 | /// character. |
276 | /// |
277 | /// Defaults to "replace" |
278 | TF_MUST_USE_RESULT Attrs Errors(StringPiece x) { |
279 | Attrs ret = *this; |
280 | ret.errors_ = x; |
281 | return ret; |
282 | } |
283 | |
284 | /// The replacement character codepoint to be used in place of any invalid |
285 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
286 | /// be used. The default value is the default unicode replacement character is |
287 | /// 0xFFFD or U+65533.) |
288 | /// |
289 | /// Defaults to 65533 |
290 | TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) { |
291 | Attrs ret = *this; |
292 | ret.replacement_char_ = x; |
293 | return ret; |
294 | } |
295 | |
296 | /// Whether to replace the C0 control characters (00-1F) with the |
297 | /// `replacement_char`. Default is false. |
298 | /// |
299 | /// Defaults to false |
300 | TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) { |
301 | Attrs ret = *this; |
302 | ret.replace_control_characters_ = x; |
303 | return ret; |
304 | } |
305 | |
306 | /// Defaults to DT_INT64 |
307 | TF_MUST_USE_RESULT Attrs Tsplits(DataType x) { |
308 | Attrs ret = *this; |
309 | ret.Tsplits_ = x; |
310 | return ret; |
311 | } |
312 | |
313 | StringPiece errors_ = "replace" ; |
314 | int64 replacement_char_ = 65533; |
315 | bool replace_control_characters_ = false; |
316 | DataType Tsplits_ = DT_INT64; |
317 | }; |
318 | UnicodeDecodeWithOffsets(const ::tensorflow::Scope& scope, ::tensorflow::Input |
319 | input, StringPiece input_encoding); |
320 | UnicodeDecodeWithOffsets(const ::tensorflow::Scope& scope, ::tensorflow::Input |
321 | input, StringPiece input_encoding, const |
322 | UnicodeDecodeWithOffsets::Attrs& attrs); |
323 | |
324 | static Attrs Errors(StringPiece x) { |
325 | return Attrs().Errors(x); |
326 | } |
327 | static Attrs ReplacementChar(int64 x) { |
328 | return Attrs().ReplacementChar(x); |
329 | } |
330 | static Attrs ReplaceControlCharacters(bool x) { |
331 | return Attrs().ReplaceControlCharacters(x); |
332 | } |
333 | static Attrs Tsplits(DataType x) { |
334 | return Attrs().Tsplits(x); |
335 | } |
336 | |
337 | Operation operation; |
338 | ::tensorflow::Output row_splits; |
339 | ::tensorflow::Output char_values; |
340 | ::tensorflow::Output char_to_byte_starts; |
341 | }; |
342 | |
343 | /// Encode a tensor of ints into unicode strings. |
344 | /// |
345 | /// Returns a vector of strings, where `output[i]` is constructed by encoding the |
346 | /// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]` |
347 | /// using `output_encoding`. |
348 | /// |
349 | /// --- |
350 | /// |
351 | /// Example: |
352 | /// |
353 | /// ``` |
354 | /// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100] |
355 | /// input_splits = [0, 5, 10] |
356 | /// output_encoding = 'UTF-8' |
357 | /// |
358 | /// output = ['Hello', 'World'] |
359 | /// ``` |
360 | /// |
361 | /// Args: |
362 | /// * scope: A Scope object |
363 | /// * input_values: A 1D tensor containing the unicode codepoints that should be encoded. |
364 | /// * input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings. |
365 | /// In particular, `output[i]` is constructed by encoding the codepoints in the |
366 | /// slice `input_values[input_splits[i]:input_splits[i+1]]`. |
367 | /// * output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8", |
368 | /// "UTF-16-BE", and "UTF-32-BE"`. |
369 | /// |
370 | /// Optional attributes (see `Attrs`): |
371 | /// * errors: Error handling policy when there is invalid formatting found in the input. |
/// The value of 'strict' will cause the operation to produce an InvalidArgument
373 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
374 | /// cause the operation to replace any invalid formatting in the input with the |
375 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
376 | /// skip any invalid formatting in the input and produce no corresponding output |
377 | /// character. |
378 | /// * replacement_char: The replacement character codepoint to be used in place of any invalid |
379 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
/// be used. The default value is the default unicode replacement character,
/// 0xFFFD (decimal 65533).
382 | /// |
383 | /// Returns: |
384 | /// * `Output`: The 1-D Tensor of strings encoded from the provided unicode codepoints. |
385 | class UnicodeEncode { |
386 | public: |
387 | /// Optional attribute setters for UnicodeEncode |
388 | struct Attrs { |
389 | /// Error handling policy when there is invalid formatting found in the input. |
390 | /// The value of 'strict' will cause the operation to produce a InvalidArgument |
391 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
392 | /// cause the operation to replace any invalid formatting in the input with the |
393 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
394 | /// skip any invalid formatting in the input and produce no corresponding output |
395 | /// character. |
396 | /// |
397 | /// Defaults to "replace" |
398 | TF_MUST_USE_RESULT Attrs Errors(StringPiece x) { |
399 | Attrs ret = *this; |
400 | ret.errors_ = x; |
401 | return ret; |
402 | } |
403 | |
404 | /// The replacement character codepoint to be used in place of any invalid |
405 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
406 | /// be used. The default value is the default unicode replacement character is |
407 | /// 0xFFFD (U+65533). |
408 | /// |
409 | /// Defaults to 65533 |
410 | TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) { |
411 | Attrs ret = *this; |
412 | ret.replacement_char_ = x; |
413 | return ret; |
414 | } |
415 | |
416 | StringPiece errors_ = "replace" ; |
417 | int64 replacement_char_ = 65533; |
418 | }; |
419 | UnicodeEncode(const ::tensorflow::Scope& scope, ::tensorflow::Input |
420 | input_values, ::tensorflow::Input input_splits, StringPiece |
421 | output_encoding); |
422 | UnicodeEncode(const ::tensorflow::Scope& scope, ::tensorflow::Input |
423 | input_values, ::tensorflow::Input input_splits, StringPiece |
424 | output_encoding, const UnicodeEncode::Attrs& attrs); |
425 | operator ::tensorflow::Output() const { return output; } |
426 | operator ::tensorflow::Input() const { return output; } |
427 | ::tensorflow::Node* node() const { return output.node(); } |
428 | |
429 | static Attrs Errors(StringPiece x) { |
430 | return Attrs().Errors(x); |
431 | } |
432 | static Attrs ReplacementChar(int64 x) { |
433 | return Attrs().ReplacementChar(x); |
434 | } |
435 | |
436 | Operation operation; |
437 | ::tensorflow::Output output; |
438 | }; |
439 | |
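/// TODO: add doc.
///
/// Args:
/// * scope: A Scope object
/// * inputs: TODO: add doc.
/// * segment_ids: TODO: add doc.
/// * num_segments: TODO: add doc.
///
/// Optional attributes (see `Attrs`):
/// * separator: TODO: add doc. Defaults to "".
///
/// Returns:
/// * `Output`: The output tensor.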
447 | class UnsortedSegmentJoin { |
448 | public: |
449 | /// Optional attribute setters for UnsortedSegmentJoin |
450 | struct Attrs { |
451 | /// Defaults to "" |
452 | TF_MUST_USE_RESULT Attrs Separator(StringPiece x) { |
453 | Attrs ret = *this; |
454 | ret.separator_ = x; |
455 | return ret; |
456 | } |
457 | |
458 | StringPiece separator_ = "" ; |
459 | }; |
460 | UnsortedSegmentJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input |
461 | inputs, ::tensorflow::Input segment_ids, |
462 | ::tensorflow::Input num_segments); |
463 | UnsortedSegmentJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input |
464 | inputs, ::tensorflow::Input segment_ids, |
465 | ::tensorflow::Input num_segments, const |
466 | UnsortedSegmentJoin::Attrs& attrs); |
467 | operator ::tensorflow::Output() const { return output; } |
468 | operator ::tensorflow::Input() const { return output; } |
469 | ::tensorflow::Node* node() const { return output.node(); } |
470 | |
471 | static Attrs Separator(StringPiece x) { |
472 | return Attrs().Separator(x); |
473 | } |
474 | |
475 | Operation operation; |
476 | ::tensorflow::Output output; |
477 | }; |
478 | |
479 | } // namespace internal |
480 | } // namespace ops |
481 | } // namespace tensorflow |
482 | |
483 | #endif // TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_ |
484 | |