1 | // This file is MACHINE GENERATED! Do not edit. |
2 | |
3 | #ifndef TENSORFLOW_CC_OPS_STRING_OPS_H_ |
4 | #define TENSORFLOW_CC_OPS_STRING_OPS_H_ |
5 | |
6 | // This file is MACHINE GENERATED! Do not edit. |
7 | |
8 | #include "tensorflow/cc/framework/ops.h" |
9 | #include "tensorflow/cc/framework/scope.h" |
10 | #include "tensorflow/core/framework/tensor.h" |
11 | #include "tensorflow/core/framework/tensor_shape.h" |
12 | #include "tensorflow/core/framework/types.h" |
13 | #include "tensorflow/core/lib/gtl/array_slice.h" |
14 | |
15 | namespace tensorflow { |
16 | namespace ops { |
17 | |
18 | /// @defgroup string_ops String Ops |
19 | /// @{ |
20 | |
21 | /// Converts each entry in the given tensor to strings. |
22 | /// |
23 | /// Supports many numeric types and boolean. |
24 | /// |
25 | /// For Unicode, see the |
26 | /// [https://www.tensorflow.org/tutorials/representation/unicode](Working with Unicode text) |
27 | /// tutorial. |
28 | /// |
29 | /// Examples: |
30 | /// |
31 | /// >>> tf.strings.as_string([3, 2]) |
32 | /// <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'3', b'2'], dtype=object)> |
33 | /// >>> tf.strings.as_string([3.1415926, 2.71828], precision=2).numpy() |
34 | /// array([b'3.14', b'2.72'], dtype=object) |
35 | /// |
36 | /// Args: |
37 | /// * scope: A Scope object |
38 | /// |
39 | /// Optional attributes (see `Attrs`): |
40 | /// * precision: The post-decimal precision to use for floating point numbers. |
41 | /// Only used if precision > -1. |
42 | /// * scientific: Use scientific notation for floating point numbers. |
43 | /// * shortest: Use shortest representation (either scientific or standard) for |
44 | /// floating point numbers. |
45 | /// * width: Pad pre-decimal numbers to this width. |
46 | /// Applies to both floating point and integer numbers. |
47 | /// Only used if width > -1. |
48 | /// * fill: The value to pad if width > -1. If empty, pads with spaces. |
49 | /// Another typical value is '0'. String cannot be longer than 1 character. |
50 | /// |
51 | /// Returns: |
52 | /// * `Output`: The output tensor. |
53 | class AsString { |
54 | public: |
55 | /// Optional attribute setters for AsString |
56 | struct Attrs { |
57 | /// The post-decimal precision to use for floating point numbers. |
58 | /// Only used if precision > -1. |
59 | /// |
60 | /// Defaults to -1 |
61 | TF_MUST_USE_RESULT Attrs Precision(int64 x) { |
62 | Attrs ret = *this; |
63 | ret.precision_ = x; |
64 | return ret; |
65 | } |
66 | |
67 | /// Use scientific notation for floating point numbers. |
68 | /// |
69 | /// Defaults to false |
70 | TF_MUST_USE_RESULT Attrs Scientific(bool x) { |
71 | Attrs ret = *this; |
72 | ret.scientific_ = x; |
73 | return ret; |
74 | } |
75 | |
76 | /// Use shortest representation (either scientific or standard) for |
77 | /// floating point numbers. |
78 | /// |
79 | /// Defaults to false |
80 | TF_MUST_USE_RESULT Attrs Shortest(bool x) { |
81 | Attrs ret = *this; |
82 | ret.shortest_ = x; |
83 | return ret; |
84 | } |
85 | |
86 | /// Pad pre-decimal numbers to this width. |
87 | /// Applies to both floating point and integer numbers. |
88 | /// Only used if width > -1. |
89 | /// |
90 | /// Defaults to -1 |
91 | TF_MUST_USE_RESULT Attrs Width(int64 x) { |
92 | Attrs ret = *this; |
93 | ret.width_ = x; |
94 | return ret; |
95 | } |
96 | |
97 | /// The value to pad if width > -1. If empty, pads with spaces. |
98 | /// Another typical value is '0'. String cannot be longer than 1 character. |
99 | /// |
100 | /// Defaults to "" |
101 | TF_MUST_USE_RESULT Attrs Fill(StringPiece x) { |
102 | Attrs ret = *this; |
103 | ret.fill_ = x; |
104 | return ret; |
105 | } |
106 | |
107 | int64 precision_ = -1; |
108 | bool scientific_ = false; |
109 | bool shortest_ = false; |
110 | int64 width_ = -1; |
111 | StringPiece fill_ = "" ; |
112 | }; |
113 | AsString(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
114 | AsString(const ::tensorflow::Scope& scope, ::tensorflow::Input input, const |
115 | AsString::Attrs& attrs); |
116 | operator ::tensorflow::Output() const { return output; } |
117 | operator ::tensorflow::Input() const { return output; } |
118 | ::tensorflow::Node* node() const { return output.node(); } |
119 | |
120 | static Attrs Precision(int64 x) { |
121 | return Attrs().Precision(x); |
122 | } |
123 | static Attrs Scientific(bool x) { |
124 | return Attrs().Scientific(x); |
125 | } |
126 | static Attrs Shortest(bool x) { |
127 | return Attrs().Shortest(x); |
128 | } |
129 | static Attrs Width(int64 x) { |
130 | return Attrs().Width(x); |
131 | } |
132 | static Attrs Fill(StringPiece x) { |
133 | return Attrs().Fill(x); |
134 | } |
135 | |
136 | Operation operation; |
137 | ::tensorflow::Output output; |
138 | }; |
139 | |
140 | /// Decode web-safe base64-encoded strings. |
141 | /// |
142 | /// Input may or may not have padding at the end. See |
143 | /// [EncodeBase64](https://www.tensorflow.org/api_docs/python/tf/io/encode_base64) |
144 | /// for padding. Web-safe means that input must use - and _ instead of + and /. |
145 | /// |
146 | /// Args: |
147 | /// * scope: A Scope object |
148 | /// * input: Base64 strings to decode. |
149 | /// |
150 | /// Returns: |
151 | /// * `Output`: Decoded strings. |
152 | class DecodeBase64 { |
153 | public: |
154 | DecodeBase64(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
155 | operator ::tensorflow::Output() const { return output; } |
156 | operator ::tensorflow::Input() const { return output; } |
157 | ::tensorflow::Node* node() const { return output.node(); } |
158 | |
159 | Operation operation; |
160 | ::tensorflow::Output output; |
161 | }; |
162 | |
163 | /// Encode strings into web-safe base64 format. |
164 | /// |
165 | /// Refer to [this article](https://en.wikipedia.org/wiki/Base64) for more information on |
166 | /// base64 format. Base64 strings may have padding with '=' at the |
167 | /// end so that the encoded has length multiple of 4. See Padding section of the |
168 | /// link above. |
169 | /// |
170 | /// Web-safe means that the encoder uses - and _ instead of + and /. |
171 | /// |
172 | /// Args: |
173 | /// * scope: A Scope object |
174 | /// * input: Strings to be encoded. |
175 | /// |
176 | /// Optional attributes (see `Attrs`): |
177 | /// * pad: Bool whether padding is applied at the ends. |
178 | /// |
179 | /// Returns: |
180 | /// * `Output`: Input strings encoded in base64. |
181 | class EncodeBase64 { |
182 | public: |
183 | /// Optional attribute setters for EncodeBase64 |
184 | struct Attrs { |
185 | /// Bool whether padding is applied at the ends. |
186 | /// |
187 | /// Defaults to false |
188 | TF_MUST_USE_RESULT Attrs Pad(bool x) { |
189 | Attrs ret = *this; |
190 | ret.pad_ = x; |
191 | return ret; |
192 | } |
193 | |
194 | bool pad_ = false; |
195 | }; |
196 | EncodeBase64(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
197 | EncodeBase64(const ::tensorflow::Scope& scope, ::tensorflow::Input input, const |
198 | EncodeBase64::Attrs& attrs); |
199 | operator ::tensorflow::Output() const { return output; } |
200 | operator ::tensorflow::Input() const { return output; } |
201 | ::tensorflow::Node* node() const { return output.node(); } |
202 | |
203 | static Attrs Pad(bool x) { |
204 | return Attrs().Pad(x); |
205 | } |
206 | |
207 | Operation operation; |
208 | ::tensorflow::Output output; |
209 | }; |
210 | |
211 | /// Joins a string Tensor across the given dimensions. |
212 | /// |
213 | /// Computes the string join across dimensions in the given string Tensor of shape |
214 | /// `[\\(d_0, d_1, ..., d_{n-1}\\)]`. Returns a new Tensor created by joining the input |
215 | /// strings with the given separator (default: empty string). Negative indices are |
216 | /// counted backwards from the end, with `-1` being equivalent to `n - 1`. If |
217 | /// indices are not specified, joins across all dimensions beginning from `n - 1` |
218 | /// through `0`. |
219 | /// |
220 | /// For example: |
221 | /// |
222 | /// ```python |
223 | /// # tensor `a` is [["a", "b"], ["c", "d"]] |
224 | /// tf.reduce_join(a, 0) ==> ["ac", "bd"] |
225 | /// tf.reduce_join(a, 1) ==> ["ab", "cd"] |
226 | /// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] |
227 | /// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] |
228 | /// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] |
229 | /// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] |
230 | /// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] |
231 | /// tf.reduce_join(a, [0, 1]) ==> "acbd" |
232 | /// tf.reduce_join(a, [1, 0]) ==> "abcd" |
233 | /// tf.reduce_join(a, []) ==> [["a", "b"], ["c", "d"]] |
234 | /// tf.reduce_join(a) = tf.reduce_join(a, [1, 0]) ==> "abcd" |
235 | /// ``` |
236 | /// |
237 | /// Args: |
238 | /// * scope: A Scope object |
239 | /// * inputs: The input to be joined. All reduced indices must have non-zero size. |
240 | /// * reduction_indices: The dimensions to reduce over. Dimensions are reduced in the |
241 | /// order specified. Omitting `reduction_indices` is equivalent to passing |
242 | /// `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. |
243 | /// |
244 | /// Optional attributes (see `Attrs`): |
245 | /// * keep_dims: If `True`, retain reduced dimensions with length `1`. |
246 | /// * separator: The separator to use when joining. |
247 | /// |
248 | /// Returns: |
249 | /// * `Output`: Has shape equal to that of the input with reduced dimensions removed or |
250 | /// set to `1` depending on `keep_dims`. |
251 | class ReduceJoin { |
252 | public: |
253 | /// Optional attribute setters for ReduceJoin |
254 | struct Attrs { |
255 | /// If `True`, retain reduced dimensions with length `1`. |
256 | /// |
257 | /// Defaults to false |
258 | TF_MUST_USE_RESULT Attrs KeepDims(bool x) { |
259 | Attrs ret = *this; |
260 | ret.keep_dims_ = x; |
261 | return ret; |
262 | } |
263 | |
264 | /// The separator to use when joining. |
265 | /// |
266 | /// Defaults to "" |
267 | TF_MUST_USE_RESULT Attrs Separator(StringPiece x) { |
268 | Attrs ret = *this; |
269 | ret.separator_ = x; |
270 | return ret; |
271 | } |
272 | |
273 | bool keep_dims_ = false; |
274 | StringPiece separator_ = "" ; |
275 | }; |
276 | ReduceJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input inputs, |
277 | ::tensorflow::Input reduction_indices); |
278 | ReduceJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input inputs, |
279 | ::tensorflow::Input reduction_indices, const ReduceJoin::Attrs& |
280 | attrs); |
281 | operator ::tensorflow::Output() const { return output; } |
282 | operator ::tensorflow::Input() const { return output; } |
283 | ::tensorflow::Node* node() const { return output.node(); } |
284 | |
285 | static Attrs KeepDims(bool x) { |
286 | return Attrs().KeepDims(x); |
287 | } |
288 | static Attrs Separator(StringPiece x) { |
289 | return Attrs().Separator(x); |
290 | } |
291 | |
292 | Operation operation; |
293 | ::tensorflow::Output output; |
294 | }; |
295 | |
296 | /// Check if the input matches the regex pattern. |
297 | /// |
298 | /// The input is a string tensor of any shape. The pattern is a scalar |
299 | /// string tensor which is applied to every element of the input tensor. |
300 | /// The boolean values (True or False) of the output tensor indicate |
301 | /// if the input matches the regex pattern provided. |
302 | /// |
303 | /// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) |
304 | /// |
305 | /// Examples: |
306 | /// |
307 | /// >>> tf.strings.regex_full_match(["TF lib", "lib TF"], ".*lib$") |
308 | /// <tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True, False])> |
309 | /// >>> tf.strings.regex_full_match(["TF lib", "lib TF"], ".*TF$") |
310 | /// <tf.Tensor: shape=(2,), dtype=bool, numpy=array([False, True])> |
311 | /// |
312 | /// Args: |
313 | /// * scope: A Scope object |
314 | /// * input: A string tensor of the text to be processed. |
315 | /// * pattern: A scalar string tensor containing the regular expression to match the input. |
316 | /// |
317 | /// Returns: |
318 | /// * `Output`: A bool tensor with the same shape as `input`. |
319 | class RegexFullMatch { |
320 | public: |
321 | RegexFullMatch(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
322 | ::tensorflow::Input pattern); |
323 | operator ::tensorflow::Output() const { return output; } |
324 | operator ::tensorflow::Input() const { return output; } |
325 | ::tensorflow::Node* node() const { return output.node(); } |
326 | |
327 | Operation operation; |
328 | ::tensorflow::Output output; |
329 | }; |
330 | |
331 | /// Replaces matches of the `pattern` regular expression in `input` with the |
332 | /// replacement string provided in `rewrite`. |
333 | /// |
334 | /// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) |
335 | /// |
336 | /// Args: |
337 | /// * scope: A Scope object |
338 | /// * input: The text to be processed. |
339 | /// * pattern: The regular expression to be matched in the `input` strings. |
340 | /// * rewrite: The rewrite string to be substituted for the `pattern` expression where it is |
341 | /// matched in the `input` strings. |
342 | /// |
343 | /// Optional attributes (see `Attrs`): |
344 | /// * replace_global: If True, the replacement is global (that is, all matches of the `pattern` regular |
345 | /// expression in each input string are rewritten), otherwise the `rewrite` |
346 | /// substitution is only made for the first `pattern` match. |
347 | /// |
348 | /// Returns: |
349 | /// * `Output`: The text after applying pattern match and rewrite substitution. |
350 | class RegexReplace { |
351 | public: |
352 | /// Optional attribute setters for RegexReplace |
353 | struct Attrs { |
354 | /// If True, the replacement is global (that is, all matches of the `pattern` regular |
355 | /// expression in each input string are rewritten), otherwise the `rewrite` |
356 | /// substitution is only made for the first `pattern` match. |
357 | /// |
358 | /// Defaults to true |
359 | TF_MUST_USE_RESULT Attrs ReplaceGlobal(bool x) { |
360 | Attrs ret = *this; |
361 | ret.replace_global_ = x; |
362 | return ret; |
363 | } |
364 | |
365 | bool replace_global_ = true; |
366 | }; |
367 | RegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
368 | ::tensorflow::Input pattern, ::tensorflow::Input rewrite); |
369 | RegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
370 | ::tensorflow::Input pattern, ::tensorflow::Input rewrite, const |
371 | RegexReplace::Attrs& attrs); |
372 | operator ::tensorflow::Output() const { return output; } |
373 | operator ::tensorflow::Input() const { return output; } |
374 | ::tensorflow::Node* node() const { return output.node(); } |
375 | |
376 | static Attrs ReplaceGlobal(bool x) { |
377 | return Attrs().ReplaceGlobal(x); |
378 | } |
379 | |
380 | Operation operation; |
381 | ::tensorflow::Output output; |
382 | }; |
383 | |
384 | /// Formats a string template using a list of tensors. |
385 | /// |
386 | /// Formats a string template using a list of tensors, pretty-printing tensor summaries. |
387 | /// |
388 | /// Args: |
389 | /// * scope: A Scope object |
390 | /// * inputs: The list of tensors to format into the placeholder string. |
391 | /// |
392 | /// Optional attributes (see `Attrs`): |
393 | /// * template_: A string, the template to format tensor summaries into. |
394 | /// * placeholder: A string, at each placeholder in the template a subsequent tensor summary will be inserted. |
395 | /// * summarize: When formatting the tensor summaries print the first and last summarize entries of each tensor dimension. |
396 | /// |
397 | /// Returns: |
398 | /// * `Output`: = The resulting string scalar. |
399 | class StringFormat { |
400 | public: |
401 | /// Optional attribute setters for StringFormat |
402 | struct Attrs { |
403 | /// A string, the template to format tensor summaries into. |
404 | /// |
405 | /// Defaults to "%s" |
406 | TF_MUST_USE_RESULT Attrs Template(StringPiece x) { |
407 | Attrs ret = *this; |
408 | ret.template_ = x; |
409 | return ret; |
410 | } |
411 | |
412 | /// A string, at each placeholder in the template a subsequent tensor summary will be inserted. |
413 | /// |
414 | /// Defaults to "%s" |
415 | TF_MUST_USE_RESULT Attrs Placeholder(StringPiece x) { |
416 | Attrs ret = *this; |
417 | ret.placeholder_ = x; |
418 | return ret; |
419 | } |
420 | |
421 | /// When formatting the tensor summaries print the first and last summarize entries of each tensor dimension. |
422 | /// |
423 | /// Defaults to 3 |
424 | TF_MUST_USE_RESULT Attrs Summarize(int64 x) { |
425 | Attrs ret = *this; |
426 | ret.summarize_ = x; |
427 | return ret; |
428 | } |
429 | |
430 | StringPiece template_ = "%s" ; |
431 | StringPiece placeholder_ = "%s" ; |
432 | int64 summarize_ = 3; |
433 | }; |
434 | StringFormat(const ::tensorflow::Scope& scope, ::tensorflow::InputList inputs); |
435 | StringFormat(const ::tensorflow::Scope& scope, ::tensorflow::InputList inputs, |
436 | const StringFormat::Attrs& attrs); |
437 | operator ::tensorflow::Output() const { return output; } |
438 | operator ::tensorflow::Input() const { return output; } |
439 | ::tensorflow::Node* node() const { return output.node(); } |
440 | |
441 | static Attrs Template(StringPiece x) { |
442 | return Attrs().Template(x); |
443 | } |
444 | static Attrs Placeholder(StringPiece x) { |
445 | return Attrs().Placeholder(x); |
446 | } |
447 | static Attrs Summarize(int64 x) { |
448 | return Attrs().Summarize(x); |
449 | } |
450 | |
451 | Operation operation; |
452 | ::tensorflow::Output output; |
453 | }; |
454 | |
455 | /// Joins the strings in the given list of string tensors into one tensor; |
456 | /// |
457 | /// with the given separator (default is an empty separator). |
458 | /// |
459 | /// Examples: |
460 | /// |
461 | /// >>> s = ["hello", "world", "tensorflow"] |
462 | /// >>> tf.strings.join(s, " ") |
463 | /// <tf.Tensor: shape=(), dtype=string, numpy=b'hello world tensorflow'> |
464 | /// |
465 | /// Args: |
466 | /// * scope: A Scope object |
467 | /// * inputs: A list of string tensors. The tensors must all have the same shape, |
468 | /// or be scalars. Scalars may be mixed in; these will be broadcast to the shape |
469 | /// of non-scalar inputs. |
470 | /// |
471 | /// Optional attributes (see `Attrs`): |
472 | /// * separator: string, an optional join separator. |
473 | /// |
474 | /// Returns: |
475 | /// * `Output`: The output tensor. |
476 | class StringJoin { |
477 | public: |
478 | /// Optional attribute setters for StringJoin |
479 | struct Attrs { |
480 | /// string, an optional join separator. |
481 | /// |
482 | /// Defaults to "" |
483 | TF_MUST_USE_RESULT Attrs Separator(StringPiece x) { |
484 | Attrs ret = *this; |
485 | ret.separator_ = x; |
486 | return ret; |
487 | } |
488 | |
489 | StringPiece separator_ = "" ; |
490 | }; |
491 | StringJoin(const ::tensorflow::Scope& scope, ::tensorflow::InputList inputs); |
492 | StringJoin(const ::tensorflow::Scope& scope, ::tensorflow::InputList inputs, |
493 | const StringJoin::Attrs& attrs); |
494 | operator ::tensorflow::Output() const { return output; } |
495 | operator ::tensorflow::Input() const { return output; } |
496 | ::tensorflow::Node* node() const { return output.node(); } |
497 | |
498 | static Attrs Separator(StringPiece x) { |
499 | return Attrs().Separator(x); |
500 | } |
501 | |
502 | Operation operation; |
503 | ::tensorflow::Output output; |
504 | }; |
505 | |
506 | /// String lengths of `input`. |
507 | /// |
508 | /// Computes the length of each string given in the input tensor. |
509 | /// |
510 | /// >>> strings = tf.constant(['Hello','TensorFlow', '\U0001F642']) |
511 | /// >>> tf.strings.length(strings).numpy() # default counts bytes |
512 | /// array([ 5, 10, 4], dtype=int32) |
513 | /// >>> tf.strings.length(strings, unit="UTF8_CHAR").numpy() |
514 | /// array([ 5, 10, 1], dtype=int32) |
515 | /// |
516 | /// |
517 | /// Args: |
518 | /// * scope: A Scope object |
519 | /// * input: The strings for which to compute the length for each element. |
520 | /// |
521 | /// Optional attributes (see `Attrs`): |
522 | /// * unit: The unit that is counted to compute string length. One of: `"BYTE"` (for |
523 | /// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8 |
524 | /// encoded Unicode code points in each string). Results are undefined |
525 | /// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally |
526 | /// valid UTF-8. |
527 | /// |
528 | /// Returns: |
529 | /// * `Output`: Integer tensor that has the same shape as `input`. The output contains the |
530 | /// element-wise string lengths of `input`. |
531 | class StringLength { |
532 | public: |
533 | /// Optional attribute setters for StringLength |
534 | struct Attrs { |
535 | /// The unit that is counted to compute string length. One of: `"BYTE"` (for |
536 | /// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8 |
537 | /// encoded Unicode code points in each string). Results are undefined |
538 | /// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally |
539 | /// valid UTF-8. |
540 | /// |
541 | /// Defaults to "BYTE" |
542 | TF_MUST_USE_RESULT Attrs Unit(StringPiece x) { |
543 | Attrs ret = *this; |
544 | ret.unit_ = x; |
545 | return ret; |
546 | } |
547 | |
548 | StringPiece unit_ = "BYTE" ; |
549 | }; |
550 | StringLength(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
551 | StringLength(const ::tensorflow::Scope& scope, ::tensorflow::Input input, const |
552 | StringLength::Attrs& attrs); |
553 | operator ::tensorflow::Output() const { return output; } |
554 | operator ::tensorflow::Input() const { return output; } |
555 | ::tensorflow::Node* node() const { return output.node(); } |
556 | |
557 | static Attrs Unit(StringPiece x) { |
558 | return Attrs().Unit(x); |
559 | } |
560 | |
561 | Operation operation; |
562 | ::tensorflow::Output output; |
563 | }; |
564 | |
565 | /// Converts all uppercase characters into their respective lowercase replacements. |
566 | /// |
567 | /// Example: |
568 | /// |
569 | /// >>> tf.strings.lower("CamelCase string and ALL CAPS") |
570 | /// <tf.Tensor: shape=(), dtype=string, numpy=b'camelcase string and all caps'> |
571 | /// |
572 | /// |
573 | /// Args: |
574 | /// * scope: A Scope object |
575 | /// * input: The input to be lower-cased. |
576 | /// |
577 | /// Optional attributes (see `Attrs`): |
578 | /// * encoding: Character encoding of `input`. Allowed values are '' and 'utf-8'. |
579 | /// Value '' is interpreted as ASCII. |
580 | /// |
581 | /// Returns: |
582 | /// * `Output`: The output tensor. |
583 | class StringLower { |
584 | public: |
585 | /// Optional attribute setters for StringLower |
586 | struct Attrs { |
587 | /// Character encoding of `input`. Allowed values are '' and 'utf-8'. |
588 | /// Value '' is interpreted as ASCII. |
589 | /// |
590 | /// Defaults to "" |
591 | TF_MUST_USE_RESULT Attrs Encoding(StringPiece x) { |
592 | Attrs ret = *this; |
593 | ret.encoding_ = x; |
594 | return ret; |
595 | } |
596 | |
597 | StringPiece encoding_ = "" ; |
598 | }; |
599 | StringLower(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
600 | StringLower(const ::tensorflow::Scope& scope, ::tensorflow::Input input, const |
601 | StringLower::Attrs& attrs); |
602 | operator ::tensorflow::Output() const { return output; } |
603 | operator ::tensorflow::Input() const { return output; } |
604 | ::tensorflow::Node* node() const { return output.node(); } |
605 | |
606 | static Attrs Encoding(StringPiece x) { |
607 | return Attrs().Encoding(x); |
608 | } |
609 | |
610 | Operation operation; |
611 | ::tensorflow::Output output; |
612 | }; |
613 | |
614 | /// Creates ngrams from ragged string data. |
615 | /// |
616 | /// This op accepts a ragged tensor with 1 ragged dimension containing only |
617 | /// strings and outputs a ragged tensor with 1 ragged dimension containing ngrams |
618 | /// of that string, joined along the innermost axis. |
619 | /// |
620 | /// Args: |
621 | /// * scope: A Scope object |
622 | /// * data: The values tensor of the ragged string tensor to make ngrams out of. Must be a |
623 | /// 1D string tensor. |
624 | /// * data_splits: The splits tensor of the ragged string tensor to make ngrams out of. |
625 | /// * separator: The string to append between elements of the token. Use "" for no separator. |
626 | /// * ngram_widths: The sizes of the ngrams to create. |
627 | /// * left_pad: The string to use to pad the left side of the ngram sequence. Only used if |
628 | /// pad_width != 0. |
629 | /// * right_pad: The string to use to pad the right side of the ngram sequence. Only used if |
630 | /// pad_width != 0. |
631 | /// * pad_width: The number of padding elements to add to each side of each |
632 | /// sequence. Note that padding will never be greater than 'ngram_widths'-1 |
633 | /// regardless of this value. If `pad_width=-1`, then add `max(ngram_widths)-1` |
634 | /// elements. |
635 | /// |
636 | /// Returns: |
637 | /// * `Output` ngrams: The values tensor of the output ngrams ragged tensor. |
638 | /// * `Output` ngrams_splits: The splits tensor of the output ngrams ragged tensor. |
639 | class StringNGrams { |
640 | public: |
641 | StringNGrams(const ::tensorflow::Scope& scope, ::tensorflow::Input data, |
642 | ::tensorflow::Input data_splits, StringPiece separator, const |
643 | gtl::ArraySlice<int>& ngram_widths, StringPiece left_pad, |
644 | StringPiece right_pad, int64 pad_width, bool |
645 | preserve_short_sequences); |
646 | |
647 | Operation operation; |
648 | ::tensorflow::Output ngrams; |
649 | ::tensorflow::Output ngrams_splits; |
650 | }; |
651 | |
652 | /// Split elements of `input` based on `delimiter` into a `SparseTensor`. |
653 | /// |
654 | /// Let N be the size of source (typically N will be the batch size). Split each |
655 | /// element of `input` based on `delimiter` and return a `SparseTensor` |
656 | /// containing the splitted tokens. Empty tokens are ignored. |
657 | /// |
658 | /// `delimiter` can be empty, or a string of split characters. If `delimiter` is an |
659 | /// empty string, each element of `input` is split into individual single-byte |
660 | /// character strings, including splitting of UTF-8 multibyte sequences. Otherwise |
661 | /// every character of `delimiter` is a potential split point. |
662 | /// |
663 | /// For example: |
664 | /// N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output |
665 | /// will be |
666 | /// |
667 | /// indices = [0, 0; |
668 | /// 0, 1; |
669 | /// 1, 0; |
670 | /// 1, 1; |
671 | /// 1, 2] |
672 | /// shape = [2, 3] |
673 | /// values = ['hello', 'world', 'a', 'b', 'c'] |
674 | /// |
675 | /// Args: |
676 | /// * scope: A Scope object |
677 | /// * input: 1-D. Strings to split. |
678 | /// * delimiter: 0-D. Delimiter characters (bytes), or empty string. |
679 | /// |
680 | /// Optional attributes (see `Attrs`): |
681 | /// * skip_empty: A `bool`. If `True`, skip the empty strings from the result. |
682 | /// |
683 | /// Returns: |
684 | /// * `Output` indices: A dense matrix of int64 representing the indices of the sparse tensor. |
685 | /// * `Output` values: A vector of strings corresponding to the splited values. |
686 | /// * `Output` shape: a length-2 vector of int64 representing the shape of the sparse |
687 | /// tensor, where the first value is N and the second value is the maximum number |
688 | /// of tokens in a single input entry. |
689 | class StringSplit { |
690 | public: |
691 | /// Optional attribute setters for StringSplit |
692 | struct Attrs { |
693 | /// A `bool`. If `True`, skip the empty strings from the result. |
694 | /// |
695 | /// Defaults to true |
696 | TF_MUST_USE_RESULT Attrs SkipEmpty(bool x) { |
697 | Attrs ret = *this; |
698 | ret.skip_empty_ = x; |
699 | return ret; |
700 | } |
701 | |
702 | bool skip_empty_ = true; |
703 | }; |
704 | StringSplit(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
705 | ::tensorflow::Input delimiter); |
706 | StringSplit(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
707 | ::tensorflow::Input delimiter, const StringSplit::Attrs& attrs); |
708 | |
709 | static Attrs SkipEmpty(bool x) { |
710 | return Attrs().SkipEmpty(x); |
711 | } |
712 | |
713 | Operation operation; |
714 | ::tensorflow::Output indices; |
715 | ::tensorflow::Output values; |
716 | ::tensorflow::Output shape; |
717 | }; |
718 | |
719 | /// Split elements of `source` based on `sep` into a `SparseTensor`. |
720 | /// |
721 | /// Let N be the size of source (typically N will be the batch size). Split each |
722 | /// element of `source` based on `sep` and return a `SparseTensor` |
723 | /// containing the split tokens. Empty tokens are ignored. |
724 | /// |
725 | /// For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', |
726 | /// then the output will be |
727 | /// ``` |
728 | /// st.indices = [0, 0; |
729 | /// 0, 1; |
730 | /// 1, 0; |
731 | /// 1, 1; |
732 | /// 1, 2] |
733 | /// st.shape = [2, 3] |
734 | /// st.values = ['hello', 'world', 'a', 'b', 'c'] |
735 | /// ``` |
736 | /// |
737 | /// If `sep` is given, consecutive delimiters are not grouped together and are |
738 | /// deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and |
739 | /// sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty |
740 | /// string, consecutive whitespace are regarded as a single separator, and the |
741 | /// result will contain no empty strings at the startor end if the string has |
742 | /// leading or trailing whitespace. |
743 | /// |
744 | /// Note that the above mentioned behavior matches python's str.split. |
745 | /// |
746 | /// Args: |
747 | /// * scope: A Scope object |
748 | /// * input: `1-D` string `Tensor`, the strings to split. |
749 | /// * sep: `0-D` string `Tensor`, the delimiter character. |
750 | /// |
751 | /// Optional attributes (see `Attrs`): |
752 | /// * maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result. |
753 | /// |
754 | /// Returns: |
755 | /// * `Output` indices |
756 | /// * `Output` values |
757 | /// * `Output` shape |
758 | class StringSplitV2 { |
759 | public: |
760 | /// Optional attribute setters for StringSplitV2 |
761 | struct Attrs { |
762 | /// An `int`. If `maxsplit > 0`, limit of the split of the result. |
763 | /// |
764 | /// Defaults to -1 |
765 | TF_MUST_USE_RESULT Attrs Maxsplit(int64 x) { |
766 | Attrs ret = *this; |
767 | ret.maxsplit_ = x; |
768 | return ret; |
769 | } |
770 | |
771 | int64 maxsplit_ = -1; |
772 | }; |
773 | StringSplitV2(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
774 | ::tensorflow::Input sep); |
775 | StringSplitV2(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
776 | ::tensorflow::Input sep, const StringSplitV2::Attrs& attrs); |
777 | |
778 | static Attrs Maxsplit(int64 x) { |
779 | return Attrs().Maxsplit(x); |
780 | } |
781 | |
782 | Operation operation; |
783 | ::tensorflow::Output indices; |
784 | ::tensorflow::Output values; |
785 | ::tensorflow::Output shape; |
786 | }; |
787 | |
788 | /// Strip leading and trailing whitespaces from the Tensor. |
789 | /// |
790 | /// Examples: |
791 | /// |
792 | /// >>> tf.strings.strip(["\nTensorFlow", " The python library "]).numpy() |
793 | /// array([b'TensorFlow', b'The python library'], dtype=object) |
794 | /// |
795 | /// Args: |
796 | /// * scope: A Scope object |
797 | /// * input: A string `Tensor` of any shape. |
798 | /// |
799 | /// Returns: |
800 | /// * `Output`: A string `Tensor` of the same shape as the input. |
801 | class StringStrip { |
802 | public: |
803 | StringStrip(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
804 | operator ::tensorflow::Output() const { return output; } |
805 | operator ::tensorflow::Input() const { return output; } |
806 | ::tensorflow::Node* node() const { return output.node(); } |
807 | |
808 | Operation operation; |
809 | ::tensorflow::Output output; |
810 | }; |
811 | |
812 | /// Converts each string in the input Tensor to its hash mod by a number of buckets. |
813 | /// |
814 | /// The hash function is deterministic on the content of the string within the |
815 | /// process. |
816 | /// |
817 | /// Note that the hash function may change from time to time. |
818 | /// This functionality will be deprecated and it's recommended to use |
819 | /// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. |
820 | /// |
821 | /// Args: |
822 | /// * scope: A Scope object |
823 | /// * num_buckets: The number of buckets. |
824 | /// |
825 | /// Returns: |
826 | /// * `Output`: A Tensor of the same shape as the input `string_tensor`. |
827 | class StringToHashBucket { |
828 | public: |
829 | StringToHashBucket(const ::tensorflow::Scope& scope, ::tensorflow::Input |
830 | string_tensor, int64 num_buckets); |
831 | operator ::tensorflow::Output() const { return output; } |
832 | operator ::tensorflow::Input() const { return output; } |
833 | ::tensorflow::Node* node() const { return output.node(); } |
834 | |
835 | Operation operation; |
836 | ::tensorflow::Output output; |
837 | }; |
838 | |
839 | /// Converts each string in the input Tensor to its hash mod by a number of buckets. |
840 | /// |
841 | /// The hash function is deterministic on the content of the string within the |
842 | /// process and will never change. However, it is not suitable for cryptography. |
843 | /// This function may be used when CPU time is scarce and inputs are trusted or |
844 | /// unimportant. There is a risk of adversaries constructing inputs that all hash |
845 | /// to the same bucket. To prevent this problem, use a strong hash function with |
846 | /// `tf.string_to_hash_bucket_strong`. |
847 | /// |
848 | /// Examples: |
849 | /// |
850 | /// >>> tf.strings.to_hash_bucket_fast(["Hello", "TensorFlow", "2.x"], 3).numpy() |
851 | /// array([0, 2, 2]) |
852 | /// |
853 | /// Args: |
854 | /// * scope: A Scope object |
855 | /// * input: The strings to assign a hash bucket. |
856 | /// * num_buckets: The number of buckets. |
857 | /// |
858 | /// Returns: |
859 | /// * `Output`: A Tensor of the same shape as the input `string_tensor`. |
860 | class StringToHashBucketFast { |
861 | public: |
862 | StringToHashBucketFast(const ::tensorflow::Scope& scope, ::tensorflow::Input |
863 | input, int64 num_buckets); |
864 | operator ::tensorflow::Output() const { return output; } |
865 | operator ::tensorflow::Input() const { return output; } |
866 | ::tensorflow::Node* node() const { return output.node(); } |
867 | |
868 | Operation operation; |
869 | ::tensorflow::Output output; |
870 | }; |
871 | |
872 | /// Converts each string in the input Tensor to its hash mod by a number of buckets. |
873 | /// |
874 | /// The hash function is deterministic on the content of the string within the |
875 | /// process. The hash function is a keyed hash function, where attribute `key` |
876 | /// defines the key of the hash function. `key` is an array of 2 elements. |
877 | /// |
878 | /// A strong hash is important when inputs may be malicious, e.g. URLs with |
879 | /// additional components. Adversaries could try to make their inputs hash to the |
880 | /// same bucket for a denial-of-service attack or to skew the results. A strong |
881 | /// hash can be used to make it difficult to find inputs with a skewed hash value |
882 | /// distribution over buckets. This requires that the hash function is |
883 | /// seeded by a high-entropy (random) "key" unknown to the adversary. |
884 | /// |
885 | /// The additional robustness comes at a cost of roughly 4x higher compute |
886 | /// time than `tf.string_to_hash_bucket_fast`. |
887 | /// |
888 | /// Examples: |
889 | /// |
890 | /// >>> tf.strings.to_hash_bucket_strong(["Hello", "TF"], 3, [1, 2]).numpy() |
891 | /// array([2, 0]) |
892 | /// |
893 | /// Args: |
894 | /// * scope: A Scope object |
895 | /// * input: The strings to assign a hash bucket. |
896 | /// * num_buckets: The number of buckets. |
897 | /// * key: The key used to seed the hash function, passed as a list of two uint64 |
898 | /// elements. |
899 | /// |
900 | /// Returns: |
901 | /// * `Output`: A Tensor of the same shape as the input `string_tensor`. |
902 | class StringToHashBucketStrong { |
903 | public: |
904 | StringToHashBucketStrong(const ::tensorflow::Scope& scope, ::tensorflow::Input |
905 | input, int64 num_buckets, const gtl::ArraySlice<int>& |
906 | key); |
907 | operator ::tensorflow::Output() const { return output; } |
908 | operator ::tensorflow::Input() const { return output; } |
909 | ::tensorflow::Node* node() const { return output.node(); } |
910 | |
911 | Operation operation; |
912 | ::tensorflow::Output output; |
913 | }; |
914 | |
915 | /// Converts all lowercase characters into their respective uppercase replacements. |
916 | /// |
917 | /// Example: |
918 | /// |
919 | /// >>> tf.strings.upper("CamelCase string and ALL CAPS") |
920 | /// <tf.Tensor: shape=(), dtype=string, numpy=b'CAMELCASE STRING AND ALL CAPS'> |
921 | /// |
922 | /// |
923 | /// Args: |
924 | /// * scope: A Scope object |
925 | /// * input: The input to be upper-cased. |
926 | /// |
927 | /// Optional attributes (see `Attrs`): |
928 | /// * encoding: Character encoding of `input`. Allowed values are '' and 'utf-8'. |
929 | /// Value '' is interpreted as ASCII. |
930 | /// |
931 | /// Returns: |
932 | /// * `Output`: The output tensor. |
933 | class StringUpper { |
934 | public: |
935 | /// Optional attribute setters for StringUpper |
936 | struct Attrs { |
937 | /// Character encoding of `input`. Allowed values are '' and 'utf-8'. |
938 | /// Value '' is interpreted as ASCII. |
939 | /// |
940 | /// Defaults to "" |
941 | TF_MUST_USE_RESULT Attrs Encoding(StringPiece x) { |
942 | Attrs ret = *this; |
943 | ret.encoding_ = x; |
944 | return ret; |
945 | } |
946 | |
947 | StringPiece encoding_ = "" ; |
948 | }; |
949 | StringUpper(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
950 | StringUpper(const ::tensorflow::Scope& scope, ::tensorflow::Input input, const |
951 | StringUpper::Attrs& attrs); |
952 | operator ::tensorflow::Output() const { return output; } |
953 | operator ::tensorflow::Input() const { return output; } |
954 | ::tensorflow::Node* node() const { return output.node(); } |
955 | |
956 | static Attrs Encoding(StringPiece x) { |
957 | return Attrs().Encoding(x); |
958 | } |
959 | |
960 | Operation operation; |
961 | ::tensorflow::Output output; |
962 | }; |
963 | |
964 | /// Return substrings from `Tensor` of strings. |
965 | /// |
966 | /// For each string in the input `Tensor`, creates a substring starting at index |
967 | /// `pos` with a total length of `len`. |
968 | /// |
969 | /// If `len` defines a substring that would extend beyond the length of the input |
970 | /// string, or if `len` is negative, then as many characters as possible are used. |
971 | /// |
972 | /// A negative `pos` indicates distance within the string backwards from the end. |
973 | /// |
974 | /// If `pos` specifies an index which is out of range for any of the input strings, |
975 | /// then an `InvalidArgumentError` is thrown. |
976 | /// |
977 | /// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on |
978 | /// Op creation. |
979 | /// |
980 | /// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about |
981 | /// broadcasting |
982 | /// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) |
983 | /// |
984 | /// --- |
985 | /// |
986 | /// Examples |
987 | /// |
988 | /// Using scalar `pos` and `len`: |
989 | /// |
990 | /// ```python |
991 | /// input = [b'Hello', b'World'] |
992 | /// position = 1 |
993 | /// length = 3 |
994 | /// |
995 | /// output = [b'ell', b'orl'] |
996 | /// ``` |
997 | /// |
998 | /// Using `pos` and `len` with same shape as `input`: |
999 | /// |
1000 | /// ```python |
1001 | /// input = [[b'ten', b'eleven', b'twelve'], |
1002 | /// [b'thirteen', b'fourteen', b'fifteen'], |
1003 | /// [b'sixteen', b'seventeen', b'eighteen']] |
1004 | /// position = [[1, 2, 3], |
1005 | /// [1, 2, 3], |
1006 | /// [1, 2, 3]] |
1007 | /// length = [[2, 3, 4], |
1008 | /// [4, 3, 2], |
1009 | /// [5, 5, 5]] |
1010 | /// |
1011 | /// output = [[b'en', b'eve', b'lve'], |
1012 | /// [b'hirt', b'urt', b'te'], |
1013 | /// [b'ixtee', b'vente', b'hteen']] |
1014 | /// ``` |
1015 | /// |
1016 | /// Broadcasting `pos` and `len` onto `input`: |
1017 | /// |
1018 | /// ``` |
1019 | /// input = [[b'ten', b'eleven', b'twelve'], |
1020 | /// [b'thirteen', b'fourteen', b'fifteen'], |
1021 | /// [b'sixteen', b'seventeen', b'eighteen'], |
1022 | /// [b'nineteen', b'twenty', b'twentyone']] |
1023 | /// position = [1, 2, 3] |
1024 | /// length = [1, 2, 3] |
1025 | /// |
1026 | /// output = [[b'e', b'ev', b'lve'], |
1027 | /// [b'h', b'ur', b'tee'], |
1028 | /// [b'i', b've', b'hte'], |
1029 | /// [b'i', b'en', b'nty']] |
1030 | /// ``` |
1031 | /// |
1032 | /// Broadcasting `input` onto `pos` and `len`: |
1033 | /// |
1034 | /// ``` |
1035 | /// input = b'thirteen' |
1036 | /// position = [1, 5, 7] |
1037 | /// length = [3, 2, 1] |
1038 | /// |
1039 | /// output = [b'hir', b'ee', b'n'] |
1040 | /// ``` |
1041 | /// |
1042 | /// Raises: |
1043 | /// |
1044 | /// * `ValueError`: If the first argument cannot be converted to a |
1045 | /// Tensor of `dtype string`. |
1046 | /// * `InvalidArgumentError`: If indices are out of range. |
1047 | /// * `ValueError`: If `pos` and `len` are not the same shape. |
1048 | /// |
1049 | /// |
1050 | /// Args: |
1051 | /// * scope: A Scope object |
1052 | /// * input: Tensor of strings |
1053 | /// * pos: Scalar defining the position of first character in each substring |
1054 | /// * len: Scalar defining the number of characters to include in each substring |
1055 | /// |
1056 | /// Optional attributes (see `Attrs`): |
1057 | /// * unit: The unit that is used to create the substring. One of: `"BYTE"` (for |
1058 | /// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8 |
1059 | /// encoded Unicode code points). The default is `"BYTE"`. Results are undefined if |
1060 | /// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid |
1061 | /// UTF-8. |
1062 | /// |
1063 | /// Returns: |
1064 | /// * `Output`: Tensor of substrings |
1065 | class Substr { |
1066 | public: |
1067 | /// Optional attribute setters for Substr |
1068 | struct Attrs { |
1069 | /// The unit that is used to create the substring. One of: `"BYTE"` (for |
1070 | /// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8 |
1071 | /// encoded Unicode code points). The default is `"BYTE"`. Results are undefined if |
1072 | /// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid |
1073 | /// UTF-8. |
1074 | /// |
1075 | /// Defaults to "BYTE" |
1076 | TF_MUST_USE_RESULT Attrs Unit(StringPiece x) { |
1077 | Attrs ret = *this; |
1078 | ret.unit_ = x; |
1079 | return ret; |
1080 | } |
1081 | |
1082 | StringPiece unit_ = "BYTE" ; |
1083 | }; |
1084 | Substr(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
1085 | ::tensorflow::Input pos, ::tensorflow::Input len); |
1086 | Substr(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
1087 | ::tensorflow::Input pos, ::tensorflow::Input len, const Substr::Attrs& |
1088 | attrs); |
1089 | operator ::tensorflow::Output() const { return output; } |
1090 | operator ::tensorflow::Input() const { return output; } |
1091 | ::tensorflow::Node* node() const { return output.node(); } |
1092 | |
1093 | static Attrs Unit(StringPiece x) { |
1094 | return Attrs().Unit(x); |
1095 | } |
1096 | |
1097 | Operation operation; |
1098 | ::tensorflow::Output output; |
1099 | }; |
1100 | |
1101 | /// Determine the script codes of a given tensor of Unicode integer code points. |
1102 | /// |
1103 | /// This operation converts Unicode code points to script codes corresponding to |
1104 | /// each code point. Script codes correspond to International Components for |
1105 | /// Unicode (ICU) UScriptCode values. |
1106 | /// |
1107 | /// See |
1108 | /// [ICU project docs](http://icu-project.org/apiref/icu4c/uscript_8h.html) |
1109 | /// for more details on script codes. |
1110 | /// |
1111 | /// For an example, see the unicode strings guide on [unicode scripts] |
1112 | /// (https://www.tensorflow.org/tutorials/load_data/unicode#representing_unicode). |
1113 | /// |
1114 | /// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will |
1115 | /// match input shape. |
1116 | /// |
1117 | /// Examples: |
1118 | /// |
1119 | /// >>> tf.strings.unicode_script([1, 31, 38]) |
1120 | /// <tf.Tensor: shape=(3,), dtype=int32, numpy=array([0, 0, 0], dtype=int32)> |
1121 | /// |
1122 | /// Args: |
1123 | /// * scope: A Scope object |
1124 | /// * input: A Tensor of int32 Unicode code points. |
1125 | /// |
1126 | /// Returns: |
1127 | /// * `Output`: A Tensor of int32 script codes corresponding to each input code point. |
1128 | class UnicodeScript { |
1129 | public: |
1130 | UnicodeScript(const ::tensorflow::Scope& scope, ::tensorflow::Input input); |
1131 | operator ::tensorflow::Output() const { return output; } |
1132 | operator ::tensorflow::Input() const { return output; } |
1133 | ::tensorflow::Node* node() const { return output.node(); } |
1134 | |
1135 | Operation operation; |
1136 | ::tensorflow::Output output; |
1137 | }; |
1138 | |
1139 | /// Transcode the input text from a source encoding to a destination encoding. |
1140 | /// |
1141 | /// The input is a string tensor of any shape. The output is a string tensor of |
1142 | /// the same shape containing the transcoded strings. Output strings are always |
1143 | /// valid unicode. If the input contains invalid encoding positions, the |
1144 | /// `errors` attribute sets the policy for how to deal with them. If the default |
1145 | /// error-handling policy is used, invalid formatting will be substituted in the |
1146 | /// output by the `replacement_char`. If the errors policy is to `ignore`, any |
1147 | /// invalid encoding positions in the input are skipped and not included in the |
1148 | /// output. If it set to `strict` then any invalid formatting will result in an |
1149 | /// InvalidArgument error. |
1150 | /// |
1151 | /// This operation can be used with `output_encoding = input_encoding` to enforce |
1152 | /// correct formatting for inputs even if they are already in the desired encoding. |
1153 | /// |
1154 | /// If the input is prefixed by a Byte Order Mark needed to determine encoding |
1155 | /// (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that |
1156 | /// BOM will be consumed and not emitted into the output. If the input encoding |
1157 | /// is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is |
1158 | /// interpreted as a non-breaking-space and is preserved in the output (including |
1159 | /// always for UTF-8). |
1160 | /// |
1161 | /// The end result is that if the input is marked as an explicit endianness the |
1162 | /// transcoding is faithful to all codepoints in the source. If it is not marked |
1163 | /// with an explicit endianness, the BOM is not considered part of the string itself |
1164 | /// but as metadata, and so is not preserved in the output. |
1165 | /// |
1166 | /// Examples: |
1167 | /// |
1168 | /// >>> tf.strings.unicode_transcode(["Hello", "TensorFlow", "2.x"], "UTF-8", "UTF-16-BE") |
1169 | /// <tf.Tensor: shape=(3,), dtype=string, numpy= |
1170 | /// array([b'\x00H\x00e\x00l\x00l\x00o', |
1171 | /// b'\x00T\x00e\x00n\x00s\x00o\x00r\x00F\x00l\x00o\x00w', |
1172 | /// b'\x002\x00.\x00x'], dtype=object)> |
1173 | /// >>> tf.strings.unicode_transcode(["A", "B", "C"], "US ASCII", "UTF-8").numpy() |
1174 | /// array([b'A', b'B', b'C'], dtype=object) |
1175 | /// |
1176 | /// Args: |
1177 | /// * scope: A Scope object |
1178 | /// * input: The text to be processed. Can have any shape. |
1179 | /// * input_encoding: Text encoding of the input strings. This is any of the encodings supported |
1180 | /// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. |
1181 | /// * output_encoding: The unicode encoding to use in the output. Must be one of |
1182 | /// `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be big-endian. |
1183 | /// |
1184 | /// Optional attributes (see `Attrs`): |
1185 | /// * errors: Error handling policy when there is invalid formatting found in the input. |
1186 | /// The value of 'strict' will cause the operation to produce a InvalidArgument |
1187 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
1188 | /// cause the operation to replace any invalid formatting in the input with the |
1189 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
1190 | /// skip any invalid formatting in the input and produce no corresponding output |
1191 | /// character. |
1192 | /// * replacement_char: The replacement character codepoint to be used in place of any invalid |
1193 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
1194 | /// be used. The default value is the default unicode replacement character is |
1195 | /// 0xFFFD or U+65533.) |
1196 | /// |
1197 | /// Note that for UTF-8, passing a replacement character expressible in 1 byte, such |
1198 | /// as ' ', will preserve string alignment to the source since invalid bytes will be |
1199 | /// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte |
1200 | /// replacement character will preserve byte alignment to the source. |
1201 | /// * replace_control_characters: Whether to replace the C0 control characters (00-1F) with the |
1202 | /// `replacement_char`. Default is false. |
1203 | /// |
1204 | /// Returns: |
1205 | /// * `Output`: A string tensor containing unicode text encoded using `output_encoding`. |
1206 | class UnicodeTranscode { |
1207 | public: |
1208 | /// Optional attribute setters for UnicodeTranscode |
1209 | struct Attrs { |
1210 | /// Error handling policy when there is invalid formatting found in the input. |
1211 | /// The value of 'strict' will cause the operation to produce a InvalidArgument |
1212 | /// error on any invalid input formatting. A value of 'replace' (the default) will |
1213 | /// cause the operation to replace any invalid formatting in the input with the |
1214 | /// `replacement_char` codepoint. A value of 'ignore' will cause the operation to |
1215 | /// skip any invalid formatting in the input and produce no corresponding output |
1216 | /// character. |
1217 | /// |
1218 | /// Defaults to "replace" |
1219 | TF_MUST_USE_RESULT Attrs Errors(StringPiece x) { |
1220 | Attrs ret = *this; |
1221 | ret.errors_ = x; |
1222 | return ret; |
1223 | } |
1224 | |
1225 | /// The replacement character codepoint to be used in place of any invalid |
1226 | /// formatting in the input when `errors='replace'`. Any valid unicode codepoint may |
1227 | /// be used. The default value is the default unicode replacement character is |
1228 | /// 0xFFFD or U+65533.) |
1229 | /// |
1230 | /// Note that for UTF-8, passing a replacement character expressible in 1 byte, such |
1231 | /// as ' ', will preserve string alignment to the source since invalid bytes will be |
1232 | /// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte |
1233 | /// replacement character will preserve byte alignment to the source. |
1234 | /// |
1235 | /// Defaults to 65533 |
1236 | TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) { |
1237 | Attrs ret = *this; |
1238 | ret.replacement_char_ = x; |
1239 | return ret; |
1240 | } |
1241 | |
1242 | /// Whether to replace the C0 control characters (00-1F) with the |
1243 | /// `replacement_char`. Default is false. |
1244 | /// |
1245 | /// Defaults to false |
1246 | TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) { |
1247 | Attrs ret = *this; |
1248 | ret.replace_control_characters_ = x; |
1249 | return ret; |
1250 | } |
1251 | |
1252 | StringPiece errors_ = "replace" ; |
1253 | int64 replacement_char_ = 65533; |
1254 | bool replace_control_characters_ = false; |
1255 | }; |
1256 | UnicodeTranscode(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
1257 | StringPiece input_encoding, StringPiece output_encoding); |
1258 | UnicodeTranscode(const ::tensorflow::Scope& scope, ::tensorflow::Input input, |
1259 | StringPiece input_encoding, StringPiece output_encoding, const |
1260 | UnicodeTranscode::Attrs& attrs); |
1261 | operator ::tensorflow::Output() const { return output; } |
1262 | operator ::tensorflow::Input() const { return output; } |
1263 | ::tensorflow::Node* node() const { return output.node(); } |
1264 | |
1265 | static Attrs Errors(StringPiece x) { |
1266 | return Attrs().Errors(x); |
1267 | } |
1268 | static Attrs ReplacementChar(int64 x) { |
1269 | return Attrs().ReplacementChar(x); |
1270 | } |
1271 | static Attrs ReplaceControlCharacters(bool x) { |
1272 | return Attrs().ReplaceControlCharacters(x); |
1273 | } |
1274 | |
1275 | Operation operation; |
1276 | ::tensorflow::Output output; |
1277 | }; |
1278 | |
1279 | /// @} |
1280 | |
1281 | } // namespace ops |
1282 | } // namespace tensorflow |
1283 | |
1284 | #endif // TENSORFLOW_CC_OPS_STRING_OPS_H_ |
1285 | |