string_ops_internal.h source code [tensorflow/tensorflow/cc/ops/string_ops_internal.h]

1	// This file is MACHINE GENERATED! Do not edit.
2
3	#ifndef TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_
4	#define TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_
5
6	// This file is MACHINE GENERATED! Do not edit.
7
8	#include "tensorflow/cc/framework/ops.h"
9	#include "tensorflow/cc/framework/scope.h"
10	#include "tensorflow/core/framework/tensor.h"
11	#include "tensorflow/core/framework/tensor_shape.h"
12	#include "tensorflow/core/framework/types.h"
13	#include "tensorflow/core/lib/gtl/array_slice.h"
14
15	namespace tensorflow {
16	namespace ops {
17	namespace internal {
18	// NOTE: This namespace has internal TensorFlow details that
19	// are not part of TensorFlow's public API.
20
21	/// @defgroup string_ops_internal String Ops Internal
22	/// @{
23
24	/// Check if the input matches the regex pattern.
25	///
26	/// The input is a string tensor of any shape. The pattern is the
27	/// regular expression to be matched with every element of the input tensor.
28	/// The boolean values (True or False) of the output tensor indicate
29	/// if the input matches the regex pattern provided.
30	///
31	/// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
32	///
33	/// Args:
34	/// scope: A Scope object*
35	/// input: A string tensor of the text to be processed.*
36	/// pattern: The regular expression to match the input.*
37	///
38	/// Returns:
39	/// `Output`: A bool tensor with the same shape as `input`.*
40	class StaticRegexFullMatch {
41	public:
42	StaticRegexFullMatch(const ::tensorflow::Scope& scope, ::tensorflow::Input
43	input, StringPiece pattern);
44	operator ::tensorflow::Output() const { return output; }
45	operator ::tensorflow::Input() const { return output; }
46	::tensorflow::Node* node() const { return output.node(); }
47
48	Operation operation;
49	::tensorflow::Output output;
50	};
51
52	/// Replaces the match of pattern in input with rewrite.
53	///
54	/// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
55	///
56	/// Args:
57	/// scope: A Scope object*
58	/// input: The text to be processed.*
59	/// pattern: The regular expression to match the input.*
60	/// rewrite: The rewrite to be applied to the matched expression.*
61	///
62	/// Optional attributes (see `Attrs`):
63	/// replace_global: If True, the replacement is global, otherwise the replacement*
64	/// is done only on the first match.
65	///
66	/// Returns:
67	/// `Output`: The text after applying pattern and rewrite.*
68	class StaticRegexReplace {
69	public:
70	/// Optional attribute setters for StaticRegexReplace
71	struct Attrs {
72	/// If True, the replacement is global, otherwise the replacement
73	/// is done only on the first match.
74	///
75	/// Defaults to true
76	TF_MUST_USE_RESULT Attrs ReplaceGlobal(bool x) {
77	Attrs ret = *this;
78	ret.replace_global_ = x;
79	return ret;
80	}
81
82	bool replace_global_ = true;
83	};
84	StaticRegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
85	StringPiece pattern, StringPiece rewrite);
86	StaticRegexReplace(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
87	StringPiece pattern, StringPiece rewrite, const
88	StaticRegexReplace::Attrs& attrs);
89	operator ::tensorflow::Output() const { return output; }
90	operator ::tensorflow::Input() const { return output; }
91	::tensorflow::Node* node() const { return output.node(); }
92
93	static Attrs ReplaceGlobal(bool x) {
94	return Attrs ().ReplaceGlobal(x);
95	}
96
97	Operation operation;
98	::tensorflow::Output output;
99	};
100
101	/// Decodes each string in `input` into a sequence of Unicode code points.
102	///
103	/// The character codepoints for all strings are returned using a single vector
104	/// `char_values`, with strings expanded to characters in row-major order.
105	///
106	/// The `row_splits` tensor indicates where the codepoints for
107	/// each input string begin and end within the `char_values` tensor.
108	/// In particular, the values for the `i`th
109	/// string (in row-major order) are stored in the slice
110	/// `[row_splits[i]:row_splits[i+1]]`. Thus:
111	///
112	/// `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th*
113	/// character in the `i`th string (in row-major order).
114	/// `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th*
115	/// string (in row-major order).
116	///
117	/// Args:
118	/// scope: A Scope object*
119	/// input: The text to be decoded. Can have any shape. Note that the output is flattened*
120	/// to a vector of char values.
121	/// input_encoding: Text encoding of the input strings. This is any of the encodings supported*
122	/// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
123	///
124	/// Optional attributes (see `Attrs`):
125	/// errors: Error handling policy when there is invalid formatting found in the input.*
126	/// The value of 'strict' will cause the operation to produce a InvalidArgument
127	/// error on any invalid input formatting. A value of 'replace' (the default) will
128	/// cause the operation to replace any invalid formatting in the input with the
129	/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
130	/// skip any invalid formatting in the input and produce no corresponding output
131	/// character.
132	/// replacement_char: The replacement character codepoint to be used in place of any invalid*
133	/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
134	/// be used. The default value is the default unicode replacement character is
135	/// 0xFFFD or U+65533.)
136	/// replace_control_characters: Whether to replace the C0 control characters (00-1F) with the*
137	/// `replacement_char`. Default is false.
138	///
139	/// Returns:
140	/// `Output` row_splits: A 1D int32 tensor containing the row splits.*
141	/// `Output` char_values: A 1D int32 Tensor containing the decoded codepoints.*
142	class UnicodeDecode {
143	public:
144	/// Optional attribute setters for UnicodeDecode
145	struct Attrs {
146	/// Error handling policy when there is invalid formatting found in the input.
147	/// The value of 'strict' will cause the operation to produce a InvalidArgument
148	/// error on any invalid input formatting. A value of 'replace' (the default) will
149	/// cause the operation to replace any invalid formatting in the input with the
150	/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
151	/// skip any invalid formatting in the input and produce no corresponding output
152	/// character.
153	///
154	/// Defaults to "replace"
155	TF_MUST_USE_RESULT Attrs Errors(StringPiece x) {
156	Attrs ret = *this;
157	ret.errors_ = x;
158	return ret;
159	}
160
161	/// The replacement character codepoint to be used in place of any invalid
162	/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
163	/// be used. The default value is the default unicode replacement character is
164	/// 0xFFFD or U+65533.)
165	///
166	/// Defaults to 65533
167	TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) {
168	Attrs ret = *this;
169	ret.replacement_char_ = x;
170	return ret;
171	}
172
173	/// Whether to replace the C0 control characters (00-1F) with the
174	/// `replacement_char`. Default is false.
175	///
176	/// Defaults to false
177	TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) {
178	Attrs ret = *this;
179	ret.replace_control_characters_ = x;
180	return ret;
181	}
182
183	/// Defaults to DT_INT64
184	TF_MUST_USE_RESULT Attrs Tsplits(DataType x) {
185	Attrs ret = *this;
186	ret.Tsplits_ = x;
187	return ret;
188	}
189
190	StringPiece errors_ = "replace";
191	int64 replacement_char_ = `65533`;
192	bool replace_control_characters_ = false;
193	DataType Tsplits_ = DT_INT64;
194	};
195	UnicodeDecode(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
196	StringPiece input_encoding);
197	UnicodeDecode(const ::tensorflow::Scope& scope, ::tensorflow::Input input,
198	StringPiece input_encoding, const UnicodeDecode::Attrs& attrs);
199
200	static Attrs Errors(StringPiece x) {
201	return Attrs ().Errors(x);
202	}
203	static Attrs ReplacementChar(int64 x) {
204	return Attrs ().ReplacementChar(x);
205	}
206	static Attrs ReplaceControlCharacters(bool x) {
207	return Attrs ().ReplaceControlCharacters(x);
208	}
209	static Attrs Tsplits(DataType x) {
210	return Attrs ().Tsplits(x);
211	}
212
213	Operation operation;
214	::tensorflow::Output row_splits;
215	::tensorflow::Output char_values;
216	};
217
218	/// Decodes each string in `input` into a sequence of Unicode code points.
219	///
220	/// The character codepoints for all strings are returned using a single vector
221	/// `char_values`, with strings expanded to characters in row-major order.
222	/// Similarly, the character start byte offsets are returned using a single vector
223	/// `char_to_byte_starts`, with strings expanded in row-major order.
224	///
225	/// The `row_splits` tensor indicates where the codepoints and start offsets for
226	/// each input string begin and end within the `char_values` and
227	/// `char_to_byte_starts` tensors. In particular, the values for the `i`th
228	/// string (in row-major order) are stored in the slice
229	/// `[row_splits[i]:row_splits[i+1]]`. Thus:
230	///
231	/// `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th*
232	/// character in the `i`th string (in row-major order).
233	/// `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th*
234	/// character in the `i`th string (in row-major order).
235	/// `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th*
236	/// string (in row-major order).
237	///
238	/// Args:
239	/// scope: A Scope object*
240	/// input: The text to be decoded. Can have any shape. Note that the output is flattened*
241	/// to a vector of char values.
242	/// input_encoding: Text encoding of the input strings. This is any of the encodings supported*
243	/// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
244	///
245	/// Optional attributes (see `Attrs`):
246	/// errors: Error handling policy when there is invalid formatting found in the input.*
247	/// The value of 'strict' will cause the operation to produce a InvalidArgument
248	/// error on any invalid input formatting. A value of 'replace' (the default) will
249	/// cause the operation to replace any invalid formatting in the input with the
250	/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
251	/// skip any invalid formatting in the input and produce no corresponding output
252	/// character.
253	/// replacement_char: The replacement character codepoint to be used in place of any invalid*
254	/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
255	/// be used. The default value is the default unicode replacement character is
256	/// 0xFFFD or U+65533.)
257	/// replace_control_characters: Whether to replace the C0 control characters (00-1F) with the*
258	/// `replacement_char`. Default is false.
259	///
260	/// Returns:
261	/// `Output` row_splits: A 1D int32 tensor containing the row splits.*
262	/// `Output` char_values: A 1D int32 Tensor containing the decoded codepoints.*
263	/// `Output` char_to_byte_starts: A 1D int32 Tensor containing the byte index in the input string where each*
264	/// character in `char_values` starts.
265	class UnicodeDecodeWithOffsets {
266	public:
267	/// Optional attribute setters for UnicodeDecodeWithOffsets
268	struct Attrs {
269	/// Error handling policy when there is invalid formatting found in the input.
270	/// The value of 'strict' will cause the operation to produce a InvalidArgument
271	/// error on any invalid input formatting. A value of 'replace' (the default) will
272	/// cause the operation to replace any invalid formatting in the input with the
273	/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
274	/// skip any invalid formatting in the input and produce no corresponding output
275	/// character.
276	///
277	/// Defaults to "replace"
278	TF_MUST_USE_RESULT Attrs Errors(StringPiece x) {
279	Attrs ret = *this;
280	ret.errors_ = x;
281	return ret;
282	}
283
284	/// The replacement character codepoint to be used in place of any invalid
285	/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
286	/// be used. The default value is the default unicode replacement character is
287	/// 0xFFFD or U+65533.)
288	///
289	/// Defaults to 65533
290	TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) {
291	Attrs ret = *this;
292	ret.replacement_char_ = x;
293	return ret;
294	}
295
296	/// Whether to replace the C0 control characters (00-1F) with the
297	/// `replacement_char`. Default is false.
298	///
299	/// Defaults to false
300	TF_MUST_USE_RESULT Attrs ReplaceControlCharacters(bool x) {
301	Attrs ret = *this;
302	ret.replace_control_characters_ = x;
303	return ret;
304	}
305
306	/// Defaults to DT_INT64
307	TF_MUST_USE_RESULT Attrs Tsplits(DataType x) {
308	Attrs ret = *this;
309	ret.Tsplits_ = x;
310	return ret;
311	}
312
313	StringPiece errors_ = "replace";
314	int64 replacement_char_ = `65533`;
315	bool replace_control_characters_ = false;
316	DataType Tsplits_ = DT_INT64;
317	};
318	UnicodeDecodeWithOffsets(const ::tensorflow::Scope& scope, ::tensorflow::Input
319	input, StringPiece input_encoding);
320	UnicodeDecodeWithOffsets(const ::tensorflow::Scope& scope, ::tensorflow::Input
321	input, StringPiece input_encoding, const
322	UnicodeDecodeWithOffsets::Attrs& attrs);
323
324	static Attrs Errors(StringPiece x) {
325	return Attrs ().Errors(x);
326	}
327	static Attrs ReplacementChar(int64 x) {
328	return Attrs ().ReplacementChar(x);
329	}
330	static Attrs ReplaceControlCharacters(bool x) {
331	return Attrs ().ReplaceControlCharacters(x);
332	}
333	static Attrs Tsplits(DataType x) {
334	return Attrs ().Tsplits(x);
335	}
336
337	Operation operation;
338	::tensorflow::Output row_splits;
339	::tensorflow::Output char_values;
340	::tensorflow::Output char_to_byte_starts;
341	};
342
343	/// Encode a tensor of ints into unicode strings.
344	///
345	/// Returns a vector of strings, where `output[i]` is constructed by encoding the
346	/// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]`
347	/// using `output_encoding`.
348	///
349	/// ---
350	///
351	/// Example:
352	///
353	/// ```
354	/// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100]
355	/// input_splits = [0, 5, 10]
356	/// output_encoding = 'UTF-8'
357	///
358	/// output = ['Hello', 'World']
359	/// ```
360	///
361	/// Args:
362	/// scope: A Scope object*
363	/// input_values: A 1D tensor containing the unicode codepoints that should be encoded.*
364	/// input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings.*
365	/// In particular, `output[i]` is constructed by encoding the codepoints in the
366	/// slice `input_values[input_splits[i]:input_splits[i+1]]`.
367	/// output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8",*
368	/// "UTF-16-BE", and "UTF-32-BE"`.
369	///
370	/// Optional attributes (see `Attrs`):
371	/// errors: Error handling policy when there is invalid formatting found in the input.*
372	/// The value of 'strict' will cause the operation to produce a InvalidArgument
373	/// error on any invalid input formatting. A value of 'replace' (the default) will
374	/// cause the operation to replace any invalid formatting in the input with the
375	/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
376	/// skip any invalid formatting in the input and produce no corresponding output
377	/// character.
378	/// replacement_char: The replacement character codepoint to be used in place of any invalid*
379	/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
380	/// be used. The default value is the default unicode replacement character is
381	/// 0xFFFD (U+65533).
382	///
383	/// Returns:
384	/// `Output`: The 1-D Tensor of strings encoded from the provided unicode codepoints.*
385	class UnicodeEncode {
386	public:
387	/// Optional attribute setters for UnicodeEncode
388	struct Attrs {
389	/// Error handling policy when there is invalid formatting found in the input.
390	/// The value of 'strict' will cause the operation to produce a InvalidArgument
391	/// error on any invalid input formatting. A value of 'replace' (the default) will
392	/// cause the operation to replace any invalid formatting in the input with the
393	/// `replacement_char` codepoint. A value of 'ignore' will cause the operation to
394	/// skip any invalid formatting in the input and produce no corresponding output
395	/// character.
396	///
397	/// Defaults to "replace"
398	TF_MUST_USE_RESULT Attrs Errors(StringPiece x) {
399	Attrs ret = *this;
400	ret.errors_ = x;
401	return ret;
402	}
403
404	/// The replacement character codepoint to be used in place of any invalid
405	/// formatting in the input when `errors='replace'`. Any valid unicode codepoint may
406	/// be used. The default value is the default unicode replacement character is
407	/// 0xFFFD (U+65533).
408	///
409	/// Defaults to 65533
410	TF_MUST_USE_RESULT Attrs ReplacementChar(int64 x) {
411	Attrs ret = *this;
412	ret.replacement_char_ = x;
413	return ret;
414	}
415
416	StringPiece errors_ = "replace";
417	int64 replacement_char_ = `65533`;
418	};
419	UnicodeEncode(const ::tensorflow::Scope& scope, ::tensorflow::Input
420	input_values, ::tensorflow::Input input_splits, StringPiece
421	output_encoding);
422	UnicodeEncode(const ::tensorflow::Scope& scope, ::tensorflow::Input
423	input_values, ::tensorflow::Input input_splits, StringPiece
424	output_encoding, const UnicodeEncode::Attrs& attrs);
425	operator ::tensorflow::Output() const { return output; }
426	operator ::tensorflow::Input() const { return output; }
427	::tensorflow::Node* node() const { return output.node(); }
428
429	static Attrs Errors(StringPiece x) {
430	return Attrs ().Errors(x);
431	}
432	static Attrs ReplacementChar(int64 x) {
433	return Attrs ().ReplacementChar(x);
434	}
435
436	Operation operation;
437	::tensorflow::Output output;
438	};
439
440	/// TODO: add doc.
441	///
442	/// Args:
443	/// scope: A Scope object*
444	///
445	/// Returns:
446	/// `Output`: The output tensor.*
447	class UnsortedSegmentJoin {
448	public:
449	/// Optional attribute setters for UnsortedSegmentJoin
450	struct Attrs {
451	/// Defaults to ""
452	TF_MUST_USE_RESULT Attrs Separator(StringPiece x) {
453	Attrs ret = *this;
454	ret.separator_ = x;
455	return ret;
456	}
457
458	StringPiece separator_ = "";
459	};
460	UnsortedSegmentJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input
461	inputs, ::tensorflow::Input segment_ids,
462	::tensorflow::Input num_segments);
463	UnsortedSegmentJoin(const ::tensorflow::Scope& scope, ::tensorflow::Input
464	inputs, ::tensorflow::Input segment_ids,
465	::tensorflow::Input num_segments, const
466	UnsortedSegmentJoin::Attrs& attrs);
467	operator ::tensorflow::Output() const { return output; }
468	operator ::tensorflow::Input() const { return output; }
469	::tensorflow::Node* node() const { return output.node(); }
470
471	static Attrs Separator(StringPiece x) {
472	return Attrs ().Separator(x);
473	}
474
475	Operation operation;
476	::tensorflow::Output output;
477	};
478
479	} // namespace internal
480	} // namespace ops
481	} // namespace tensorflow
482
483	#endif // TENSORFLOW_CC_OPS_STRING_OPS_INTERNAL_H_
484

Browse the source code of tensorflow/tensorflow/cc/ops/string_ops_internal.h