1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // A set of lightweight wrappers which simplify access to Feature protos. |
17 | // |
18 | // TensorFlow Example proto uses associative maps on top of oneof fields. |
19 | // SequenceExample proto uses associative map of FeatureList. |
20 | // So accessing feature values is not very convenient. |
21 | // |
// For example, to read the first value of the integer feature "tag":
23 | // int id = example.features().feature().at("tag").int64_list().value(0); |
24 | // |
25 | // to add a value: |
26 | // auto features = example->mutable_features(); |
27 | // (*features->mutable_feature())["tag"].mutable_int64_list()->add_value(id); |
28 | // |
29 | // For float features you have to use float_list, for string - bytes_list. |
30 | // |
31 | // To do the same with this library: |
32 | // int id = GetFeatureValues<int64_t>("tag", example).Get(0); |
33 | // GetFeatureValues<int64_t>("tag", &example)->Add(id); |
34 | // |
35 | // Modification of bytes features is slightly different: |
36 | // auto tag = GetFeatureValues<std::string>("tag", &example); |
37 | // *tag->Add() = "lorem ipsum"; |
38 | // |
39 | // To copy multiple values into a feature: |
40 | // AppendFeatureValues({1,2,3}, "tag", &example); |
41 | // |
// GetFeatureValues gives you access to the underlying data - a RepeatedField
// object (RepeatedPtrField for a bytes list). Refer to the RepeatedField
// documentation for the full list of supported methods.
45 | // |
46 | // NOTE: Due to the nature of oneof proto fields setting a feature of one type |
47 | // automatically clears all values stored as another type with the same feature |
48 | // key. |
49 | // |
50 | // This library also has tools to work with SequenceExample protos. |
51 | // |
52 | // To get a value from SequenceExample.context: |
53 | // int id = GetFeatureValues<protobuf_int64>("tag", se.context()).Get(0); |
54 | // To add a value to the context: |
55 | // GetFeatureValues<protobuf_int64>("tag", se.mutable_context())->Add(42); |
56 | // |
57 | // To add values to feature_lists: |
58 | // AppendFeatureValues({4.0}, |
59 | // GetFeatureList("images", &se)->Add()); |
60 | // AppendFeatureValues({5.0, 3.0}, |
61 | // GetFeatureList("images", &se)->Add()); |
62 | // This will create a feature list keyed as "images" with two features: |
63 | // feature_lists { |
64 | // feature_list { |
65 | // key: "images" |
66 | // value { |
67 | // feature { float_list { value: [4.0] } } |
68 | // feature { float_list { value: [5.0, 3.0] } } |
69 | // } |
70 | // } |
71 | // } |
72 | // For string-valued features, note that the Append... and Set... functions |
73 | // support absl::string_view containers. This allows you to copy existing |
74 | // buffers into a Feature with only one copy: |
75 | // std::vector<absl::string_view> image; |
76 | // image.push_back(image_buffer); // No copy. |
77 | // SetFeatureValues(image, "image", &example); // Copy. |
78 | // |
79 | // Functions exposed by this library: |
80 | // HasFeature<[FeatureType]>(key, proto) -> bool |
81 | // Returns true if a feature with the specified key, and optionally |
82 | // FeatureType, belongs to the Features or Example proto. |
83 | // HasFeatureList(key, sequence_example) -> bool |
84 | // Returns true if SequenceExample has a feature_list with the key. |
85 | // |
86 | // GetFeatureValues<FeatureType>(key, proto) -> RepeatedField<FeatureType> |
87 | // Returns values for the specified key and the FeatureType. |
88 | // Supported types for the proto: Example, Features. |
89 | // GetFeatureList(key, sequence_example) -> RepeatedPtrField<Feature> |
90 | // Returns Feature protos associated with a key. |
91 | // |
92 | // AppendFeatureValues(begin, end, feature) |
93 | // AppendFeatureValues(container or initializer_list, feature) |
94 | // Copies values into a Feature. |
95 | // AppendFeatureValues(begin, end, key, proto) |
96 | // AppendFeatureValues(container or initializer_list, key, proto) |
97 | // Copies values into Features and Example protos with the specified key. |
98 | // |
99 | // ClearFeatureValues<FeatureType>(feature) |
100 | // Clears the feature's repeated field of the given type. |
101 | // |
102 | // SetFeatureValues(begin, end, feature) |
103 | // SetFeatureValues(container or initializer_list, feature) |
104 | // Clears a Feature, then copies values into it. |
105 | // SetFeatureValues(begin, end, key, proto) |
106 | // SetFeatureValues(container or initializer_list, key, proto) |
107 | // Clears Features or Example protos with the specified key, |
108 | // then copies values into them. |
109 | // |
110 | // Auxiliary functions, it is unlikely you'll need to use them directly: |
111 | // GetFeatures(proto) -> Features |
112 | // A convenience function to get Features proto. |
113 | // Supported types for the proto: Example, Features. |
114 | // GetFeature(key, proto) -> Feature |
115 | // Returns a Feature proto for the specified key. |
116 | // Supported types for the proto: Example, Features. |
117 | // GetFeatureValues<FeatureType>(feature) -> RepeatedField<FeatureType> |
118 | // Returns values of the feature for the FeatureType. |
119 | |
120 | #ifndef TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ |
121 | #define TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ |
122 | |
123 | #include <algorithm> |
124 | #include <iterator> |
125 | #include <string> |
126 | #include <type_traits> |
127 | #include <utility> |
128 | |
129 | #include "absl/strings/string_view.h" |
130 | #include "tensorflow/core/example/example.pb.h" |
131 | #include "tensorflow/core/example/feature.pb.h" |
132 | #include "tensorflow/core/platform/protobuf.h" |
133 | #include "tensorflow/core/platform/stringpiece.h" |
134 | |
135 | // Must come after the import for absl::string_view. |
136 | #ifdef ABSL_HAVE_STD_STRING_VIEW |
137 | #include <string_view> |
138 | #endif |
139 | |
140 | namespace tensorflow { |
141 | namespace internal { |
142 | |
// TODO(gorban): Update all clients in a followup CL.
// Deprecated accessor; the deprecation message directs callers to GetFeature.
// Returns a reference to the feature corresponding to `name` in `example`.
// Note: it will create a new Feature if it is missing in the example.
// Only the declaration lives in this header.
ABSL_DEPRECATED("Use GetFeature instead." )
Feature& ExampleFeature(absl::string_view name, Example* example);
148 | |
// Specializations of RepeatedFieldTrait define the type of the repeated field
// that stores values of a selected feature type. The primary template is left
// undefined, so using an unsupported feature type is a compile-time error.
template <typename FeatureType>
struct RepeatedFieldTrait;

// Integer features are stored in a RepeatedField of protobuf_int64.
template <>
struct RepeatedFieldTrait<protobuf_int64> {
  using Type = protobuf::RepeatedField<protobuf_int64>;
};

// Float features are stored in a RepeatedField of float.
template <>
struct RepeatedFieldTrait<float> {
  using Type = protobuf::RepeatedField<float>;
};

// tstring features map onto the same RepeatedPtrField<std::string> storage as
// std::string features.
template <>
struct RepeatedFieldTrait<tstring> {
  using Type = protobuf::RepeatedPtrField<std::string>;
};

// String features are stored in a RepeatedPtrField of std::string.
template <>
struct RepeatedFieldTrait<std::string> {
  using Type = protobuf::RepeatedPtrField<std::string>;
};
173 | |
174 | // Specializations of FeatureTrait define a type of feature corresponding to a |
175 | // selected value type. |
176 | template <typename ValueType, class Enable = void> |
177 | struct FeatureTrait; |
178 | |
179 | template <typename ValueType> |
180 | struct FeatureTrait<ValueType, typename std::enable_if< |
181 | std::is_integral<ValueType>::value>::type> { |
182 | using Type = protobuf_int64; |
183 | }; |
184 | |
185 | template <typename ValueType> |
186 | struct FeatureTrait< |
187 | ValueType, |
188 | typename std::enable_if<std::is_floating_point<ValueType>::value>::type> { |
189 | using Type = float; |
190 | }; |
191 | |
192 | template <typename T> |
193 | struct is_string |
194 | : public std::integral_constant< |
195 | bool, |
196 | std::is_same<char*, typename std::decay<T>::type>::value || |
197 | std::is_same<const char*, typename std::decay<T>::type>::value> { |
198 | }; |
199 | |
200 | template <> |
201 | struct is_string<std::string> : std::true_type {}; |
202 | |
203 | template <> |
204 | struct is_string<::tensorflow::StringPiece> : std::true_type {}; |
205 | |
206 | template <> |
207 | struct is_string<tstring> : std::true_type {}; |
208 | |
209 | template <typename ValueType> |
210 | struct FeatureTrait< |
211 | ValueType, typename std::enable_if<is_string<ValueType>::value>::type> { |
212 | using Type = std::string; |
213 | }; |
214 | |
// Port of the C++20 `requires` expressions.
// Returns true when a callable of type F can be invoked with arguments of the
// explicitly supplied types T...; the callable itself is never called, only
// its type is inspected.
template <typename... T, typename F>
constexpr bool Requires(F /*probe*/) {
  return std::is_invocable_v<F, T...>;
}
220 | |
221 | struct NoneSuch {}; |
222 | |
223 | // True if the Feature map in a tf.Example supports heterogenous lookup. |
224 | // See https://abseil.io/tips/144. |
225 | inline constexpr bool kFeatureMapHasHeterogeneousLookup = |
226 | Requires<const decltype(Features::default_instance().feature())>( |
227 | [](auto&& c) -> decltype(c.find(NoneSuch{})) {}); |
228 | |
229 | // Converts an `absl::string_view` into a string-type compatible for use in the |
230 | // protobuf library (e.g. as lookup keys in `proto2::Map` or as elements addable |
231 | // to a `proto2::RepeatedPtrField`) depending on the BUILD mode. |
232 | // |
233 | // NOTE: While the newest versions of `proto2::Map` support heterogenous lookup, |
234 | // it does so through `std::string_view`. If the type is just an alias (as noted |
235 | // by `ABSL_USES_STD_STRING_VIEW`) then nothing more needs to be done; however, |
236 | // when the type is not an alias an explicit conversion to is necessary. |
237 | // |
238 | // NOTE: This conversion is only necessary until the migration for protobuf to |
239 | // take a dependency on ABSL is complete. |
240 | inline auto ProtoMapKey(absl::string_view str) { |
241 | if constexpr (kFeatureMapHasHeterogeneousLookup) { |
242 | #ifdef ABSL_USES_STD_STRING_VIEW |
243 | return str; |
244 | #else |
245 | #ifdef ABSL_HAVE_STD_STRING_VIEW |
246 | return std::string_view(str.data(), str.size()); |
247 | #else |
248 | return std::string(str); |
249 | #endif |
250 | #endif |
251 | } else { |
252 | return std::string(str); |
253 | } |
254 | } |
255 | |
256 | } // namespace internal |
257 | |
// Returns true if sequence_example has a feature_list with the specified key.
// Only the declaration lives in this header.
bool HasFeatureList(absl::string_view key,
                    const SequenceExample& sequence_example);
261 | |
262 | template <typename T> |
263 | struct TypeHasFeatures : std::false_type {}; |
264 | |
265 | template <> |
266 | struct TypeHasFeatures<SequenceExample> : std::true_type {}; |
267 | |
268 | template <> |
269 | struct TypeHasFeatures<Example> : std::true_type {}; |
270 | |
271 | template <> |
272 | struct TypeHasFeatures<Features> : std::true_type {}; |
273 | |
274 | // A family of template functions to return mutable Features proto from a |
275 | // container proto. Supported ProtoTypes: SequenceExample, Example, Features. |
276 | template <typename ProtoType> |
277 | typename std::enable_if<TypeHasFeatures<ProtoType>::value, Features*>::type |
278 | GetFeatures(ProtoType* proto); |
279 | |
280 | template <typename ProtoType> |
281 | typename std::enable_if<TypeHasFeatures<ProtoType>::value, |
282 | const Features&>::type |
283 | GetFeatures(const ProtoType& proto); |
284 | |
// Base declaration of a family of template functions to return a read only
// repeated field of feature values. FeatureType selects the repeated field
// type through internal::RepeatedFieldTrait; only the declaration lives in
// this header.
template <typename FeatureType>
const typename internal::RepeatedFieldTrait<FeatureType>::Type&
GetFeatureValues(const Feature& feature);
290 | |
291 | // Returns a read only repeated field corresponding to a feature with the |
292 | // specified name and FeatureType. Supported ProtoTypes: SequenceExample, |
293 | // Example, Features. |
294 | template <typename FeatureType, typename ProtoType> |
295 | const typename internal::RepeatedFieldTrait<FeatureType>::Type& |
296 | GetFeatureValues(absl::string_view key, const ProtoType& proto) { |
297 | return GetFeatureValues<FeatureType>( |
298 | GetFeatures(proto).feature().at(internal::ProtoMapKey(key))); |
299 | } |
300 | |
// Returns a mutable repeated field of the feature's values. FeatureType
// selects the repeated field type through internal::RepeatedFieldTrait; only
// the declaration lives in this header.
template <typename FeatureType>
typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
    Feature* feature);
305 | |
306 | // Returns a mutable repeated field corresponding to a feature with the |
307 | // specified name and FeatureType. Supported ProtoTypes: SequenceExample, |
308 | // Example, Features. |
309 | template <typename FeatureType, typename ProtoType> |
310 | typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues( |
311 | absl::string_view key, ProtoType* proto) { |
312 | ::tensorflow::Feature& feature = |
313 | (*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)]; |
314 | return GetFeatureValues<FeatureType>(&feature); |
315 | } |
316 | |
317 | // Returns a read-only Feature proto for the specified key, throws |
318 | // std::out_of_range if the key is not found. Supported types for the proto: |
319 | // SequenceExample, Example, Features. |
320 | template <typename ProtoType> |
321 | const Feature& GetFeature(absl::string_view key, const ProtoType& proto) { |
322 | return GetFeatures(proto).feature().at(internal::ProtoMapKey(key)); |
323 | } |
324 | |
325 | // Returns a mutable Feature proto for the specified key, creates a new if |
326 | // necessary. Supported types for the proto: SequenceExample, Example, Features. |
327 | template <typename ProtoType> |
328 | Feature* GetFeature(absl::string_view key, ProtoType* proto) { |
329 | return &(*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)]; |
330 | } |
331 | |
// Returns a repeated field with features corresponding to a feature_list key.
// Read-only overload; only the declaration lives in this header.
const protobuf::RepeatedPtrField<Feature>& GetFeatureList(
    absl::string_view key, const SequenceExample& sequence_example);

// Returns a mutable repeated field with features corresponding to a
// feature_list key. It will create a new FeatureList if necessary.
// Only the declaration lives in this header.
protobuf::RepeatedPtrField<Feature>* GetFeatureList(
    absl::string_view feature_list_key, SequenceExample* sequence_example);
340 | |
341 | template <typename IteratorType> |
342 | void AppendFeatureValues(IteratorType first, IteratorType last, |
343 | Feature* feature) { |
344 | using FeatureType = typename internal::FeatureTrait< |
345 | typename std::iterator_traits<IteratorType>::value_type>::Type; |
346 | auto& values = *GetFeatureValues<FeatureType>(feature); |
347 | values.Reserve(std::distance(first, last)); |
348 | for (auto it = first; it != last; ++it) { |
349 | *values.Add() = *it; |
350 | } |
351 | } |
352 | |
353 | template <typename ValueType> |
354 | void AppendFeatureValues(std::initializer_list<ValueType> container, |
355 | Feature* feature) { |
356 | using FeatureType = typename internal::FeatureTrait<ValueType>::Type; |
357 | auto& values = *GetFeatureValues<FeatureType>(feature); |
358 | values.Reserve(container.size()); |
359 | for (auto& elt : container) { |
360 | *values.Add() = std::move(elt); |
361 | } |
362 | } |
363 | |
364 | namespace internal { |
365 | |
// HasSize<T>::value is true if T has a size() member function callable without
// arguments, and false otherwise.
template <typename T, typename = void>
struct HasSize : std::false_type {};

template <typename T>
struct HasSize<T, std::void_t<decltype(std::declval<T>().size())>>
    : std::true_type {};

// Prepares `values` for appending all elements of `container`, if the
// container exposes a size() method. Reserve() takes the total element count,
// so the current size of `values` is added to the number of incoming elements.
template <typename ContainerType, typename RepeatedFieldType>
auto ReserveIfSizeAvailable(const ContainerType& container,
                            RepeatedFieldType& values)
    -> std::enable_if_t<HasSize<ContainerType>::value, void> {
  values.Reserve(values.size() + static_cast<int>(container.size()));
}

// Fallback for containers without a size() method: nothing is reserved.
template <typename ContainerType, typename RepeatedFieldType>
auto ReserveIfSizeAvailable(const ContainerType& /*container*/,
                            RepeatedFieldType& /*values*/)
    -> std::enable_if_t<!HasSize<ContainerType>::value, void> {}
386 | |
387 | } // namespace internal |
388 | |
389 | template <typename ContainerType> |
390 | void AppendFeatureValues(const ContainerType& container, Feature* feature) { |
391 | using IteratorType = typename ContainerType::const_iterator; |
392 | using FeatureType = typename internal::FeatureTrait< |
393 | typename std::iterator_traits<IteratorType>::value_type>::Type; |
394 | auto* values = GetFeatureValues<FeatureType>(feature); |
395 | internal::ReserveIfSizeAvailable(container, *values); |
396 | // This is equivalent to std::copy into `values` with a |
397 | // RepeatedFieldBackInserter, the difference is RFBI isn't compatible with |
398 | // types that we want to convert (e.g. absl::string_view -> std::string). |
399 | for (const auto& elt : container) { |
400 | if constexpr (internal::is_string<FeatureType>::value) { |
401 | *values->Add() = std::string(elt); |
402 | } else { |
403 | *values->Add() = elt; |
404 | } |
405 | } |
406 | } |
407 | |
408 | // Copies elements from the range, defined by [first, last) into the feature |
409 | // obtainable from the (proto, key) combination. |
410 | template <typename IteratorType, typename ProtoType> |
411 | void AppendFeatureValues(IteratorType first, IteratorType last, |
412 | absl::string_view key, ProtoType* proto) { |
413 | AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); |
414 | } |
415 | |
416 | // Copies all elements from the container into a feature. |
417 | template <typename ContainerType, typename ProtoType> |
418 | void AppendFeatureValues(const ContainerType& container, absl::string_view key, |
419 | ProtoType* proto) { |
420 | AppendFeatureValues<ContainerType>(container, |
421 | GetFeature(key, GetFeatures(proto))); |
422 | } |
423 | |
424 | // Copies all elements from the initializer list into a Feature contained by |
425 | // Features or Example proto. |
426 | template <typename ValueType, typename ProtoType> |
427 | void AppendFeatureValues(std::initializer_list<ValueType> container, |
428 | absl::string_view key, ProtoType* proto) { |
429 | AppendFeatureValues<ValueType>(container, |
430 | GetFeature(key, GetFeatures(proto))); |
431 | } |
432 | |
// Clears the feature's repeated field (int64, float, or string).
// The SetFeatureValues overloads in this header call it with a single explicit
// FeatureType; only the declaration lives in this header.
template <typename... FeatureType>
void ClearFeatureValues(Feature* feature);
436 | |
437 | // Clears the feature's repeated field (int64, float, or string). Copies |
438 | // elements from the range, defined by [first, last) into the feature's repeated |
439 | // field. |
440 | template <typename IteratorType> |
441 | void SetFeatureValues(IteratorType first, IteratorType last, Feature* feature) { |
442 | using FeatureType = typename internal::FeatureTrait< |
443 | typename std::iterator_traits<IteratorType>::value_type>::Type; |
444 | ClearFeatureValues<FeatureType>(feature); |
445 | AppendFeatureValues(first, last, feature); |
446 | } |
447 | |
448 | // Clears the feature's repeated field (int64, float, or string). Copies all |
449 | // elements from the initializer list into the feature's repeated field. |
450 | template <typename ValueType> |
451 | void SetFeatureValues(std::initializer_list<ValueType> container, |
452 | Feature* feature) { |
453 | using FeatureType = typename internal::FeatureTrait<ValueType>::Type; |
454 | ClearFeatureValues<FeatureType>(feature); |
455 | AppendFeatureValues(container, feature); |
456 | } |
457 | |
458 | // Clears the feature's repeated field (int64, float, or string). Copies all |
459 | // elements from the container into the feature's repeated field. |
460 | template <typename ContainerType> |
461 | void SetFeatureValues(const ContainerType& container, Feature* feature) { |
462 | using IteratorType = typename ContainerType::const_iterator; |
463 | using FeatureType = typename internal::FeatureTrait< |
464 | typename std::iterator_traits<IteratorType>::value_type>::Type; |
465 | ClearFeatureValues<FeatureType>(feature); |
466 | AppendFeatureValues(container, feature); |
467 | } |
468 | |
469 | // Clears the feature's repeated field (int64, float, or string). Copies |
470 | // elements from the range, defined by [first, last) into the feature's repeated |
471 | // field. |
472 | template <typename IteratorType, typename ProtoType> |
473 | void SetFeatureValues(IteratorType first, IteratorType last, |
474 | absl::string_view key, ProtoType* proto) { |
475 | SetFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); |
476 | } |
477 | |
478 | // Clears the feature's repeated field (int64, float, or string). Copies all |
479 | // elements from the container into the feature's repeated field. |
480 | template <typename ContainerType, typename ProtoType> |
481 | void SetFeatureValues(const ContainerType& container, absl::string_view key, |
482 | ProtoType* proto) { |
483 | SetFeatureValues<ContainerType>(container, |
484 | GetFeature(key, GetFeatures(proto))); |
485 | } |
486 | |
487 | // Clears the feature's repeated field (int64, float, or string). Copies all |
488 | // elements from the initializer list into the feature's repeated field. |
489 | template <typename ValueType, typename ProtoType> |
490 | void SetFeatureValues(std::initializer_list<ValueType> container, |
491 | absl::string_view key, ProtoType* proto) { |
492 | SetFeatureValues<ValueType>(container, GetFeature(key, GetFeatures(proto))); |
493 | } |
494 | |
// Returns true if a feature with the specified key belongs to the Features.
// The template parameter pack accepts zero or one template argument - which
// is the FeatureType. If the FeatureType is not specified (zero template
// arguments) the function will not check the feature type. Otherwise it will
// return false if the feature has a wrong type.
// Only the declaration lives in this header.
template <typename... FeatureType>
bool HasFeature(absl::string_view key, const Features& features);
502 | |
503 | // Returns true if a feature with the specified key belongs to the Example. |
504 | // Doesn't check feature type if used without FeatureType, otherwise the |
505 | // specialized versions return false if the feature has a wrong type. |
506 | template <typename... FeatureType> |
507 | bool HasFeature(absl::string_view key, const Example& example) { |
508 | return HasFeature<FeatureType...>(key, GetFeatures(example)); |
509 | } |
510 | |
511 | // TODO(gorban): update all clients in a followup CL. |
512 | template <typename... FeatureType> |
513 | ABSL_DEPRECATED("Use HasFeature instead." ) |
514 | bool ExampleHasFeature(absl::string_view key, const Example& example) { |
515 | return HasFeature<FeatureType...>(key, example); |
516 | } |
517 | |
518 | } // namespace tensorflow |
519 | #endif // TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ |
520 | |