1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// A set of lightweight wrappers which simplify access to Feature protos.
17//
18// TensorFlow Example proto uses associative maps on top of oneof fields.
19// SequenceExample proto uses associative map of FeatureList.
20// So accessing feature values is not very convenient.
21//
// For example, to read the first value of the integer feature "tag":
23// int id = example.features().feature().at("tag").int64_list().value(0);
24//
25// to add a value:
26// auto features = example->mutable_features();
27// (*features->mutable_feature())["tag"].mutable_int64_list()->add_value(id);
28//
29// For float features you have to use float_list, for string - bytes_list.
30//
31// To do the same with this library:
32// int id = GetFeatureValues<int64_t>("tag", example).Get(0);
33// GetFeatureValues<int64_t>("tag", &example)->Add(id);
34//
35// Modification of bytes features is slightly different:
36// auto tag = GetFeatureValues<std::string>("tag", &example);
37// *tag->Add() = "lorem ipsum";
38//
39// To copy multiple values into a feature:
40// AppendFeatureValues({1,2,3}, "tag", &example);
41//
// GetFeatureValues gives you access to the underlying data - a RepeatedField
// object (a RepeatedPtrField for a bytes list). Refer to the RepeatedField
// documentation for the full list of supported methods.
45//
46// NOTE: Due to the nature of oneof proto fields setting a feature of one type
47// automatically clears all values stored as another type with the same feature
48// key.
49//
50// This library also has tools to work with SequenceExample protos.
51//
52// To get a value from SequenceExample.context:
53// int id = GetFeatureValues<protobuf_int64>("tag", se.context()).Get(0);
54// To add a value to the context:
55// GetFeatureValues<protobuf_int64>("tag", se.mutable_context())->Add(42);
56//
57// To add values to feature_lists:
58// AppendFeatureValues({4.0},
59// GetFeatureList("images", &se)->Add());
60// AppendFeatureValues({5.0, 3.0},
61// GetFeatureList("images", &se)->Add());
62// This will create a feature list keyed as "images" with two features:
63// feature_lists {
64// feature_list {
65// key: "images"
66// value {
67// feature { float_list { value: [4.0] } }
68// feature { float_list { value: [5.0, 3.0] } }
69// }
70// }
71// }
72// For string-valued features, note that the Append... and Set... functions
73// support absl::string_view containers. This allows you to copy existing
74// buffers into a Feature with only one copy:
75// std::vector<absl::string_view> image;
76// image.push_back(image_buffer); // No copy.
77// SetFeatureValues(image, "image", &example); // Copy.
78//
79// Functions exposed by this library:
80// HasFeature<[FeatureType]>(key, proto) -> bool
81// Returns true if a feature with the specified key, and optionally
82// FeatureType, belongs to the Features or Example proto.
83// HasFeatureList(key, sequence_example) -> bool
84// Returns true if SequenceExample has a feature_list with the key.
85//
86// GetFeatureValues<FeatureType>(key, proto) -> RepeatedField<FeatureType>
87// Returns values for the specified key and the FeatureType.
88// Supported types for the proto: Example, Features.
89// GetFeatureList(key, sequence_example) -> RepeatedPtrField<Feature>
90// Returns Feature protos associated with a key.
91//
92// AppendFeatureValues(begin, end, feature)
93// AppendFeatureValues(container or initializer_list, feature)
94// Copies values into a Feature.
95// AppendFeatureValues(begin, end, key, proto)
96// AppendFeatureValues(container or initializer_list, key, proto)
97// Copies values into Features and Example protos with the specified key.
98//
99// ClearFeatureValues<FeatureType>(feature)
100// Clears the feature's repeated field of the given type.
101//
102// SetFeatureValues(begin, end, feature)
103// SetFeatureValues(container or initializer_list, feature)
104// Clears a Feature, then copies values into it.
105// SetFeatureValues(begin, end, key, proto)
106// SetFeatureValues(container or initializer_list, key, proto)
107// Clears Features or Example protos with the specified key,
108// then copies values into them.
109//
110// Auxiliary functions, it is unlikely you'll need to use them directly:
111// GetFeatures(proto) -> Features
112// A convenience function to get Features proto.
113// Supported types for the proto: Example, Features.
114// GetFeature(key, proto) -> Feature
115// Returns a Feature proto for the specified key.
116// Supported types for the proto: Example, Features.
117// GetFeatureValues<FeatureType>(feature) -> RepeatedField<FeatureType>
118// Returns values of the feature for the FeatureType.
119
120#ifndef TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_
121#define TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_
122
123#include <algorithm>
124#include <iterator>
125#include <string>
126#include <type_traits>
127#include <utility>
128
129#include "absl/strings/string_view.h"
130#include "tensorflow/core/example/example.pb.h"
131#include "tensorflow/core/example/feature.pb.h"
132#include "tensorflow/core/platform/protobuf.h"
133#include "tensorflow/core/platform/stringpiece.h"
134
135// Must come after the import for absl::string_view.
136#ifdef ABSL_HAVE_STD_STRING_VIEW
137#include <string_view>
138#endif
139
140namespace tensorflow {
141namespace internal {
142
143// TODO(gorban): Update all clients in a followup CL.
144// Returns a reference to a feature corresponding to the name.
145// Note: it will create a new Feature if it is missing in the example.
146ABSL_DEPRECATED("Use GetFeature instead.")
147Feature& ExampleFeature(absl::string_view name, Example* example);
148
149// Specializations of RepeatedFieldTrait define a type of RepeatedField
150// corresponding to a selected feature type.
151template <typename FeatureType>
152struct RepeatedFieldTrait;
153
154template <>
155struct RepeatedFieldTrait<protobuf_int64> {
156 using Type = protobuf::RepeatedField<protobuf_int64>;
157};
158
159template <>
160struct RepeatedFieldTrait<float> {
161 using Type = protobuf::RepeatedField<float>;
162};
163
164template <>
165struct RepeatedFieldTrait<tstring> {
166 using Type = protobuf::RepeatedPtrField<std::string>;
167};
168
169template <>
170struct RepeatedFieldTrait<std::string> {
171 using Type = protobuf::RepeatedPtrField<std::string>;
172};
173
174// Specializations of FeatureTrait define a type of feature corresponding to a
175// selected value type.
176template <typename ValueType, class Enable = void>
177struct FeatureTrait;
178
179template <typename ValueType>
180struct FeatureTrait<ValueType, typename std::enable_if<
181 std::is_integral<ValueType>::value>::type> {
182 using Type = protobuf_int64;
183};
184
185template <typename ValueType>
186struct FeatureTrait<
187 ValueType,
188 typename std::enable_if<std::is_floating_point<ValueType>::value>::type> {
189 using Type = float;
190};
191
192template <typename T>
193struct is_string
194 : public std::integral_constant<
195 bool,
196 std::is_same<char*, typename std::decay<T>::type>::value ||
197 std::is_same<const char*, typename std::decay<T>::type>::value> {
198};
199
200template <>
201struct is_string<std::string> : std::true_type {};
202
203template <>
204struct is_string<::tensorflow::StringPiece> : std::true_type {};
205
206template <>
207struct is_string<tstring> : std::true_type {};
208
209template <typename ValueType>
210struct FeatureTrait<
211 ValueType, typename std::enable_if<is_string<ValueType>::value>::type> {
212 using Type = std::string;
213};
214
// Port of the C++20 `requires` expressions.
//
// Returns true if a callable of type F can be invoked with arguments of types
// T.... The callable is never called; only its type is inspected.
template <typename... T, typename F>
constexpr bool Requires(F /*probe*/) {
  constexpr bool kInvocable = std::is_invocable_v<F, T...>;
  return kInvocable;
}
220
221struct NoneSuch {};
222
223// True if the Feature map in a tf.Example supports heterogenous lookup.
224// See https://abseil.io/tips/144.
225inline constexpr bool kFeatureMapHasHeterogeneousLookup =
226 Requires<const decltype(Features::default_instance().feature())>(
227 [](auto&& c) -> decltype(c.find(NoneSuch{})) {});
228
229// Converts an `absl::string_view` into a string-type compatible for use in the
230// protobuf library (e.g. as lookup keys in `proto2::Map` or as elements addable
231// to a `proto2::RepeatedPtrField`) depending on the BUILD mode.
232//
233// NOTE: While the newest versions of `proto2::Map` support heterogenous lookup,
234// it does so through `std::string_view`. If the type is just an alias (as noted
235// by `ABSL_USES_STD_STRING_VIEW`) then nothing more needs to be done; however,
236// when the type is not an alias an explicit conversion to is necessary.
237//
238// NOTE: This conversion is only necessary until the migration for protobuf to
239// take a dependency on ABSL is complete.
240inline auto ProtoMapKey(absl::string_view str) {
241 if constexpr (kFeatureMapHasHeterogeneousLookup) {
242#ifdef ABSL_USES_STD_STRING_VIEW
243 return str;
244#else
245#ifdef ABSL_HAVE_STD_STRING_VIEW
246 return std::string_view(str.data(), str.size());
247#else
248 return std::string(str);
249#endif
250#endif
251 } else {
252 return std::string(str);
253 }
254}
255
256} // namespace internal
257
258// Returns true if sequence_example has a feature_list with the specified key.
259bool HasFeatureList(absl::string_view key,
260 const SequenceExample& sequence_example);
261
262template <typename T>
263struct TypeHasFeatures : std::false_type {};
264
265template <>
266struct TypeHasFeatures<SequenceExample> : std::true_type {};
267
268template <>
269struct TypeHasFeatures<Example> : std::true_type {};
270
271template <>
272struct TypeHasFeatures<Features> : std::true_type {};
273
274// A family of template functions to return mutable Features proto from a
275// container proto. Supported ProtoTypes: SequenceExample, Example, Features.
276template <typename ProtoType>
277typename std::enable_if<TypeHasFeatures<ProtoType>::value, Features*>::type
278GetFeatures(ProtoType* proto);
279
280template <typename ProtoType>
281typename std::enable_if<TypeHasFeatures<ProtoType>::value,
282 const Features&>::type
283GetFeatures(const ProtoType& proto);
284
285// Base declaration of a family of template functions to return a read only
286// repeated field of feature values.
287template <typename FeatureType>
288const typename internal::RepeatedFieldTrait<FeatureType>::Type&
289GetFeatureValues(const Feature& feature);
290
291// Returns a read only repeated field corresponding to a feature with the
292// specified name and FeatureType. Supported ProtoTypes: SequenceExample,
293// Example, Features.
294template <typename FeatureType, typename ProtoType>
295const typename internal::RepeatedFieldTrait<FeatureType>::Type&
296GetFeatureValues(absl::string_view key, const ProtoType& proto) {
297 return GetFeatureValues<FeatureType>(
298 GetFeatures(proto).feature().at(internal::ProtoMapKey(key)));
299}
300
301// Returns a mutable repeated field of a feature values.
302template <typename FeatureType>
303typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
304 Feature* feature);
305
306// Returns a mutable repeated field corresponding to a feature with the
307// specified name and FeatureType. Supported ProtoTypes: SequenceExample,
308// Example, Features.
309template <typename FeatureType, typename ProtoType>
310typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
311 absl::string_view key, ProtoType* proto) {
312 ::tensorflow::Feature& feature =
313 (*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)];
314 return GetFeatureValues<FeatureType>(&feature);
315}
316
317// Returns a read-only Feature proto for the specified key, throws
318// std::out_of_range if the key is not found. Supported types for the proto:
319// SequenceExample, Example, Features.
320template <typename ProtoType>
321const Feature& GetFeature(absl::string_view key, const ProtoType& proto) {
322 return GetFeatures(proto).feature().at(internal::ProtoMapKey(key));
323}
324
325// Returns a mutable Feature proto for the specified key, creates a new if
326// necessary. Supported types for the proto: SequenceExample, Example, Features.
327template <typename ProtoType>
328Feature* GetFeature(absl::string_view key, ProtoType* proto) {
329 return &(*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)];
330}
331
332// Returns a repeated field with features corresponding to a feature_list key.
333const protobuf::RepeatedPtrField<Feature>& GetFeatureList(
334 absl::string_view key, const SequenceExample& sequence_example);
335
336// Returns a mutable repeated field with features corresponding to a
337// feature_list key. It will create a new FeatureList if necessary.
338protobuf::RepeatedPtrField<Feature>* GetFeatureList(
339 absl::string_view feature_list_key, SequenceExample* sequence_example);
340
341template <typename IteratorType>
342void AppendFeatureValues(IteratorType first, IteratorType last,
343 Feature* feature) {
344 using FeatureType = typename internal::FeatureTrait<
345 typename std::iterator_traits<IteratorType>::value_type>::Type;
346 auto& values = *GetFeatureValues<FeatureType>(feature);
347 values.Reserve(std::distance(first, last));
348 for (auto it = first; it != last; ++it) {
349 *values.Add() = *it;
350 }
351}
352
353template <typename ValueType>
354void AppendFeatureValues(std::initializer_list<ValueType> container,
355 Feature* feature) {
356 using FeatureType = typename internal::FeatureTrait<ValueType>::Type;
357 auto& values = *GetFeatureValues<FeatureType>(feature);
358 values.Reserve(container.size());
359 for (auto& elt : container) {
360 *values.Add() = std::move(elt);
361 }
362}
363
namespace internal {

// HasSize<T>::value is true if `std::declval<T>().size()` is a valid
// expression, i.e. T has a callable size() member; otherwise it is false.
template <typename T, typename = void>
struct HasSize : std::false_type {};

template <typename T>
struct HasSize<T, std::void_t<decltype(std::declval<T>().size())>>
    : std::true_type {};

// Reserves room in `values` for appending every element of `container`, if
// the container exposes a size() member; otherwise does nothing.
//
// Reserve() is given the total size wanted (current elements plus the ones
// about to be appended), so appending to a non-empty field reserves enough
// capacity as well. The count is cast to int for the Reserve() call.
template <typename ContainerType, typename RepeatedFieldType>
void ReserveIfSizeAvailable(const ContainerType& container,
                            RepeatedFieldType& values) {
  if constexpr (HasSize<ContainerType>::value) {
    values.Reserve(values.size() + static_cast<int>(container.size()));
  }
}

}  // namespace internal
388
389template <typename ContainerType>
390void AppendFeatureValues(const ContainerType& container, Feature* feature) {
391 using IteratorType = typename ContainerType::const_iterator;
392 using FeatureType = typename internal::FeatureTrait<
393 typename std::iterator_traits<IteratorType>::value_type>::Type;
394 auto* values = GetFeatureValues<FeatureType>(feature);
395 internal::ReserveIfSizeAvailable(container, *values);
396 // This is equivalent to std::copy into `values` with a
397 // RepeatedFieldBackInserter, the difference is RFBI isn't compatible with
398 // types that we want to convert (e.g. absl::string_view -> std::string).
399 for (const auto& elt : container) {
400 if constexpr (internal::is_string<FeatureType>::value) {
401 *values->Add() = std::string(elt);
402 } else {
403 *values->Add() = elt;
404 }
405 }
406}
407
408// Copies elements from the range, defined by [first, last) into the feature
409// obtainable from the (proto, key) combination.
410template <typename IteratorType, typename ProtoType>
411void AppendFeatureValues(IteratorType first, IteratorType last,
412 absl::string_view key, ProtoType* proto) {
413 AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto)));
414}
415
416// Copies all elements from the container into a feature.
417template <typename ContainerType, typename ProtoType>
418void AppendFeatureValues(const ContainerType& container, absl::string_view key,
419 ProtoType* proto) {
420 AppendFeatureValues<ContainerType>(container,
421 GetFeature(key, GetFeatures(proto)));
422}
423
424// Copies all elements from the initializer list into a Feature contained by
425// Features or Example proto.
426template <typename ValueType, typename ProtoType>
427void AppendFeatureValues(std::initializer_list<ValueType> container,
428 absl::string_view key, ProtoType* proto) {
429 AppendFeatureValues<ValueType>(container,
430 GetFeature(key, GetFeatures(proto)));
431}
432
433// Clears the feature's repeated field (int64, float, or string).
434template <typename... FeatureType>
435void ClearFeatureValues(Feature* feature);
436
437// Clears the feature's repeated field (int64, float, or string). Copies
438// elements from the range, defined by [first, last) into the feature's repeated
439// field.
440template <typename IteratorType>
441void SetFeatureValues(IteratorType first, IteratorType last, Feature* feature) {
442 using FeatureType = typename internal::FeatureTrait<
443 typename std::iterator_traits<IteratorType>::value_type>::Type;
444 ClearFeatureValues<FeatureType>(feature);
445 AppendFeatureValues(first, last, feature);
446}
447
448// Clears the feature's repeated field (int64, float, or string). Copies all
449// elements from the initializer list into the feature's repeated field.
450template <typename ValueType>
451void SetFeatureValues(std::initializer_list<ValueType> container,
452 Feature* feature) {
453 using FeatureType = typename internal::FeatureTrait<ValueType>::Type;
454 ClearFeatureValues<FeatureType>(feature);
455 AppendFeatureValues(container, feature);
456}
457
458// Clears the feature's repeated field (int64, float, or string). Copies all
459// elements from the container into the feature's repeated field.
460template <typename ContainerType>
461void SetFeatureValues(const ContainerType& container, Feature* feature) {
462 using IteratorType = typename ContainerType::const_iterator;
463 using FeatureType = typename internal::FeatureTrait<
464 typename std::iterator_traits<IteratorType>::value_type>::Type;
465 ClearFeatureValues<FeatureType>(feature);
466 AppendFeatureValues(container, feature);
467}
468
469// Clears the feature's repeated field (int64, float, or string). Copies
470// elements from the range, defined by [first, last) into the feature's repeated
471// field.
472template <typename IteratorType, typename ProtoType>
473void SetFeatureValues(IteratorType first, IteratorType last,
474 absl::string_view key, ProtoType* proto) {
475 SetFeatureValues(first, last, GetFeature(key, GetFeatures(proto)));
476}
477
478// Clears the feature's repeated field (int64, float, or string). Copies all
479// elements from the container into the feature's repeated field.
480template <typename ContainerType, typename ProtoType>
481void SetFeatureValues(const ContainerType& container, absl::string_view key,
482 ProtoType* proto) {
483 SetFeatureValues<ContainerType>(container,
484 GetFeature(key, GetFeatures(proto)));
485}
486
487// Clears the feature's repeated field (int64, float, or string). Copies all
488// elements from the initializer list into the feature's repeated field.
489template <typename ValueType, typename ProtoType>
490void SetFeatureValues(std::initializer_list<ValueType> container,
491 absl::string_view key, ProtoType* proto) {
492 SetFeatureValues<ValueType>(container, GetFeature(key, GetFeatures(proto)));
493}
494
495// Returns true if a feature with the specified key belongs to the Features.
496// The template parameter pack accepts zero or one template argument - which
497// is FeatureType. If the FeatureType not specified (zero template arguments)
498// the function will not check the feature type. Otherwise it will return false
499// if the feature has a wrong type.
500template <typename... FeatureType>
501bool HasFeature(absl::string_view key, const Features& features);
502
503// Returns true if a feature with the specified key belongs to the Example.
504// Doesn't check feature type if used without FeatureType, otherwise the
505// specialized versions return false if the feature has a wrong type.
506template <typename... FeatureType>
507bool HasFeature(absl::string_view key, const Example& example) {
508 return HasFeature<FeatureType...>(key, GetFeatures(example));
509}
510
511// TODO(gorban): update all clients in a followup CL.
512template <typename... FeatureType>
513ABSL_DEPRECATED("Use HasFeature instead.")
514bool ExampleHasFeature(absl::string_view key, const Example& example) {
515 return HasFeature<FeatureType...>(key, example);
516}
517
518} // namespace tensorflow
519#endif // TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_
520