1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/array_ops.cc |
17 | |
18 | #ifndef TENSORFLOW_CORE_KERNELS_ONE_HOT_OP_H_ |
19 | #define TENSORFLOW_CORE_KERNELS_ONE_HOT_OP_H_ |
20 | // Generator definition for OneHotOp, must be compilable by nvcc. |
21 | |
22 | #define EIGEN_USE_THREADS |
23 | |
24 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
25 | #include "tensorflow/core/framework/bounds_check.h" |
26 | #include "tensorflow/core/framework/tensor_types.h" |
27 | #include "tensorflow/core/platform/macros.h" |
28 | #include "tensorflow/core/platform/types.h" |
29 | |
30 | namespace tensorflow { |
31 | |
32 | typedef Eigen::ThreadPoolDevice CPUDevice; |
33 | |
34 | namespace generator { |
35 | |
36 | template <typename T, typename TI> |
37 | class OneGenerator { |
38 | public: |
39 | EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE |
40 | OneGenerator(const typename TTypes<TI>::ConstMatrix& indices, |
41 | const typename TTypes<T>::ConstScalar& on_value, |
42 | const typename TTypes<T>::ConstScalar& off_value) |
43 | : indices_(indices), on_value_(on_value), off_value_(off_value) {} |
44 | |
45 | EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T |
46 | operator()(const Eigen::array<Eigen::DenseIndex, 3>& pre_depth_suff) const { |
47 | return (indices_(pre_depth_suff[0], pre_depth_suff[2]) == pre_depth_suff[1]) |
48 | ? on_value_() |
49 | : off_value_(); |
50 | } |
51 | |
52 | private: |
53 | const typename TTypes<TI>::ConstMatrix indices_; |
54 | const typename TTypes<T>::ConstScalar on_value_; |
55 | const typename TTypes<T>::ConstScalar off_value_; |
56 | }; |
57 | |
58 | } // namespace generator |
59 | |
60 | namespace functor { |
61 | |
62 | template <typename Device, typename T, typename TI> |
63 | struct OneHot { |
64 | EIGEN_ALWAYS_INLINE static void Compute( |
65 | const Device& d, const typename TTypes<TI>::ConstMatrix& indices, |
66 | const typename TTypes<T>::ConstScalar& on_value, |
67 | const typename TTypes<T>::ConstScalar& off_value, |
68 | typename TTypes<T, 3>::Tensor* output) { |
69 | generator::OneGenerator<T, TI> generator(indices, on_value, off_value); |
70 | output->device(d) = output->generate(generator); |
71 | } |
72 | }; |
73 | |
74 | template <typename T, typename TI> |
75 | struct OneHot<CPUDevice, T, TI> { |
76 | EIGEN_ALWAYS_INLINE static void Compute( |
77 | const CPUDevice& d, const typename TTypes<TI>::ConstMatrix& indices, |
78 | const typename TTypes<T>::ConstScalar& on_value, |
79 | const typename TTypes<T>::ConstScalar& off_value, |
80 | typename TTypes<T, 3>::Tensor* output) { |
81 | // Pre-fill output with `off_value`. |
82 | output->device(d) = output->constant(off_value()); |
83 | |
84 | // Iterate through indices and update on_value elements in the output. |
85 | Eigen::Index prefix_size = output->dimensions()[0]; |
86 | Eigen::Index depth_size = output->dimensions()[1]; |
87 | Eigen::Index suffix_size = output->dimensions()[2]; |
88 | |
89 | // Cost of setting one `on_value` coefficient. |
90 | double bytes_loaded = sizeof(T); |
91 | double bytes_stored = sizeof(T); |
92 | double cycles = 0.0; |
93 | const Eigen::TensorOpCost cost(bytes_loaded, bytes_stored, cycles); |
94 | |
95 | if (suffix_size == 1) { |
96 | const auto func = [&](Eigen::Index start, Eigen::Index end) -> void { |
97 | for (Eigen::Index i = start; i < end; ++i) { |
98 | const TI depth = internal::SubtleMustCopy(indices(i, 0)); |
99 | if (FastBoundsCheck(depth, depth_size)) { |
100 | (*output)(i, depth, 0) = on_value(); |
101 | } |
102 | } |
103 | }; |
104 | d.parallelFor(prefix_size, cost, func); |
105 | } else { |
106 | const auto func = [&](Eigen::Index start, Eigen::Index end) -> void { |
107 | for (Eigen::Index i = start; i < end; ++i) { |
108 | const Eigen::Index d0 = i / suffix_size; |
109 | const Eigen::Index d1 = i - (d0 * suffix_size); |
110 | const TI depth = internal::SubtleMustCopy(indices(d0, d1)); |
111 | if (FastBoundsCheck(depth, depth_size)) { |
112 | (*output)(d0, depth, d1) = on_value(); |
113 | } |
114 | } |
115 | }; |
116 | d.parallelFor(prefix_size * suffix_size, cost * suffix_size, func); |
117 | } |
118 | } |
119 | }; |
120 | |
121 | } // namespace functor |
122 | |
123 | } // namespace tensorflow |
124 | |
125 | #endif // TENSORFLOW_CORE_KERNELS_ONE_HOT_OP_H_ |
126 | |