1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_KERNELS_TILE_FUNCTOR_H_ |
17 | #define TENSORFLOW_CORE_KERNELS_TILE_FUNCTOR_H_ |
18 | |
19 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
20 | |
21 | #include "tensorflow/core/framework/tensor.h" |
22 | #include "tensorflow/core/framework/tensor_types.h" |
23 | #include "tensorflow/core/platform/types.h" |
24 | |
25 | namespace tensorflow { |
26 | |
27 | namespace internal { |
28 | |
// Device-specific naive implementation for Tile.
//
// CPU (Eigen::ThreadPoolDevice) overload. This header only declares it; no
// definition is visible here. functor::Tile (below) calls TileSimple for any
// input rank that its switch statement does not handle explicitly.
//
// Unlike TileUsingEigen, this function is not given the multiples array --
// presumably the tiling is derived from the shapes of `in` and `*out`
// (NOTE(review): confirm against the definition).
template <typename T>
void TileSimple(const Eigen::ThreadPoolDevice& d, Tensor* out,
                const Tensor& in);
34 | |
// GPU (Eigen::GpuDevice) overload of TileSimple. It is declared only when the
// build defines GOOGLE_CUDA or TENSORFLOW_USE_ROCM to a non-zero value; no
// definition is visible in this header.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename T>
void TileSimple(const Eigen::GpuDevice& d, Tensor* out, const Tensor& in);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
39 | |
40 | template <typename Device, typename T, typename Tmultiples, int NDIM> |
41 | void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in, |
42 | const gtl::ArraySlice<Tmultiples> broadcast_array) { |
43 | Eigen::array<Tmultiples, NDIM> b; |
44 | for (int i = 0; i < NDIM; ++i) b[i] = broadcast_array[i]; |
45 | MaybeWith32BitIndexing<Device>( |
46 | [&](auto out32, auto in32) { out32.device(d) = in32.broadcast(b); }, |
47 | out->tensor<T, NDIM>(), in.tensor<T, NDIM>()); |
48 | } |
49 | |
50 | template <typename Device, typename T, typename Tmultiples> |
51 | void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in, |
52 | const gtl::ArraySlice<Tmultiples>) { |
53 | auto x = in.tensor<T, 0>(); |
54 | auto y = out->tensor<T, 0>(); |
55 | // In the scalar case we simply copy the input. |
56 | y.device(d) = x; |
57 | } |
58 | |
59 | } // end namespace internal |
60 | |
61 | namespace functor { |
62 | |
63 | template <typename Device, typename T, typename Tmultiples> |
64 | struct Tile { |
65 | void operator()(const Device& d, Tensor* out, const Tensor& in, |
66 | const gtl::ArraySlice<Tmultiples> broadcast_array) const { |
67 | switch (in.dims()) { |
68 | case 0: |
69 | internal::TileUsingEigen<Device, T, Tmultiples>(d, out, in, |
70 | broadcast_array); |
71 | break; |
72 | case 1: |
73 | internal::TileUsingEigen<Device, T, Tmultiples, 1>(d, out, in, |
74 | broadcast_array); |
75 | break; |
76 | case 2: |
77 | internal::TileUsingEigen<Device, T, Tmultiples, 2>(d, out, in, |
78 | broadcast_array); |
79 | break; |
80 | case 3: |
81 | internal::TileUsingEigen<Device, T, Tmultiples, 3>(d, out, in, |
82 | broadcast_array); |
83 | break; |
84 | case 4: |
85 | internal::TileUsingEigen<Device, T, Tmultiples, 4>(d, out, in, |
86 | broadcast_array); |
87 | break; |
88 | case 5: |
89 | internal::TileUsingEigen<Device, T, Tmultiples, 5>(d, out, in, |
90 | broadcast_array); |
91 | break; |
92 | case 6: |
93 | internal::TileUsingEigen<Device, T, Tmultiples, 6>(d, out, in, |
94 | broadcast_array); |
95 | break; |
96 | case 7: |
97 | internal::TileUsingEigen<Device, T, Tmultiples, 7>(d, out, in, |
98 | broadcast_array); |
99 | break; |
100 | default: |
101 | internal::TileSimple<T>(d, out, in); |
102 | break; |
103 | } |
104 | } |
105 | }; |
106 | |
107 | } // end namespace functor |
108 | } // end namespace tensorflow |
109 | |
110 | #endif // TENSORFLOW_CORE_KERNELS_TILE_FUNCTOR_H_ |
111 | |