/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/split_lib.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gtl/array_slice.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

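// Unpacks (splits) a rank-R input tensor along the "axis" attribute into
// `num` outputs of rank R-1, where `num` equals the size of the input's
// `axis` dimension. For example, unpacking a [3, 4] tensor along axis 0
// yields three [4] tensors.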
template <typename Device, typename T>
class UnpackOp : public OpKernel {
 public:
  explicit UnpackOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("axis", &axis_));
  }

  void Compute(OpKernelContext* context) override {
    const int32_t num = num_outputs();
    const Tensor& input = context->input(0);
    const TensorShape& input_shape = input.shape();

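    // A negative axis counts from the end of the input's dimensions, e.g.
    // axis = -1 unpacks along the last dimension.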
    int axis = axis_;
    if (axis < 0) axis += input_shape.dims();

    OP_REQUIRES(context, 0 <= axis && axis < input_shape.dims(),
                errors::InvalidArgument("axis = ", axis_, " not in [",
                                        -input_shape.dims(), ", ",
                                        input_shape.dims(), ")"));

    OP_REQUIRES(
        context, input_shape.dims() > 0 && input_shape.dim_size(axis) == num,
        errors::InvalidArgument("Input shape axis ", axis, " must equal ", num,
                                ", got shape ", input_shape.DebugString()));

    auto output_shape = input_shape;
    output_shape.RemoveDim(axis);
    const int64_t output_size = output_shape.num_elements();
    OP_REQUIRES(
        context,
        FastBoundsCheck(output_size,
                        std::numeric_limits<Eigen::DenseIndex>::max()),
        errors::InvalidArgument("output size must fit in Eigen DenseIndex"));

    // Special case: the outputs are aligned, so we can share the input's
    // underlying buffer instead of copying.
    //
    // Apply this optimization conservatively: if the input is aligned, the
    // resulting tensors must also be aligned. This is conservative because
    // if the immediate consumers of the resulting tensors do not use Eigen
    // for computation, it is perfectly fine to avoid the copy even without
    // alignment.
    if (axis == 0 &&
        (output_size == 0 || IsInnerDimsSizeAligned<T>(input_shape))) {
      for (int i = 0; i < num; ++i) {
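        // CopyFrom shares the input's underlying buffer rather than copying
        // the data; it succeeds because the slice and output_shape hold the
        // same number of elements.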
        Tensor output;
        CHECK(output.CopyFrom(input.Slice(i, i + 1), output_shape));
        context->set_output(i, output);
      }
      return;
    }

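    // Collapse every dimension before `axis` into `before_dim` and every
    // dimension after `axis` into `after_dim`, so the input can be viewed as
    // a rank-2 tensor of shape [before_dim, axis_dim * after_dim].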
    Eigen::DenseIndex before_dim = 1;
    for (int i = 0; i < axis; ++i) {
      before_dim *= input_shape.dim_size(i);
    }

    Eigen::DenseIndex after_dim = 1;
    for (int i = axis + 1; i < input_shape.dims(); ++i) {
      after_dim *= input_shape.dim_size(i);
    }
    const Eigen::DenseIndex axis_dim = input_shape.dim_size(axis);

    // Except for shape, unpack is a special case of split, so we reuse the
    // same computational kernels.
    auto input_reshaped =
        input.shaped<T, 2>({before_dim, axis_dim * after_dim});

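    // Output i is the contiguous block of `after_dim` columns starting at
    // column i * after_dim of the reshaped input.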
    for (int i = 0; i < num; ++i) {
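      // Skip outputs that no downstream op consumes.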
      if (!context->output_required(i)) continue;

      Tensor* output;
      OP_REQUIRES_OK(context,
                     context->allocate_output(i, output_shape, &output));

      if (output_shape.num_elements() > 0) {
        auto output_shaped = output->shaped<T, 2>({before_dim, after_dim});
        Eigen::DSizes<Eigen::DenseIndex, 2> indices{0, i * after_dim};
        Eigen::DSizes<Eigen::DenseIndex, 2> sizes{before_dim, after_dim};
        functor::Split<Device, T, 2>()(context->eigen_device<Device>(),
                                       output_shaped, input_reshaped, indices,
                                       sizes);
      }
    }
  }

 private:
  int axis_;
};

#define REGISTER_UNPACK(type)                                      \
  REGISTER_KERNEL_BUILDER(                                         \
      Name("Unpack").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      UnpackOp<CPUDevice, type>)

TF_CALL_ALL_TYPES(REGISTER_UNPACK);

#undef REGISTER_UNPACK

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define REGISTER_GPU(type)                                         \
  REGISTER_KERNEL_BUILDER(                                         \
      Name("Unpack").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      UnpackOp<GPUDevice, type>)

TF_CALL_bfloat16(REGISTER_GPU);
TF_CALL_uint8(REGISTER_GPU);
TF_CALL_GPU_ALL_TYPES(REGISTER_GPU);
#undef REGISTER_GPU

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Unpack")
                            .Device(DEVICE_DEFAULT)
                            .HostMemory("value")
                            .HostMemory("output")
                            .TypeConstraint<int32>("T"),
                        UnpackOp<CPUDevice, int32>);
REGISTER_KERNEL_BUILDER(Name("Unpack")
                            .Device(DEVICE_DEFAULT)
                            .HostMemory("value")
                            .HostMemory("output")
                            .TypeConstraint<int64_t>("T"),
                        UnpackOp<CPUDevice, int64_t>);

}  // end namespace tensorflow