1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// This is an internal header file intended to only be included as the
17// front-matter in the implementation files of various reduction ops. It
18// is a header file because we split the various reduction ops into their
19// own compilation units to get more parallelism in compilation.
20
21#ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
22#define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
23
24#define EIGEN_USE_THREADS
25
26#include "third_party/eigen3/Eigen/Core"
27#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
28
29#include "tensorflow/core/framework/numeric_op.h"
30#include "tensorflow/core/framework/op_kernel.h"
31#include "tensorflow/core/framework/register_types.h"
32#include "tensorflow/core/framework/tensor.h"
33#include "tensorflow/core/framework/types.h"
34#include "tensorflow/core/kernels/reduction_ops.h"
35#include "tensorflow/core/kernels/transpose_functor.h"
36#include "tensorflow/core/lib/core/status.h"
37#include "tensorflow/core/lib/gtl/inlined_vector.h"
38#include "tensorflow/core/platform/logging.h"
39
40namespace tensorflow {
41
42typedef Eigen::ThreadPoolDevice CPUDevice;
43typedef Eigen::GpuDevice GPUDevice;
44
45template <typename Device>
46struct Constants {
47 // Derive Index type. int (32-bit) or long (64-bit) depending on the
48 // compile-time configuration. "float" here is not relevant.
49 // TODO(zhifengc): Moves the definition to TTypes.
50 typedef TTypes<float>::Tensor::Index Index;
51 Eigen::array<Index, 1> kZero;
52 Eigen::array<Index, 1> kOne;
53 Eigen::array<Index, 2> kZeroTwo;
54
55 Constants() {
56 kZero[0] = 0;
57 kOne[0] = 1;
58 kZeroTwo[0] = 0;
59 kZeroTwo[1] = 2;
60 }
61};
62
// CPU variant of Constants: the reduction axes are encoded as
// Eigen::IndexList of type2index<> values, i.e. compile-time constants,
// which lets Eigen specialize the reduction expressions for these axes.
struct ConstantsBase {
  const Eigen::IndexList<Eigen::type2index<0>> kZero;
  const Eigen::IndexList<Eigen::type2index<1>> kOne;
  const Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<2>> kZeroTwo;
};
// On CPUDevice, Constants<> resolves to the compile-time version above.
template <>
struct Constants<CPUDevice> : ConstantsBase {};
70
// Helper that simplifies a (data, reduction axes) pair into a low-rank
// equivalent problem: the original tensor is viewed through a reshape
// (data_reshape()) of rank ndims(), and the reduction dimensions of that
// reshaped view are determined by reduce_first_axis() together with the
// rank (see the ndims()-based dispatch in ReductionOp::Compute).
// NOTE(review): Simplify() is defined in the accompanying .cc file; the
// exact collapsing rules are not visible in this header.
class ReductionHelper {
 public:
  ReductionHelper() : reduce_first_axis_(false) {}

  // Computes the simplified shapes from `data` and the `axis` tensor.
  // `keep_dims` selects whether reduced dimensions are retained (with
  // size 1) in out_shape(). Must be called before any other accessor.
  Status Simplify(const Tensor& data, const Tensor& axis, const bool keep_dims);

  // We need to do roughly:
  //   tmp_out = allocate(out_reshape())
  //   tmp_out.reshape(out_reshape) = data.reshape(data_reshape).reduce(axes)
  //   out = tmp_out.reshape(out_shape)

  // The reduction result must be allocated with this shape.
  TensorShape out_reshape() const;

  // The final output shape must be allocated with this shape.
  TensorShape out_shape() const;

  // The reduction is on a reshaped tensor of this rank.
  int ndims() const { return data_reshape_.size(); }

  // True if need to reduce the 0-th dimension.
  bool reduce_first_axis() const { return reduce_first_axis_; }

  // The output is reshaped: returns `out` viewed with shape out_reshape_
  // as a rank-N Eigen tensor of element type T.
  template <typename T, int N>
  typename TTypes<T, N>::Tensor out(Tensor* out) {
    return out->shaped<T, N>(out_reshape_);
  }

  // The input is reshaped: returns `data` viewed with shape data_reshape_
  // as a rank-N const Eigen tensor of element type T.
  template <typename T, int N>
  typename TTypes<T, N>::ConstTensor in(const Tensor& data) {
    return data.shaped<T, N>(data_reshape_);
  }

  // Shape of shuffled input (data_reshape_ as a TensorShape).
  TensorShape data_reshape() const {
    TensorShape shape;
    for (auto s : data_reshape_) shape.AddDim(s);
    return shape;
  }

  // Shape with all reduction dimensions at the end
  TensorShape shuffled_shape();

  // Permutation of reduced dims needed to put reduction dimensions at the end
  gtl::InlinedVector<int32, 8> permutation();

 private:
  bool reduce_first_axis_;  // True if need to reduce the 0-th dimension.
  gtl::InlinedVector<int64_t, 4>
      data_reshape_;  // Reshape data before reduction.
  gtl::InlinedVector<int64_t, 4> out_shape_;  // The final output shape.
  gtl::InlinedVector<int64_t, 4> out_reshape_;  // Reshape output for reduction.
};
126
// For operations where the output is a reduction function along some
// dimensions of the input.
//
// Template parameters:
//   Device  - CPUDevice or GPUDevice; selects the Eigen backend and the
//             Constants<> / ReduceFunctor<> specializations.
//   T       - element type of the data input and the output.
//   Tperm   - integer type of the reduction-indices input.
//   Reducer - Eigen-style reducer functor consumed by
//             functor::ReduceFunctor and functor::ReducerTraits.
template <typename Device, class T, typename Tperm, typename Reducer>
class ReductionOp : public OpKernel {
 public:
  explicit ReductionOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
    const DataType dt = DataTypeToEnum<T>::v();
    const DataType pt = DataTypeToEnum<Tperm>::v();
    // Op signature: (data: T, reduction indices: Tperm) -> (output: T).
    OP_REQUIRES_OK(ctx, ctx->MatchSignature({dt, pt}, {dt}));

    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& data = ctx->input(0);
    const Tensor& axes = ctx->input(1);
    VLOG(1) << "data shape: " << data.shape().DebugString();
    VLOG(1) << "axes : " << axes.SummarizeValue(10);

    // Collapse the requested reduction into a low-rank equivalent problem.
    ReductionHelper helper;
    OP_REQUIRES_OK(ctx, helper.Simplify(data, axes, keep_dims_));
    CHECK_GE(helper.ndims(), 0);

    // "Trivial" means no dimension is actually reduced: either the
    // simplified rank is 0, or it is 1 with the single axis kept.
    bool is_scalar_identity = functor::ReducerTraits<Reducer>::IsScalarIdentity;
    bool is_trivial = helper.ndims() == 0 ||
                      (helper.ndims() == 1 && !helper.reduce_first_axis());
    if (is_scalar_identity && is_trivial) {
      Tensor out;
      // Special case. Reduces nothing and does not alter the input values.
      if (!out.CopyFrom(data, helper.out_shape())) {
        ctx->SetStatus(errors::Internal("Error during reduction copy."));
      }
      ctx->set_output(0, out);
      return;
    }

    // We must allocate temp tensors using the same alloc attr as
    // output(0) because it is returned as output(0) in the end.
    const AllocatorAttributes alloc_attr = ctx->output_alloc_attr(0);

    Tensor tmp_out;
    typedef functor::ReduceFunctor<Device, Reducer> Functor;
    Constants<Device> constants;
    const Device& d = ctx->eigen_device<Device>();
    Reducer reducer;

    // Trivial reduction, but the reducer is not a scalar identity: each
    // element must still pass through the reducer once. View the data as a
    // 1 x N matrix and reduce along the size-1 first axis, yielding N
    // elements.
    if (data.NumElements() > 0 && is_trivial && !is_scalar_identity) {
      OP_REQUIRES_OK(ctx, ctx->allocate_temp(ctx->expected_output_dtype(0),
                                             TensorShape({data.NumElements()}),
                                             &tmp_out, alloc_attr));
      Functor::Reduce(ctx, tmp_out.flat<T>(),
                      data.shaped<T, 2>({1, data.NumElements()}),
                      constants.kZero, reducer);
    } else {
      // A temporary tensor whose size matches the size of the reduced
      // output.
      OP_REQUIRES_OK(
          ctx, ctx->allocate_temp(ctx->expected_output_dtype(0),
                                  helper.out_reshape(), &tmp_out, alloc_attr));

      // Dispatch on the simplified rank and on whether the first axis of
      // the reshaped view is reduced; ranks > 3 fall through to the
      // transpose-based general case.
      if (tmp_out.NumElements() == 0) {
        // Nothing to do, fall through to final reshaping.
      } else if (data.NumElements() == 0) {
        // Degenerate reduction where the input is empty but the output is
        // nonempty (thus tmp_out.NumElements() > 0), and we must fill the
        // output with identity elements. Example: tf.reduce_sum(tf.zeros((0,
        // 3)), [0]). Eigen sometimes crashes in this case, so we do it
        // manually.
        Functor::FillIdentity(d, tmp_out.flat<T>(), reducer);
      } else if ((helper.ndims() == 1) && helper.reduce_first_axis()) {
        // Reduce to a scalar.
        Functor::Reduce(ctx, helper.out<T, 0>(&tmp_out), helper.in<T, 1>(data),
                        constants.kZero, reducer);
      } else if ((helper.ndims() == 2) && helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a matrix along 1st dimension.
        Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
                        constants.kZero, reducer);
      } else if ((helper.ndims() == 2) && !helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a matrix along 2nd dimension.
        Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
                        constants.kOne, reducer);
      } else if ((helper.ndims() == 3) && helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a 3D tensor along 1st and 3rd
        // dimensions.
        Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 3>(data),
                        constants.kZeroTwo, reducer);
      } else if ((helper.ndims() == 3) && !helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a 3D tensor along 2nd dimension.
        Functor::Reduce(ctx, helper.out<T, 2>(&tmp_out), helper.in<T, 3>(data),
                        constants.kOne, reducer);
      } else {
        // If we don't hit one of the cases above, transpose the data so that
        // all reduced dimensions are last and reuse the 2-D -> 1-D case.
        Tensor data_reshaped;
        OP_REQUIRES(ctx, data_reshaped.CopyFrom(data, helper.data_reshape()),
                    errors::Internal("Error during reduction copy."));
        Tensor shuffled;
        OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                               helper.shuffled_shape(),
                                               &shuffled, alloc_attr));
        OP_REQUIRES_OK(ctx, DoTranspose(d, data_reshaped, helper.permutation(),
                                        &shuffled));
        // After the transpose the kept dimensions form the leading axes and
        // the reduced dimensions the trailing ones, so the whole problem is
        // an (unreduced x reduced) matrix reduced along its 2nd dimension.
        const int64_t unreduced = tmp_out.NumElements();
        const int64_t reduced = shuffled.NumElements() / unreduced;
        const Tensor& const_shuffled = shuffled;
        Functor::Reduce(ctx, tmp_out.flat<T>(),
                        const_shuffled.shaped<T, 2>({unreduced, reduced}),
                        constants.kOne, reducer);
      }
    }

    // Set the real output using the contents of the reduction but the
    // real expected output shape. The number of elements should
    // match between the two shapes.
    Tensor out;
    OP_REQUIRES(ctx, out.CopyFrom(tmp_out, helper.out_shape()),
                errors::Internal("Error during reduction copy."));
    ctx->set_output(0, out);
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};
251
252namespace functor {
253
254template <typename Device, typename Reducer>
255struct ReduceFunctorBase {
256 template <typename OUT_T, typename IN_T, typename ReductionAxes>
257 static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in,
258 const ReductionAxes& reduction_axes,
259 const Reducer& reducer) {
260 const Device& d = ctx->eigen_device<Device>();
261 ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes, Reducer> reducer_impl;
262 reducer_impl(d, out, in, reduction_axes, reducer);
263 }
264
265 template <typename OUT_T>
266 static void FillIdentity(const Device& d, OUT_T out, const Reducer& reducer) {
267 FillIdentityEigenImpl(d, out, reducer);
268 }
269};
270
// CPU specialization: inherits the generic Eigen-based Reduce/FillIdentity
// from ReduceFunctorBase unchanged.
template <typename Reducer>
struct ReduceFunctor<CPUDevice, Reducer>
    : ReduceFunctorBase<CPUDevice, Reducer> {};
274
275} // namespace functor
276} // namespace tensorflow
277
278#endif // TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
279