reduction_ops_common.h source code [tensorflow/tensorflow/core/kernels/reduction_ops_common.h]

/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16	// This is an internal header file intended to only be included as the
17	// front-matter in the implementation files of various reduction ops. It
18	// is a header file because we split the various reduction ops into their
19	// own compilation units to get more parallelism in compilation.
20
21	#ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
22	#define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
23
24	#define EIGEN_USE_THREADS
25
26	#include "third_party/eigen3/Eigen/Core"
27	#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
28
29	#include "tensorflow/core/framework/numeric_op.h"
30	#include "tensorflow/core/framework/op_kernel.h"
31	#include "tensorflow/core/framework/register_types.h"
32	#include "tensorflow/core/framework/tensor.h"
33	#include "tensorflow/core/framework/types.h"
34	#include "tensorflow/core/kernels/reduction_ops.h"
35	#include "tensorflow/core/kernels/transpose_functor.h"
36	#include "tensorflow/core/lib/core/status.h"
37	#include "tensorflow/core/lib/gtl/inlined_vector.h"
38	#include "tensorflow/core/platform/logging.h"
39
40	namespace tensorflow {
41
42	typedef Eigen::ThreadPoolDevice CPUDevice;
43	typedef Eigen::GpuDevice GPUDevice;
44
45	template <typename Device>
46	struct Constants {
47	// Derive Index type. int (32-bit) or long (64-bit) depending on the
48	// compile-time configuration. "float" here is not relevant.
49	// TODO(zhifengc): Moves the definition to TTypes.
50	typedef TTypes<float>::Tensor::Index Index;
51	Eigen::array<Index, `1`> kZero;
52	Eigen::array<Index, `1`> kOne;
53	Eigen::array<Index, `2`> kZeroTwo;
54
55	Constants() {
56	kZero [`0`] = `0`;
57	kOne [`0`] = `1`;
58	kZeroTwo [`0`] = `0`;
59	kZeroTwo [`1`] = `2`;
60	}
61	};
62
63	struct ConstantsBase {
64	const Eigen::IndexList<Eigen::type2index<`0`>> kZero;
65	const Eigen::IndexList<Eigen::type2index<`1`>> kOne;
66	const Eigen::IndexList<Eigen::type2index<`0`>, Eigen::type2index<`2`>> kZeroTwo;
67	};
68	template <>
69	struct Constants<CPUDevice> : ConstantsBase {};
70
71	class ReductionHelper {
72	public:
73	ReductionHelper() : reduce_first_axis_(false) {}
74
75	Status Simplify(const Tensor& data, const Tensor& axis, const bool keep_dims);
76
77	// We need to do roughly:
78	// tmp_out = allocate(out_reshape())
79	// tmp_out.reshape(out_reshape) = data.reshape(data_reshape).reduce(axes)
80	// out = tmp_out.reshape(out_shape)
81
82	// The reduction result must be allocated with this shape.
83	TensorShape out_reshape() const;
84
85	// The final output shape must be allocated with this shape.
86	TensorShape out_shape() const;
87
88	// The reduction is on a reshaped tensor of this rank.
89	int ndims() const { return data_reshape_.size(); }
90
91	// True if need to reduce the 0-th dimension.
92	bool reduce_first_axis() const { return reduce_first_axis_; }
93
94	// The output is reshaped.
95	template <typename T, int N>
96	typename TTypes<T, N>::Tensor out(Tensor* out) {
97	return out->shaped<T, N>(out_reshape_);
98	}
99
100	// The input is reshaped.
101	template <typename T, int N>
102	typename TTypes<T, N>::ConstTensor in(const Tensor& data) {
103	return data.shaped<T, N>(data_reshape_);
104	}
105
106	// Shape of shuffled input
107	TensorShape data_reshape() const {
108	TensorShape shape;
109	for (auto s : data_reshape_) shape.AddDim(s);
110	return shape;
111	}
112
113	// Shape with all reduction dimensions at the end
114	TensorShape shuffled_shape();
115
116	// Permutation of reduced dims needed to put reduction dimensions at the end
117	gtl::InlinedVector<int32, `8`> permutation();
118
119	private:
120	bool reduce_first_axis_; // True if need to reduce the 0-th dimension.
121	gtl::InlinedVector<int64_t, `4`>
122	data_reshape_; // Reshape data before reduction.
123	gtl::InlinedVector<int64_t, `4`> out_shape_; // The final output shape.
124	gtl::InlinedVector<int64_t, `4`> out_reshape_; // Reshape output for reduction.
125	};
126
127	// For operations where the output is a reduction function along some
128	// dimensions of the input.
129	template <typename Device, class T, typename Tperm, typename Reducer>
130	class ReductionOp : public OpKernel {
131	public:
132	explicit ReductionOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
133	const DataType dt = DataTypeToEnum<T>::v();
134	const DataType pt = DataTypeToEnum<Tperm>::v();
135	OP_REQUIRES_OK(ctx, ctx->MatchSignature({dt, pt}, {dt}));
136
137	OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
138	}
139
140	void Compute(OpKernelContext* ctx) override {
141	const Tensor& data = ctx->input(`0`);
142	const Tensor& axes = ctx->input(`1`);
143	VLOG(`1`) << "data shape: " << data.shape().DebugString();
144	VLOG(`1`) << "axes : " << axes.SummarizeValue(`10`);
145
146	ReductionHelper helper;
147	OP_REQUIRES_OK(ctx, helper.Simplify(data, axes, keep_dims_));
148	CHECK_GE(helper.ndims(), `0`);
149
150	bool is_scalar_identity = functor::ReducerTraits<Reducer>::IsScalarIdentity;
151	bool is_trivial = helper.ndims() == `0` \|\|
152	(helper.ndims() == `1` && !helper.reduce_first_axis());
153	if (is_scalar_identity && is_trivial) {
154	Tensor out;
155	// Special case. Reduces nothing and does not alter the input values.
156	if (!out.CopyFrom(data, helper.out_shape())) {
157	ctx->SetStatus(errors::Internal("Error during reduction copy."));
158	}
159	ctx->set_output(`0`, out);
160	return;
161	}
162
163	// We must allocate temp tensors using the same alloc attr as
164	// output(0) because it is returned as output(0) in the end.
165	const AllocatorAttributes alloc_attr = ctx->output_alloc_attr(`0`);
166
167	Tensor tmp_out;
168	typedef functor::ReduceFunctor<Device, Reducer> Functor;
169	Constants<Device> constants;
170	const Device& d = ctx->eigen_device<Device>();
171	Reducer reducer;
172
173	if (data.NumElements() > `0` && is_trivial && !is_scalar_identity) {
174	OP_REQUIRES_OK(ctx, ctx->allocate_temp(ctx->expected_output_dtype(`0`),
175	TensorShape ({data.NumElements()}),
176	&tmp_out, alloc_attr));
177	Functor::Reduce(ctx, tmp_out.flat<T>(),
178	data.shaped<T, `2`>({`1`, data.NumElements()}),
179	constants.kZero, reducer);
180	} else {
181	// A temporary tensor whose size matches the size of the reduced
182	// output.
183	OP_REQUIRES_OK(
184	ctx, ctx->allocate_temp(ctx->expected_output_dtype(`0`),
185	helper.out_reshape(), &tmp_out, alloc_attr));
186
187	if (tmp_out.NumElements() == `0`) {
188	// Nothing to do, fall through to final reshaping.
189	} else if (data.NumElements() == `0`) {
190	// Degenerate reduction where the input is empty but the output is
191	// nonempty (thus tmp_out.NumElements() > 0), and we must fill the
192	// output with identity elements. Example: tf.reduce_sum(tf.zeros((0,
193	// 3)), [0]). Eigen sometimes crashes in this case, so we do it
194	// manually.
195	Functor::FillIdentity(d, tmp_out.flat<T>(), reducer);
196	} else if ((helper.ndims() == `1`) && helper.reduce_first_axis()) {
197	// Reduce to a scalar.
198	Functor::Reduce(ctx, helper.out<T, `0`>(&tmp_out), helper.in<T, `1`>(data),
199	constants.kZero, reducer);
200	} else if ((helper.ndims() == `2`) && helper.reduce_first_axis()) {
201	// Can be viewed as a reduction of a matrix along 1st dimension.
202	Functor::Reduce(ctx, helper.out<T, `1`>(&tmp_out), helper.in<T, `2`>(data),
203	constants.kZero, reducer);
204	} else if ((helper.ndims() == `2`) && !helper.reduce_first_axis()) {
205	// Can be viewed as a reduction of a matrix along 2nd dimension.
206	Functor::Reduce(ctx, helper.out<T, `1`>(&tmp_out), helper.in<T, `2`>(data),
207	constants.kOne, reducer);
208	} else if ((helper.ndims() == `3`) && helper.reduce_first_axis()) {
209	// Can be viewed as a reduction of a 3D tensor along 1st and 3rd
210	// dimensions.
211	Functor::Reduce(ctx, helper.out<T, `1`>(&tmp_out), helper.in<T, `3`>(data),
212	constants.kZeroTwo, reducer);
213	} else if ((helper.ndims() == `3`) && !helper.reduce_first_axis()) {
214	// Can be viewed as a reduction of a 3D tensor along 2nd dimension.
215	Functor::Reduce(ctx, helper.out<T, `2`>(&tmp_out), helper.in<T, `3`>(data),
216	constants.kOne, reducer);
217	} else {
218	// If we don't hit one of the cases above, transpose the data so that
219	// all reduced dimensions are last and reuse the 2-D -> 1-D case.
220	Tensor data_reshaped;
221	OP_REQUIRES(ctx, data_reshaped.CopyFrom(data, helper.data_reshape()),
222	errors::Internal("Error during reduction copy."));
223	Tensor shuffled;
224	OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
225	helper.shuffled_shape(),
226	&shuffled, alloc_attr));
227	OP_REQUIRES_OK(ctx, DoTranspose(d, data_reshaped, helper.permutation(),
228	&shuffled));
229	const int64_t unreduced = tmp_out.NumElements();
230	const int64_t reduced = shuffled.NumElements() / unreduced;
231	const Tensor& const_shuffled = shuffled;
232	Functor::Reduce(ctx, tmp_out.flat<T>(),
233	const_shuffled.shaped<T, `2`>({unreduced, reduced}),
234	constants.kOne, reducer);
235	}
236	}
237
238	// Set the real output using the contents of the reduction but the
239	// real expected output shape. The number of elements should
240	// match between the two shapes.
241	Tensor out;
242	OP_REQUIRES(ctx, out.CopyFrom(tmp_out, helper.out_shape()),
243	errors::Internal("Error during reduction copy."));
244	ctx->set_output(`0`, out);
245	}
246
247	private:
248	// True if the number of dimensions should be maintained.
249	bool keep_dims_;
250	};
251
252	namespace functor {
253
254	template <typename Device, typename Reducer>
255	struct ReduceFunctorBase {
256	template <typename OUT_T, typename IN_T, typename ReductionAxes>
257	static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in,
258	const ReductionAxes& reduction_axes,
259	const Reducer& reducer) {
260	const Device& d = ctx->eigen_device<Device>();
261	ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes, Reducer> reducer_impl;
262	reducer_impl(d, out, in, reduction_axes, reducer);
263	}
264
265	template <typename OUT_T>
266	static void FillIdentity(const Device& d, OUT_T out, const Reducer& reducer) {
267	FillIdentityEigenImpl(d, out, reducer);
268	}
269	};
270
271	template <typename Reducer>
272	struct ReduceFunctor<CPUDevice, Reducer>
273	: ReduceFunctorBase<CPUDevice, Reducer> {};
274
275	} // namespace functor
276	} // namespace tensorflow
277
278	#endif // TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
279

Browse the source code of tensorflow/tensorflow/core/kernels/reduction_ops_common.h