/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/sparse_ops.cc.

#define EIGEN_USE_THREADS

#include "absl/strings/str_join.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/op_requires.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

// TODO(b/31496047): Fix non-standard include order.
#include <numeric>  // clang-format off

using tensorflow::sparse::SparseTensor;
using tensorflow::gtl::ArraySlice;

namespace tensorflow {

struct ReduceDetails {
  // The dimensions to call Reorder() with.
  std::vector<int64_t> reorder_dims;

  // The dimensions to call group() with after Reorder().
  std::vector<int64_t> group_by_dims;

  // The shape after reduction.
  TensorShape reduced_shape;
};

// Compute common reduce parameters that'll be used for SparseTensor
// reductions. Usage:
//   ReduceDetails reduction = SparseTensorReduceHelper(sp, axes, keep_dims);
//   sp.Reorder(reduction.reorder_dims);
//   for (const auto& g : sp.group(reduction.group_by_dims)) {
//     ...
//   }
//   // Set output shape to reduction.reduced_shape.
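//
// Illustrative example (hypothetical values, not taken from any test): for a
// sparse tensor of dense shape [2, 3, 4] reduced over axes {1} with
// keep_dims == false, this helper would return
//   reorder_dims  == {0, 2, 1}
//   group_by_dims == {0, 2}
//   reduced_shape == [2, 4];
// with keep_dims == true, reduced_shape would instead be [2, 1, 4].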
ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp,
                                       gtl::ArraySlice<int32> axes_slice,
                                       bool keep_dims) {
  ReduceDetails reduction;

  std::vector<int32> reduction_axes(axes_slice.begin(), axes_slice.end());
  int ndims = sp.dims();
  for (int64_t i = 0; i < reduction_axes.size(); ++i) {
    reduction_axes[i] = (reduction_axes[i] + ndims) % ndims;
  }
  std::sort(reduction_axes.begin(), reduction_axes.end());

  // (0) Calculate the grouping dimensions:
  // group_by_dims == {0, .., NDIMS-1} \ reduction_axes.
  std::vector<int64_t> perm(ndims);
  std::iota(perm.begin(), perm.end(), 0);

  // Requires perm and reduction_axes to be sorted; group_by_dims will be
  // sorted as well.
  std::set_difference(
      perm.begin(), perm.end(), reduction_axes.begin(), reduction_axes.end(),
      std::inserter(reduction.group_by_dims, reduction.group_by_dims.begin()));

  // Now append the rest of the axes (the complement of group_by_dims);
  // the result is used by Reorder().
  reduction.reorder_dims = reduction.group_by_dims;
  std::set_difference(perm.begin(), perm.end(), reduction.group_by_dims.begin(),
                      reduction.group_by_dims.end(),
                      std::back_inserter(reduction.reorder_dims));

  // (1) Calculate the shape after reduction.
  auto sp_shape = sp.shape();
  std::vector<int64_t> out_dim_sizes;
  if (keep_dims) {
    out_dim_sizes.reserve(ndims);
    auto beg = reduction.group_by_dims.begin();
    auto end = reduction.group_by_dims.end();
    for (int d = 0; d < ndims; ++d) {
      if (std::find(beg, end, d) == end) {
        out_dim_sizes.push_back(1);  // A reduced axis.
      } else {
        out_dim_sizes.push_back(sp_shape[d]);
      }
    }
  } else {
    out_dim_sizes = sp.PickDims(reduction.group_by_dims);
  }

  reduction.reduced_shape = TensorShape(out_dim_sizes);
  return reduction;
}

Status ValidateInputs(const Tensor *shape_t, const Tensor *reduction_axes_t) {
  // indices and values are validated in the SparseTensor ctor.
  if (!TensorShapeUtils::IsVector(shape_t->shape())) {
    return errors::InvalidArgument(
        "Expected input_shape to be a vector; got shape: ",
        shape_t->shape().DebugString());
  }
  if (!TensorShapeUtils::IsScalar(reduction_axes_t->shape()) &&
      !TensorShapeUtils::IsVector(reduction_axes_t->shape())) {
    return errors::InvalidArgument(
        "Expected reduction_axes to be a scalar or a vector; got shape: ",
        reduction_axes_t->shape().DebugString());
  }

  const auto reduction_axes_flat = reduction_axes_t->flat<int32>();
  for (int64_t i = 0; i < reduction_axes_flat.size(); i++) {
    int32_t axis = reduction_axes_flat(i);
    if (axis < -shape_t->NumElements() || axis >= shape_t->NumElements()) {
      return errors::InvalidArgument("Invalid reduction dimension ", axis,
                                     ", for input with ",
                                     shape_t->NumElements(), " dimensions.");
    }
  }

  return OkStatus();
}

struct SumOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.sum();
  }
  static StringPiece Name() { return "sum"; }
};

struct MaxOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.maximum();
  }
  static StringPiece Name() { return "max"; }
};

template <typename T, typename Op>
class SparseReduceOp : public OpKernel {
 public:
  explicit SparseReduceOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    // TODO(zongheng): we will call Reorder() below, which will modify
    // in-place the underlying indices and values buffers. To avoid
    // surprises of this kernel being stateful, we work around the above by
    // making deep copies here. Remove this if/when we change Reorder()'s
    // semantics.
    const auto shape_vec = shape_t->vec<int64_t>();
    TensorShape shape;
    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_vec, &shape));

    SparseTensor sp;
    OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t),
                                             tensor::DeepCopy(*values_t),
                                             shape, &sp));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    Tensor *out_values;
    OP_REQUIRES_OK(
        ctx, ctx->allocate_output(0, reduction.reduced_shape, &out_values));
    auto out_flat = out_values->flat<T>();
    out_flat.setZero();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();

    // Compute strides, and use them to convert coords to a flat index. The
    // coordinates returned by .group() have the same ndims as group_by_dims.
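    //
    // Illustrative example (hypothetical values, continuing the example above
    // SparseTensorReduceHelper): with group_by_dims == {0, 2} and dense shape
    // [2, 3, 4], output_strides below would be {4, 1}, so group coordinates
    // (1, 3) map to flat index 1 * 4 + 3 == 7.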
    gtl::InlinedVector<int64_t, 8> output_strides(
        reduction.group_by_dims.size());
    if (!output_strides.empty()) {  // Do this iff we don't reduce all.
      output_strides.back() = 1;
      for (int d = output_strides.size() - 2; d >= 0; --d) {
        output_strides[d] =
            output_strides[d + 1] * shape_vec(reduction.group_by_dims[d + 1]);
      }
    }

    auto CoordinatesToFlatIndex = [](ArraySlice<int64_t> coords,
                                     ArraySlice<int64_t> strides) -> int64 {
      if (strides.empty()) {  // Reduce all.
        return 0;
      }
      CHECK_EQ(coords.size(), strides.size());
      int64_t idx = 0;
      for (int i = 0; i < coords.size(); ++i) {
        idx += coords[i] * strides[i];
      }
      return idx;
    };

    // Each group maps one-to-one onto a value in the reduced tensor.
    // g.group() provides the coordinates of a particular reduced value.
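    // For instance (hypothetical values as above), the group with reduced
    // coordinates (1, 3) collects every nonzero whose dim-0 index is 1 and
    // dim-2 index is 3, and Op reduces their values into out_flat(7).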
    sp.Reorder<T>(reduction.reorder_dims);
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      OP_REQUIRES(ctx,
                  output_strides.empty() ||
                      (g.group().size() == output_strides.size()),
                  errors::Internal(
                      "Expected group size and output_strides size to match",
                      ", but got ", g.group().size(), " and ",
                      output_strides.size()));
      const int64_t idx = CoordinatesToFlatIndex(g.group(), output_strides);
      OP_REQUIRES(ctx, idx >= 0 && idx < out_flat.size(),
                  errors::Internal("Obtained a write index of ", idx,
                                   " which is outside of bounds of [0, ",
                                   out_flat.size(), ")"));
      out_flat(idx) = reduced_val();
      VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
              << "; idx: " << idx << "; group " << Op::Name() << ": "
              << reduced_val();
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceSum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceMax").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

template <typename T, typename Op>
class SparseReduceSparseOp : public OpKernel {
 public:
  explicit SparseReduceSparseOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    TensorShape shape;
    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_t->vec<int64_t>(),
                                                      &shape));
    SparseTensor sp;
    OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t),
                                             tensor::DeepCopy(*values_t),
                                             shape, &sp));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    sp.Reorder<T>(reduction.reorder_dims);
    // Count nnzs in the output SparseTensor.
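    // Each group yields exactly one nonzero in the reduced output, so the
    // number of groups equals the output nnz.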
    int64_t nnz = 0;
    auto iter = sp.group(reduction.group_by_dims);
    for (auto it = iter.begin(); it != iter.end(); ++it) {
      nnz++;
    }

    Tensor *out_indices_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(
                       0, TensorShape({nnz, reduction.reduced_shape.dims()}),
                       &out_indices_t));
    typename TTypes<int64_t>::Matrix out_indices_mat =
        out_indices_t->matrix<int64_t>();
    // For keep_dims: indices along reduced dimensions stay 0, so we only
    // zero-initialize here and don't explicitly set those fields below.
    out_indices_mat.setZero();
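    // Illustrative example (hypothetical values as in the comments above):
    // with keep_dims == true, group_by_dims == {0, 2}, and reduced_shape
    // [2, 1, 4], a group at coordinates (1, 3) produces the output index row
    // (1, 0, 3); with keep_dims == false it would simply be (1, 3).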

    Tensor *out_values_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, TensorShape({nnz}), &out_values_t));
    auto out_flat = out_values_t->flat<T>();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();
    int64_t i = 0;
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      std::vector<int64_t> group = g.group();
      for (int64_t j = 0; j < group.size(); j++) {
        if (keep_dims_) {
          out_indices_mat(i, reduction.group_by_dims[j]) = group[j];
        } else {
          out_indices_mat(i, j) = group[j];
        }
      }
      out_flat(i) = reduced_val();
      i++;
      VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
              << "; group " << Op::Name() << ": " << reduced_val();
    }

    Tensor *out_shape_t;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(
                            2, TensorShape({reduction.reduced_shape.dims()}),
                            &out_shape_t));
    auto out_shape_flat = out_shape_t->flat<int64_t>();
    auto out_dim_sizes = reduction.reduced_shape.dim_sizes();
    if (!out_dim_sizes.empty()) {
      std::copy(out_dim_sizes.begin(), out_dim_sizes.end(), &out_shape_flat(0));
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceSumSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceMaxSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

}  // namespace tensorflow