/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/sparse_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/op_requires.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

// TODO(b/31496047): Fix non-standard include order.
#include <algorithm>  // clang-format off
#include <numeric>
#include <vector>

#include "absl/strings/str_join.h"

using tensorflow::sparse::SparseTensor;
using tensorflow::gtl::ArraySlice;

namespace tensorflow {

struct ReduceDetails {
  // The dimensions to call Reorder() with.
  std::vector<int64_t> reorder_dims;

  // The dimensions to call group() with after Reorder().
  std::vector<int64_t> group_by_dims;

  // The shape after reduction.
  TensorShape reduced_shape;
};

// Compute common reduce parameters that'll be used for SparseTensor
// reductions. Usage:
//   ReduceDetails reduction = SparseTensorReduceHelper(sp, axes, keep_dims);
//   sp.Reorder(reduction.reorder_dims);
//   for (const auto& g : sp.group(reduction.group_by_dims)) {
//     ...
//   }
//   // Set output shape to reduction.reduced_shape.
ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp,
                                       gtl::ArraySlice<int32> axes_slice,
                                       bool keep_dims) {
  ReduceDetails reduction;

  std::vector<int32> reduction_axes(axes_slice.begin(), axes_slice.end());
  int ndims = sp.dims();
  for (int64_t i = 0; i < reduction_axes.size(); ++i) {
    reduction_axes[i] = (reduction_axes[i] + ndims) % ndims;
  }
  std::sort(reduction_axes.begin(), reduction_axes.end());

  // (0) Calculate the grouping dimensions:
  // group_by_dims == {0, .., NDIMS-1} \ reduction_axes.
  std::vector<int64_t> perm(ndims);
  std::iota(perm.begin(), perm.end(), 0);

  // Requires perm and reduction_axes to be sorted; group_by_dims will be
  // sorted as well.
  std::set_difference(
      perm.begin(), perm.end(), reduction_axes.begin(), reduction_axes.end(),
      std::inserter(reduction.group_by_dims, reduction.group_by_dims.begin()));

  // Now append the rest of the axes (the complement of group_by_dims);
  // the result is used by Reorder().
  reduction.reorder_dims = reduction.group_by_dims;
  std::set_difference(perm.begin(), perm.end(), reduction.group_by_dims.begin(),
                      reduction.group_by_dims.end(),
                      std::back_inserter(reduction.reorder_dims));
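  // For example, with ndims == 3 and reduction_axes == {1}, the two
  // set_difference() calls above yield group_by_dims == {0, 2} and
  // reorder_dims == {0, 2, 1}.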

  // (1) Calculate the shape after reduction.
  auto sp_shape = sp.shape();
  std::vector<int64_t> out_dim_sizes;
  if (keep_dims) {
    out_dim_sizes.reserve(ndims);
    auto beg = reduction.group_by_dims.begin();
    auto end = reduction.group_by_dims.end();
    for (int d = 0; d < ndims; ++d) {
      if (std::find(beg, end, d) == end) {
        out_dim_sizes.push_back(1);  // A reduced axis.
      } else {
        out_dim_sizes.push_back(sp_shape[d]);
      }
    }
  } else {
    out_dim_sizes = sp.PickDims(reduction.group_by_dims);
  }
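  // Continuing the example above: with an input shape of [2, 3, 4],
  // keep_dims == true gives out_dim_sizes == [2, 1, 4], while
  // keep_dims == false gives out_dim_sizes == [2, 4].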

  reduction.reduced_shape = TensorShape(out_dim_sizes);
  return reduction;
}

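// Validates the dense-shape and reduction-axes inputs shared by both
// SparseReduce kernels below.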
Status ValidateInputs(const Tensor *shape_t, const Tensor *reduction_axes_t) {
  // indices and values are validated in SparseTensor ctor.
  if (!TensorShapeUtils::IsVector(shape_t->shape())) {
    return errors::InvalidArgument(
        "Expected input_shape to be a vector; got shape: ",
        shape_t->shape().DebugString());
  }
  if (!TensorShapeUtils::IsScalar(reduction_axes_t->shape()) &&
      !TensorShapeUtils::IsVector(reduction_axes_t->shape())) {
    return errors::InvalidArgument(
        "Expected reduction_axes to be a scalar or a vector; got shape: ",
        reduction_axes_t->shape().DebugString());
  }

  const auto reduction_axes_flat = reduction_axes_t->flat<int32>();
  for (int64_t i = 0; i < reduction_axes_flat.size(); i++) {
    int32_t axis = reduction_axes_flat(i);
    if (axis < -shape_t->NumElements() || axis >= shape_t->NumElements()) {
      return errors::InvalidArgument("Invalid reduction dimension ", axis,
                                     ", for input with ",
                                     shape_t->NumElements(), " dimensions.");
    }
  }

  return OkStatus();
}

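// Reduction functors plugged into SparseReduceOp and SparseReduceSparseOp via
// the Op template parameter. Each one reduces the values of a single group to
// a scalar on the CPU device.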
struct SumOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.sum();
  }
  static StringPiece Name() { return "sum"; }
};

struct MaxOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.maximum();
  }
  static StringPiece Name() { return "max"; }
};

template <typename T, typename Op>
class SparseReduceOp : public OpKernel {
 public:
  explicit SparseReduceOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    // TODO(zongheng): we will call Reorder() below, which will modify
    // in-place the underlying indices and values buffers. To avoid
    // surprises of this kernel being stateful, we work around the above by
    // making deep copies here. Remove this if/when we change Reorder()'s
    // semantics.
    const auto shape_vec = shape_t->vec<int64_t>();
    TensorShape shape;
    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_vec, &shape));

    SparseTensor sp;
    OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t),
                                             tensor::DeepCopy(*values_t),
                                             shape, &sp));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    Tensor *out_values;
    OP_REQUIRES_OK(
        ctx, ctx->allocate_output(0, reduction.reduced_shape, &out_values));
    auto out_flat = out_values->flat<T>();
    out_flat.setZero();
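    // Output positions that receive no group below keep this zero
    // initialization.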

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();

    // Compute strides, and use them to convert coords to a flat index. The
    // coordinates returned by .group() have the same ndims as group_by_dims.
    gtl::InlinedVector<int64_t, 8> output_strides(
        reduction.group_by_dims.size());
    if (!output_strides.empty()) {  // Do this iff we don't reduce all.
      output_strides.back() = 1;
      for (int d = output_strides.size() - 2; d >= 0; --d) {
        output_strides[d] =
            output_strides[d + 1] * shape_vec(reduction.group_by_dims[d + 1]);
      }
    }
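    // E.g., with an input shape of [2, 3, 4] and group_by_dims == {0, 2},
    // output_strides == [4, 1], so group coordinates [1, 3] map to flat
    // index 1 * 4 + 3 * 1 == 7.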

    auto CoordinatesToFlatIndex = [](ArraySlice<int64_t> coords,
                                     ArraySlice<int64_t> strides) -> int64_t {
      if (strides.empty()) {  // Reduce all.
        return 0;
      }
      CHECK_EQ(coords.size(), strides.size());
      int64_t idx = 0;
      for (int i = 0; i < coords.size(); ++i) {
        idx += coords[i] * strides[i];
      }
      return idx;
    };

    // Each group maps one-to-one onto a value in the reduced tensor.
    // g.group() provides the coordinates of a particular reduced value.
    sp.Reorder<T>(reduction.reorder_dims);
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      OP_REQUIRES(ctx,
                  output_strides.empty() ||
                      (g.group().size() == output_strides.size()),
                  errors::Internal(
                      "Expected group size and output_strides size to match",
                      ", but got ", g.group().size(), " and ",
                      output_strides.size()));
      const int64_t idx = CoordinatesToFlatIndex(g.group(), output_strides);
      OP_REQUIRES(ctx, idx >= 0 && idx < out_flat.size(),
                  errors::Internal("Obtained a write index of ", idx,
                                   " which is outside of bounds of [0, ",
                                   out_flat.size(), ")"));
      out_flat(idx) = reduced_val();
      VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
              << "; idx: " << idx << "; group " << Op::Name() << ": "
              << reduced_val();
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                                \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("SparseReduceSum").Device(DEVICE_CPU).TypeConstraint<T>("T"),   \
      SparseReduceOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                                \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("SparseReduceMax").Device(DEVICE_CPU).TypeConstraint<T>("T"),   \
      SparseReduceOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

template <typename T, typename Op>
class SparseReduceSparseOp : public OpKernel {
 public:
  explicit SparseReduceSparseOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    TensorShape shape;
    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_t->vec<int64_t>(),
                                                      &shape));
    SparseTensor sp;
    OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t),
                                             tensor::DeepCopy(*values_t),
                                             shape, &sp));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    sp.Reorder<T>(reduction.reorder_dims);
    // Count nnzs in the output SparseTensor.
    int64_t nnz = 0;
    auto iter = sp.group(reduction.group_by_dims);
    for (auto it = iter.begin(); it != iter.end(); ++it) {
      nnz++;
    }

    Tensor *out_indices_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(
                       0, TensorShape({nnz, reduction.reduced_shape.dims()}),
                       &out_indices_t));
    typename TTypes<int64_t>::Matrix out_indices_mat =
        out_indices_t->matrix<int64_t>();
    // For keep_dims. We don't explicitly set dim fields for reduced dims below.
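    // E.g., with group_by_dims == {0, 2}, a group at coordinates [1, 3] is
    // written to the output index row [1, 0, 3] when keep_dims is true, and
    // to [1, 3] otherwise.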
    out_indices_mat.setZero();

    Tensor *out_values_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, TensorShape({nnz}), &out_values_t));
    auto out_flat = out_values_t->flat<T>();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();
    int64_t i = 0;
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      std::vector<int64_t> group = g.group();
      for (int64_t j = 0; j < group.size(); j++) {
        if (keep_dims_) {
          out_indices_mat(i, reduction.group_by_dims[j]) = group[j];
        } else {
          out_indices_mat(i, j) = group[j];
        }
      }
      out_flat(i) = reduced_val();
      i++;
      VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
              << "; group " << Op::Name() << ": " << reduced_val();
    }

    Tensor *out_shape_t;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(
                            2, TensorShape({reduction.reduced_shape.dims()}),
                            &out_shape_t));
    auto out_shape_flat = out_shape_t->flat<int64_t>();
    auto out_dim_sizes = reduction.reduced_shape.dim_sizes();
    if (!out_dim_sizes.empty()) {
      std::copy(out_dim_sizes.begin(), out_dim_sizes.end(), &out_shape_flat(0));
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceSumSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceMaxSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

}  // namespace tensorflow