/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/sparse_ops.cc.

#define EIGEN_USE_THREADS

#include "absl/strings/str_join.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/op_requires.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

// TODO(b/31496047): Fix non-standard include order.
#include <numeric>  // clang-format off

using tensorflow::sparse::SparseTensor;
using tensorflow::gtl::ArraySlice;

namespace tensorflow {

struct ReduceDetails {
  // The dimensions to call Reorder() with.
  std::vector<int64_t> reorder_dims;

  // The dimensions to call group() with after Reorder().
  std::vector<int64_t> group_by_dims;

  // The shape after reduction.
  TensorShape reduced_shape;
};

// Compute common reduce parameters that'll be used for SparseTensor
// reductions. Usage:
//   ReduceDetails reduction = SparseTensorReduceHelper(sp, axes, keep_dims);
//   sp.Reorder(reduction.reorder_dims);
//   for (const auto& g : sp.group(reduction.group_by_dims)) {
//     ...
//   }
//   // Set output shape to reduction.reduced_shape.
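//
// Illustrative example (hypothetical values, not taken from any test): for a
// sparse tensor of dense shape [2, 3, 4] reduced over axes {1} with
// keep_dims == false, this helper would return
//   reorder_dims  == {0, 2, 1}
//   group_by_dims == {0, 2}
//   reduced_shape == [2, 4];
// with keep_dims == true, reduced_shape would instead be [2, 1, 4].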
ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp,
                                       gtl::ArraySlice<int32> axes_slice,
                                       bool keep_dims) {
  ReduceDetails reduction;

  std::vector<int32> reduction_axes(axes_slice.begin(), axes_slice.end());
  int ndims = sp.dims();
  for (int64_t i = 0; i < reduction_axes.size(); ++i) {
    reduction_axes[i] = (reduction_axes[i] + ndims) % ndims;
  }
  std::sort(reduction_axes.begin(), reduction_axes.end());

  // (0) Calculate the grouping dimensions:
  // group_by_dims == {0, .., NDIMS-1} \ reduction_axes.
  std::vector<int64_t> perm(ndims);
  std::iota(perm.begin(), perm.end(), 0);

  // Requires perm and reduction_axes to be sorted; group_by_dims will be
  // sorted as well.
  std::set_difference(
      perm.begin(), perm.end(), reduction_axes.begin(), reduction_axes.end(),
      std::inserter(reduction.group_by_dims, reduction.group_by_dims.begin()));

  // Now append the rest of the axes (the complement of group_by_dims);
  // the result is used by Reorder().
  reduction.reorder_dims = reduction.group_by_dims;
  std::set_difference(perm.begin(), perm.end(), reduction.group_by_dims.begin(),
                      reduction.group_by_dims.end(),
                      std::back_inserter(reduction.reorder_dims));

  // (1) Calculate the shape after reduction.
  auto sp_shape = sp.shape();
  std::vector<int64_t> out_dim_sizes;
  if (keep_dims) {
    out_dim_sizes.reserve(ndims);
    auto beg = reduction.group_by_dims.begin();
    auto end = reduction.group_by_dims.end();
    for (int d = 0; d < ndims; ++d) {
      if (std::find(beg, end, d) == end) {
        out_dim_sizes.push_back(1);  // A reduced axis.
      } else {
        out_dim_sizes.push_back(sp_shape[d]);
      }
    }
  } else {
    out_dim_sizes = sp.PickDims(reduction.group_by_dims);
  }

  reduction.reduced_shape = TensorShape(out_dim_sizes);
  return reduction;
}

Status ValidateInputs(const Tensor *shape_t, const Tensor *reduction_axes_t) {
  // indices and values are validated in the SparseTensor ctor.
  if (!TensorShapeUtils::IsVector(shape_t->shape())) {
    return errors::InvalidArgument(
        "Expected input_shape to be a vector; got shape: ",
        shape_t->shape().DebugString());
  }
  if (!TensorShapeUtils::IsScalar(reduction_axes_t->shape()) &&
      !TensorShapeUtils::IsVector(reduction_axes_t->shape())) {
    return errors::InvalidArgument(
        "Expected reduction_axes to be a scalar or a vector; got shape: ",
        reduction_axes_t->shape().DebugString());
  }

  const auto reduction_axes_flat = reduction_axes_t->flat<int32>();
  for (int64_t i = 0; i < reduction_axes_flat.size(); i++) {
    int32_t axis = reduction_axes_flat(i);
    if (axis < -shape_t->NumElements() || axis >= shape_t->NumElements()) {
      return errors::InvalidArgument("Invalid reduction dimension ", axis,
                                     ", for input with ",
                                     shape_t->NumElements(), " dimensions.");
    }
  }

  return OkStatus();
}

struct SumOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.sum();
  }
  static StringPiece Name() { return "sum"; }
};

struct MaxOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.maximum();
  }
  static StringPiece Name() { return "max"; }
};

template <typename T, typename Op>
class SparseReduceOp : public OpKernel {
 public:
  explicit SparseReduceOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    // TODO(zongheng): we will call Reorder() below, which will modify
    // in-place the underlying indices and values buffers. To avoid
    // surprises of this kernel being stateful, we work around the above by
    // making deep copies here. Remove this if/when we change Reorder()'s
    // semantics.
    const auto shape_vec = shape_t->vec<int64_t>();
    TensorShape shape;
    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_vec, &shape));

    SparseTensor sp;
    OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t),
                                             tensor::DeepCopy(*values_t),
                                             shape, &sp));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    Tensor *out_values;
    OP_REQUIRES_OK(
        ctx, ctx->allocate_output(0, reduction.reduced_shape, &out_values));
    auto out_flat = out_values->flat<T>();
    out_flat.setZero();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();

    // Compute strides, and use them to convert coords to a flat index. The
    // coordinates returned by .group() have the same ndims as group_by_dims.
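    //
    // Illustrative example (hypothetical values, continuing the example above
    // SparseTensorReduceHelper): with group_by_dims == {0, 2} and dense shape
    // [2, 3, 4], output_strides below would be {4, 1}, so group coordinates
    // (1, 3) map to flat index 1 * 4 + 3 == 7.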
    gtl::InlinedVector<int64_t, 8> output_strides(
        reduction.group_by_dims.size());
    if (!output_strides.empty()) {  // Do this iff we don't reduce all.
      output_strides.back() = 1;
      for (int d = output_strides.size() - 2; d >= 0; --d) {
        output_strides[d] =
            output_strides[d + 1] * shape_vec(reduction.group_by_dims[d + 1]);
      }
    }

    auto CoordinatesToFlatIndex = [](ArraySlice<int64_t> coords,
                                     ArraySlice<int64_t> strides) -> int64 {
      if (strides.empty()) {  // Reduce all.
        return 0;
      }
      CHECK_EQ(coords.size(), strides.size());
      int64_t idx = 0;
      for (int i = 0; i < coords.size(); ++i) {
        idx += coords[i] * strides[i];
      }
      return idx;
    };

    // Each group maps one-to-one onto a value in the reduced tensor.
    // g.group() provides the coordinates of a particular reduced value.
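    // For instance (hypothetical values as above), the group with reduced
    // coordinates (1, 3) collects every nonzero whose dim-0 index is 1 and
    // dim-2 index is 3, and Op reduces their values into out_flat(7).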
    sp.Reorder<T>(reduction.reorder_dims);
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      OP_REQUIRES(ctx,
                  output_strides.empty() ||
                      (g.group().size() == output_strides.size()),
                  errors::Internal(
                      "Expected group size and output_strides size to match",
                      ", but got ", g.group().size(), " and ",
                      output_strides.size()));
      const int64_t idx = CoordinatesToFlatIndex(g.group(), output_strides);
      OP_REQUIRES(ctx, idx >= 0 && idx < out_flat.size(),
                  errors::Internal("Obtained a write index of ", idx,
                                   " which is outside of bounds of [0, ",
                                   out_flat.size(), ")"));
      out_flat(idx) = reduced_val();
      VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
              << "; idx: " << idx << "; group " << Op::Name() << ": "
              << reduced_val();
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceSum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceMax").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

template <typename T, typename Op>
class SparseReduceSparseOp : public OpKernel {
 public:
  explicit SparseReduceSparseOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    TensorShape shape;
    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_t->vec<int64_t>(),
                                                      &shape));
    SparseTensor sp;
    OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t),
                                             tensor::DeepCopy(*values_t),
                                             shape, &sp));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    sp.Reorder<T>(reduction.reorder_dims);
    // Count nnzs in the output SparseTensor.
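    // Each group yields exactly one nonzero in the reduced output, so the
    // number of groups equals the output nnz.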
    int64_t nnz = 0;
    auto iter = sp.group(reduction.group_by_dims);
    for (auto it = iter.begin(); it != iter.end(); ++it) {
      nnz++;
    }

    Tensor *out_indices_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(
                       0, TensorShape({nnz, reduction.reduced_shape.dims()}),
                       &out_indices_t));
    typename TTypes<int64_t>::Matrix out_indices_mat =
        out_indices_t->matrix<int64_t>();
    // For keep_dims: indices along reduced dimensions stay 0, so we only
    // zero-initialize here and don't explicitly set those fields below.
    out_indices_mat.setZero();
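    // Illustrative example (hypothetical values as in the comments above):
    // with keep_dims == true, group_by_dims == {0, 2}, and reduced_shape
    // [2, 1, 4], a group at coordinates (1, 3) produces the output index row
    // (1, 0, 3); with keep_dims == false it would simply be (1, 3).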

    Tensor *out_values_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, TensorShape({nnz}), &out_values_t));
    auto out_flat = out_values_t->flat<T>();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();
    int64_t i = 0;
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      std::vector<int64_t> group = g.group();
      for (int64_t j = 0; j < group.size(); j++) {
        if (keep_dims_) {
          out_indices_mat(i, reduction.group_by_dims[j]) = group[j];
        } else {
          out_indices_mat(i, j) = group[j];
        }
      }
      out_flat(i) = reduced_val();
      i++;
      VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
              << "; group " << Op::Name() << ": " << reduced_val();
    }

    Tensor *out_shape_t;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(
                            2, TensorShape({reduction.reduced_shape.dims()}),
                            &out_shape_t));
    auto out_shape_flat = out_shape_t->flat<int64_t>();
    auto out_dim_sizes = reduction.reduced_shape.dim_sizes();
    if (!out_dim_sizes.empty()) {
      std::copy(out_dim_sizes.begin(), out_dim_sizes.end(), &out_shape_flat(0));
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceSumSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceMaxSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

}  // namespace tensorflow