/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// SparseSparseBinaryOpShared is the shared code for binary coefficient-wise
// (cwise) operations of the following form:
//
//   sparse_t <binary cwise op> sparse_t -> new sparse_t
//
// The output SparseTensor may store up to "a_nnz + b_nnz" elements.
// IMPLEMENTATION DETAILS (not part of the interface specification).
//
// This kernel implements "union" semantics on the non-zeros: namely, any
// non-zero from either side participates in the calculation, and any resultant
// zeros are NOT excluded from the output storage.
//
// (In the future, we could add a pruning op that prunes away the zeros, if
// desirable.)
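//
// As a small illustration of the union semantics (values here are made up for
// exposition): if "a" stores {[0, 0]: 1, [1, 1]: 2} and "b" stores
// {[1, 1]: 3, [2, 0]: 4}, then SparseSparseMinimum (registered below) outputs
// the union of the index sets,
//   {[0, 0]: min(1, 0), [1, 1]: min(2, 3), [2, 0]: min(0, 4)}
//     = {[0, 0]: 0, [1, 1]: 2, [2, 0]: 0},
// i.e. resultant zeros are kept in the output storage.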

// See docs of all registered ops in ../ops/sparse_ops.cc.

#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
#include "tensorflow/core/kernels/cwise_ops_common.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

namespace {
// Unions the sparse indices and outputs the corresponding values: namely, if a
// non-zero appears on one side, it participates in the calculation, with its
// counterpart on the other side being either a stored value or an implicit
// zero.
//
// On exit, outputs the augmented values in "{a,b}_augmented_values", and fills
// "entries_to_copy" with "(from_a?, index)" pairs. All three vectors have the
// same size.
//
// The input and output sparse tensors are assumed to be ordered in the
// canonical row-major order.
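//
// For example (a sketch with made-up 1-D indices/values), given
//   a_indices = [[0], [2]], a_values = [1, 2],
//   b_indices = [[2], [3]], b_values = [3, 4],
// this produces
//   entries_to_copy    = [(true, 0), (true, 1), (false, 1)],
//   a_augmented_values = [1, 2, 0],
//   b_augmented_values = [0, 3, 4].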
template <typename T>
void UnionSparseIndicesAndValues(
    typename TTypes<int64_t>::ConstMatrix a_indices_mat,
    typename TTypes<T>::ConstFlat a_values, int64_t a_nnz,
    typename TTypes<int64_t>::ConstMatrix b_indices_mat,
    typename TTypes<T>::ConstFlat b_values, int64_t b_nnz, int num_dims,
    std::vector<T> *a_augmented_values, std::vector<T> *b_augmented_values,
    std::vector<std::pair<bool, int64_t>> *entries_to_copy) {
  entries_to_copy->reserve(a_nnz + b_nnz);
  a_augmented_values->reserve(a_nnz);
  b_augmented_values->reserve(b_nnz);

  int64_t i = 0, j = 0;
  const T kZero = T(0);
  while (i < a_nnz && j < b_nnz) {
    switch (sparse::DimComparator::cmp(a_indices_mat, b_indices_mat, i, j,
                                       num_dims)) {
      case -1:
        entries_to_copy->emplace_back(true, i);
        a_augmented_values->push_back(a_values(i));
        b_augmented_values->push_back(kZero);
        ++i;
        break;
      case 0:
        entries_to_copy->emplace_back(true, i);
        a_augmented_values->push_back(a_values(i));
        b_augmented_values->push_back(b_values(j));
        ++i;
        ++j;
        break;
      case 1:
        entries_to_copy->emplace_back(false, j);
        a_augmented_values->push_back(kZero);
        b_augmented_values->push_back(b_values(j));
        ++j;
        break;
    }
  }
  // Handles leftovers; at most one loop runs.
  while (i < a_nnz) {
    entries_to_copy->emplace_back(/* is_a */ true, i);
    a_augmented_values->push_back(a_values(i++));
    b_augmented_values->push_back(kZero);
  }
  while (j < b_nnz) {
    entries_to_copy->emplace_back(/* is_a */ false, j);
    a_augmented_values->push_back(kZero);
    b_augmented_values->push_back(b_values(j++));
  }
}
}  // anonymous namespace

// Device: CPUDevice. A GPU kernel is not currently supported.
// T: dtype of the SparseTensors.
// Functor: binary cwise operation to perform on the corresponding operand
// values. See cwise_ops.h for a list of possible functors to register with.
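//
// For example, the SparseSparseMinimum registration at the bottom of this file
// instantiates this template with functor::minimum<T>, so each output value is
// the elementwise minimum of the two (possibly implicit-zero) operand values.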
template <typename Device, typename T, typename Functor>
class SparseSparseBinaryOpShared : public OpKernel {
 public:
  explicit SparseSparseBinaryOpShared(OpKernelConstruction *ctx)
      : OpKernel(ctx) {}

  void Compute(OpKernelContext *ctx) override {
    const Tensor *a_indices_t, *a_values_t, *a_shape_t, *b_indices_t,
        *b_values_t, *b_shape_t;
    OP_REQUIRES_OK(ctx, ctx->input("a_indices", &a_indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("a_values", &a_values_t));
    OP_REQUIRES_OK(ctx, ctx->input("a_shape", &a_shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("b_indices", &b_indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("b_values", &b_values_t));
    OP_REQUIRES_OK(ctx, ctx->input("b_shape", &b_shape_t));

    // Validations.
    OP_REQUIRES(
        ctx,
        TensorShapeUtils::IsMatrix(a_indices_t->shape()) &&
            TensorShapeUtils::IsMatrix(b_indices_t->shape()),
        errors::InvalidArgument("Inputs a_indices and b_indices should be "
                                "matrices but received shapes: ",
                                a_indices_t->shape().DebugString(), ", ",
                                b_indices_t->shape().DebugString()));
    OP_REQUIRES(ctx,
                TensorShapeUtils::IsVector(a_values_t->shape()) &&
                    TensorShapeUtils::IsVector(b_values_t->shape()),
                errors::InvalidArgument(
                    "Inputs a_values and b_values should be vectors "
                    "but received shapes: ",
                    a_values_t->shape().DebugString(), " and ",
                    b_values_t->shape().DebugString()));

    const int64_t a_nnz = a_indices_t->dim_size(0);
    const int64_t b_nnz = b_indices_t->dim_size(0);

    const auto a_values = a_values_t->vec<T>();
    const auto b_values = b_values_t->vec<T>();

    OP_REQUIRES(
        ctx, a_values.size() == a_nnz && b_values.size() == b_nnz,
        errors::InvalidArgument("Expected ", a_nnz, " and ", b_nnz,
                                " non-empty input values, got ",
                                a_values.size(), " and ", b_values.size()));

    OP_REQUIRES(ctx,
                TensorShapeUtils::IsVector(a_shape_t->shape()) &&
                    TensorShapeUtils::IsVector(b_shape_t->shape()),
                errors::InvalidArgument(
                    "Input shapes should be vectors but received shapes ",
                    a_shape_t->shape().DebugString(), " and ",
                    b_shape_t->shape().DebugString()));
    const int num_dims = a_indices_t->dim_size(1);
    OP_REQUIRES(
        ctx, a_shape_t->NumElements() == num_dims,
        errors::InvalidArgument("Second dimension of a_indices and length of "
                                "a_shape must match, got ",
                                num_dims, " and ", a_shape_t->NumElements()));
    OP_REQUIRES(ctx, num_dims > 0,
                errors::InvalidArgument("Tensors must not be empty"));
    OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t),
                errors::InvalidArgument(
                    "Operands do not have the same ranks; got shapes: ",
                    a_shape_t->SummarizeValue(10), " and ",
                    b_shape_t->SummarizeValue(10)));
    const auto a_shape = a_shape_t->flat<int64_t>();
    const auto b_shape = b_shape_t->flat<int64_t>();
    for (int i = 0; i < a_shape_t->NumElements(); ++i) {
      OP_REQUIRES(ctx, a_shape(i) == b_shape(i),
                  errors::InvalidArgument("Operands' shapes do not match: got ",
                                          a_shape(i), " and ", b_shape(i),
                                          " for dimension ", i));
    }

    const auto a_indices_mat = a_indices_t->matrix<int64_t>();
    const auto b_indices_mat = b_indices_t->matrix<int64_t>();
    std::vector<T> a_augmented_values, b_augmented_values;
    std::vector<std::pair<bool, int64_t>> entries_to_copy;  // from_a?, idx
    UnionSparseIndicesAndValues(a_indices_mat, a_values, a_nnz, b_indices_mat,
                                b_values, b_nnz, num_dims, &a_augmented_values,
                                &b_augmented_values, &entries_to_copy);

    // Allocates and fills output tensors.
    const int64_t sum_nnz = a_augmented_values.size();
    Tensor *output_indices_t, *output_values_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(0, TensorShape({sum_nnz, num_dims}),
                                        &output_indices_t));
    OP_REQUIRES_OK(
        ctx, ctx->allocate_output(1, TensorShape({sum_nnz}), &output_values_t));
    auto output_indices_mat = output_indices_t->matrix<int64_t>();

    for (int64_t i = 0; i < sum_nnz; ++i) {
      const bool from_a = entries_to_copy[i].first;
      const int64_t idx = entries_to_copy[i].second;
      output_indices_mat.chip<0>(i) =
          from_a ? a_indices_mat.chip<0>(idx) : b_indices_mat.chip<0>(idx);
    }

    // Performs the functor operation using Eigen.
    //
    // Note that the backing storage of the two std::vectors above may not be
    // aligned. Using allocate_temp() would have given us aligned storage, but
    // we do not know their sizes in advance, so we could not have used
    // allocate_temp() anyway.
    //
    // TODO(zongheng): measure if it's worthwhile to somehow force alignment.
    using UnalignedTensorMap =
        Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor>,
                         Eigen::Unaligned>;
    auto a_augmented_values_t =
        UnalignedTensorMap(a_augmented_values.data(), sum_nnz);
    auto b_augmented_values_t =
        UnalignedTensorMap(b_augmented_values.data(), sum_nnz);
    output_values_t->flat<T>().device(ctx->eigen_device<Device>()) =
        a_augmented_values_t.binaryExpr(b_augmented_values_t,
                                        typename Functor::func());
  }
};


#define REGISTER_KERNELS(T)                                                   \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("SparseSparseMinimum").Device(DEVICE_CPU).TypeConstraint<T>("T"),  \
      SparseSparseBinaryOpShared<CPUDevice, T, functor::minimum<T>>)          \
                                                                              \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("SparseSparseMaximum").Device(DEVICE_CPU).TypeConstraint<T>("T"),  \
      SparseSparseBinaryOpShared<CPUDevice, T, functor::maximum<T>>)

TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS
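
// Other binary cwise functors from cwise_ops.h could, in principle, be plugged
// into the same template. For instance, a hypothetical kernel (not registered
// here, and using placeholder names) might look like:
//
//   REGISTER_KERNEL_BUILDER(
//       Name("SparseSparseSomeOp").Device(DEVICE_CPU).TypeConstraint<T>("T"),
//       SparseSparseBinaryOpShared<CPUDevice, T, functor::some_functor<T>>)
//
// provided a matching op named "SparseSparseSomeOp" were defined in
// ../ops/sparse_ops.cc.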

}  // namespace tensorflow