1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // SparseSparseBinaryOpShared is the shared code for binary coefficient-wise |
17 | // (cwise) operations of the following form: |
18 | // |
19 | // sparse_t <binary cwise op> sparse_t -> new sparse_t |
20 | // |
21 | // The output SparseTensor may store up to "a_nnz + b_nnz" elements. |
22 | |
23 | // IMPLEMENTATION DETAILS (not part of the interface specification). |
24 | // |
25 | // This kernel implements the "union" semantics on the non-zeros: namely, any |
// non-zero from either side participates in the calculations, and any resultant
27 | // zeros will NOT be excluded from the output storage. |
28 | // |
// (In the future, we could always add a pruning op that prunes away the zeros,
30 | // if desirable.) |
31 | |
32 | // See docs of all registered ops in ../ops/sparse_ops.cc. |
33 | |
34 | #define EIGEN_USE_THREADS |
35 | |
36 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
37 | #include "tensorflow/core/framework/op_kernel.h" |
38 | #include "tensorflow/core/framework/register_types.h" |
39 | #include "tensorflow/core/framework/tensor.h" |
40 | #include "tensorflow/core/framework/tensor_util.h" |
41 | #include "tensorflow/core/framework/types.h" |
42 | #include "tensorflow/core/kernels/cwise_ops.h" |
43 | #include "tensorflow/core/kernels/cwise_ops_common.h" |
44 | #include "tensorflow/core/util/sparse/sparse_tensor.h" |
45 | |
46 | namespace tensorflow { |
47 | |
48 | typedef Eigen::ThreadPoolDevice CPUDevice; |
49 | |
50 | namespace { |
51 | // Unions the sparse indices and outputs corresponding values: namely, if a |
// non-zero appears on one side, it will participate in the calculation, where
53 | // the counterpart on the other side is either a value or an implicit zero. |
54 | // |
55 | // On exit, outputs the augmented values in "{a,b}_augmented_values", and fills |
56 | // "entries_to_copy" with "(from_a?, index)" pairs. All three vectors have the |
57 | // same size. |
58 | // |
59 | // The input and output sparse tensors are assumed ordered in the canonical |
60 | // row-major order. |
61 | template <typename T> |
62 | void UnionSparseIndicesAndValues( |
63 | typename TTypes<int64_t>::ConstMatrix a_indices_mat, |
64 | typename TTypes<T>::ConstFlat a_values, int64_t a_nnz, |
65 | typename TTypes<int64_t>::ConstMatrix b_indices_mat, |
66 | typename TTypes<T>::ConstFlat b_values, int64_t b_nnz, int num_dims, |
67 | std::vector<T> *a_augmented_values, std::vector<T> *b_augmented_values, |
68 | std::vector<std::pair<bool, int64>> *entries_to_copy) { |
69 | entries_to_copy->reserve(a_nnz + b_nnz); |
70 | a_augmented_values->reserve(a_nnz); |
71 | b_augmented_values->reserve(b_nnz); |
72 | |
73 | int64_t i = 0, j = 0; |
74 | const T kZero = T(0); |
75 | while (i < a_nnz && j < b_nnz) { |
76 | switch (sparse::DimComparator::cmp(a_indices_mat, b_indices_mat, i, j, |
77 | num_dims)) { |
78 | case -1: |
79 | entries_to_copy->emplace_back(true, i); |
80 | a_augmented_values->push_back(a_values(i)); |
81 | b_augmented_values->push_back(kZero); |
82 | ++i; |
83 | break; |
84 | case 0: |
85 | entries_to_copy->emplace_back(true, i); |
86 | a_augmented_values->push_back(a_values(i)); |
87 | b_augmented_values->push_back(b_values(j)); |
88 | ++i; |
89 | ++j; |
90 | break; |
91 | case 1: |
92 | entries_to_copy->emplace_back(false, j); |
93 | a_augmented_values->push_back(kZero); |
94 | b_augmented_values->push_back(b_values(j)); |
95 | ++j; |
96 | break; |
97 | } |
98 | } |
99 | // Handles leftovers; at most one loop runs. |
100 | while (i < a_nnz) { |
101 | entries_to_copy->emplace_back(/* is_a */ true, i); |
102 | a_augmented_values->push_back(a_values(i++)); |
103 | b_augmented_values->push_back(kZero); |
104 | } |
105 | while (j < b_nnz) { |
106 | entries_to_copy->emplace_back(/* is_a */ false, j); |
107 | a_augmented_values->push_back(kZero); |
108 | b_augmented_values->push_back(b_values(j++)); |
109 | } |
110 | } |
111 | } // anonymous namespace |
112 | |
113 | // Device: CPUDevice. GPU kernel is not supported currently. |
114 | // T: dtype of the SparseTensor's. |
115 | // Functor: binary cwise operation to perform on the corresponding operand |
116 | // values. See cwise_ops.h for a list of possible functors to register with. |
117 | template <typename Device, typename T, typename Functor> |
118 | class SparseSparseBinaryOpShared : public OpKernel { |
119 | public: |
120 | explicit SparseSparseBinaryOpShared(OpKernelConstruction *ctx) |
121 | : OpKernel(ctx) {} |
122 | |
123 | void Compute(OpKernelContext *ctx) override { |
124 | const Tensor *a_indices_t, *a_values_t, *a_shape_t, *b_indices_t, |
125 | *b_values_t, *b_shape_t; |
126 | OP_REQUIRES_OK(ctx, ctx->input("a_indices" , &a_indices_t)); |
127 | OP_REQUIRES_OK(ctx, ctx->input("a_values" , &a_values_t)); |
128 | OP_REQUIRES_OK(ctx, ctx->input("a_shape" , &a_shape_t)); |
129 | OP_REQUIRES_OK(ctx, ctx->input("b_indices" , &b_indices_t)); |
130 | OP_REQUIRES_OK(ctx, ctx->input("b_values" , &b_values_t)); |
131 | OP_REQUIRES_OK(ctx, ctx->input("b_shape" , &b_shape_t)); |
132 | |
133 | // Validations. |
134 | OP_REQUIRES( |
135 | ctx, |
136 | TensorShapeUtils::IsMatrix(a_indices_t->shape()) && |
137 | TensorShapeUtils::IsMatrix(b_indices_t->shape()), |
138 | errors::InvalidArgument("Inputs a_indices and b_indices should be " |
139 | "matrices but received shapes: " , |
140 | a_indices_t->shape().DebugString(), ", " , |
141 | b_indices_t->shape().DebugString())); |
142 | OP_REQUIRES(ctx, |
143 | TensorShapeUtils::IsVector(a_values_t->shape()) && |
144 | TensorShapeUtils::IsVector(b_values_t->shape()), |
145 | errors::InvalidArgument( |
146 | "Inputs a_values and b_values should be vectors " |
147 | "but received shapes: " , |
148 | a_values_t->shape().DebugString(), " and " , |
149 | b_values_t->shape().DebugString())); |
150 | |
151 | const int64_t a_nnz = a_indices_t->dim_size(0); |
152 | const int64_t b_nnz = b_indices_t->dim_size(0); |
153 | |
154 | const auto a_values = a_values_t->vec<T>(); |
155 | const auto b_values = b_values_t->vec<T>(); |
156 | |
157 | OP_REQUIRES( |
158 | ctx, a_values.size() == a_nnz && b_values.size() == b_nnz, |
159 | errors::InvalidArgument("Expected " , a_nnz, " and " , b_nnz, |
160 | " non-empty input values, got " , |
161 | a_values.size(), " and " , b_values.size())); |
162 | |
163 | OP_REQUIRES(ctx, |
164 | TensorShapeUtils::IsVector(a_shape_t->shape()) && |
165 | TensorShapeUtils::IsVector(b_shape_t->shape()), |
166 | errors::InvalidArgument( |
167 | "Input shapes should be a vector but received shapes " , |
168 | a_shape_t->shape().DebugString(), " and " , |
169 | b_shape_t->shape().DebugString())); |
170 | const int num_dims = a_indices_t->dim_size(1); |
171 | OP_REQUIRES( |
172 | ctx, a_shape_t->NumElements() == num_dims, |
173 | errors::InvalidArgument("Second dimension of a_indices and length of " |
174 | "a_shape must match, got " , |
175 | num_dims, " and " , a_shape_t->NumElements())); |
176 | OP_REQUIRES(ctx, num_dims > 0, |
177 | errors::InvalidArgument("Tensors must not be empty" )); |
178 | OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t), |
179 | errors::InvalidArgument( |
180 | "Operands do not have the same ranks; got shapes: " , |
181 | a_shape_t->SummarizeValue(10), " and " , |
182 | b_shape_t->SummarizeValue(10))); |
183 | const auto a_shape = a_shape_t->flat<int64_t>(); |
184 | const auto b_shape = b_shape_t->flat<int64_t>(); |
185 | for (int i = 0; i < a_shape_t->NumElements(); ++i) { |
186 | OP_REQUIRES(ctx, a_shape(i) == b_shape(i), |
187 | errors::InvalidArgument("Operands' shapes do not match: got " , |
188 | a_shape(i), " and " , b_shape(i), |
189 | " for dimension " , i)); |
190 | } |
191 | |
192 | const auto a_indices_mat = a_indices_t->matrix<int64_t>(); |
193 | const auto b_indices_mat = b_indices_t->matrix<int64_t>(); |
194 | std::vector<T> a_augmented_values, b_augmented_values; |
195 | std::vector<std::pair<bool, int64>> entries_to_copy; // from_a?, idx |
196 | UnionSparseIndicesAndValues(a_indices_mat, a_values, a_nnz, b_indices_mat, |
197 | b_values, b_nnz, num_dims, &a_augmented_values, |
198 | &b_augmented_values, &entries_to_copy); |
199 | |
200 | // Allocates and fills output tensors. |
201 | const int64_t sum_nnz = a_augmented_values.size(); |
202 | Tensor *output_indices_t, *output_values_t; |
203 | OP_REQUIRES_OK(ctx, |
204 | ctx->allocate_output(0, TensorShape({sum_nnz, num_dims}), |
205 | &output_indices_t)); |
206 | OP_REQUIRES_OK( |
207 | ctx, ctx->allocate_output(1, TensorShape({sum_nnz}), &output_values_t)); |
208 | auto output_indices_mat = output_indices_t->matrix<int64_t>(); |
209 | |
210 | for (int64_t i = 0; i < sum_nnz; ++i) { |
211 | const bool from_a = entries_to_copy[i].first; |
212 | const int64_t idx = entries_to_copy[i].second; |
213 | output_indices_mat.chip<0>(i) = |
214 | from_a ? a_indices_mat.chip<0>(idx) : b_indices_mat.chip<0>(idx); |
215 | } |
216 | |
217 | // Performs the functor operation using Eigen. |
218 | // |
219 | // Note that the two stack-allocated std::vector's may not be aligned. Using |
220 | // allocate_temp() would've given us aligned storage, but we do not know |
221 | // their sizes in advance, so we couldn't use allocate_temp() anyway. |
222 | // |
223 | // TODO(zongheng): measure if it's worthwhile to somehow force alignment. |
224 | using UnalignedTensorMap = |
225 | Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor>, |
226 | Eigen::Unaligned>; |
227 | auto a_augmented_values_t = |
228 | UnalignedTensorMap(a_augmented_values.data(), sum_nnz); |
229 | auto b_augmented_values_t = |
230 | UnalignedTensorMap(b_augmented_values.data(), sum_nnz); |
231 | output_values_t->flat<T>().device(ctx->eigen_device<Device>()) = |
232 | a_augmented_values_t.binaryExpr(b_augmented_values_t, |
233 | typename Functor::func()); |
234 | } |
235 | }; |
236 | |
// Registers CPU kernels for every real numeric dtype T. Each T gets both a
// SparseSparseMinimum and a SparseSparseMaximum kernel; the two differ only
// in the cwise functor applied to the unioned operand values.
#define REGISTER_KERNELS(T)                                                  \
  REGISTER_KERNEL_BUILDER(                                                   \
      Name("SparseSparseMinimum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseSparseBinaryOpShared<CPUDevice, T, functor::minimum<T>>)         \
                                                                             \
  REGISTER_KERNEL_BUILDER(                                                   \
      Name("SparseSparseMaximum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseSparseBinaryOpShared<CPUDevice, T, functor::maximum<T>>)

TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS
248 | |
249 | } // namespace tensorflow |
250 | |