1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // SparseSparseBinaryOpShared is the shared code for binary coefficient-wise |
17 | // (cwise) operations of the following form: |
18 | // |
19 | // sparse_t <binary cwise op> sparse_t -> new sparse_t |
20 | // |
21 | // The output SparseTensor may store up to "a_nnz + b_nnz" elements. |
22 | |
23 | // IMPLEMENTATION DETAILS (not part of the interface specification). |
24 | // |
25 | // This kernel implements the "union" semantics on the non-zeros: namely, any |
// non-zero from either side participates in the calculations, and any resultant
27 | // zeros will NOT be excluded from the output storage. |
28 | // |
// (In the future, we could always add a pruning op that prunes away the zeros,
30 | // if desirable.) |
31 | |
32 | // See docs of all registered ops in ../ops/sparse_ops.cc. |
33 | |
34 | #define EIGEN_USE_THREADS |
35 | |
36 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
37 | #include "tensorflow/core/framework/op_kernel.h" |
38 | #include "tensorflow/core/framework/register_types.h" |
39 | #include "tensorflow/core/framework/tensor.h" |
40 | #include "tensorflow/core/framework/tensor_util.h" |
41 | #include "tensorflow/core/framework/types.h" |
42 | #include "tensorflow/core/kernels/cwise_ops.h" |
43 | #include "tensorflow/core/kernels/cwise_ops_common.h" |
44 | #include "tensorflow/core/util/sparse/sparse_tensor.h" |
45 | |
46 | namespace tensorflow { |
47 | |
48 | typedef Eigen::ThreadPoolDevice CPUDevice; |
49 | |
50 | namespace { |
51 | // Unions the sparse indices and outputs corresponding values: namely, if a |
// non-zero appears on one side, it will participate in the calculation, where
53 | // the counterpart on the other side is either a value or an implicit zero. |
54 | // |
55 | // On exit, outputs the augmented values in "{a,b}_augmented_values", and fills |
56 | // "entries_to_copy" with "(from_a?, index)" pairs. All three vectors have the |
57 | // same size. |
58 | // |
59 | // The input and output sparse tensors are assumed ordered in the canonical |
60 | // row-major order. |
61 | template <typename T> |
62 | void UnionSparseIndicesAndValues( |
63 | typename TTypes<int64_t>::ConstMatrix a_indices_mat, |
64 | typename TTypes<T>::ConstFlat a_values, int64_t a_nnz, |
65 | typename TTypes<int64_t>::ConstMatrix b_indices_mat, |
66 | typename TTypes<T>::ConstFlat b_values, int64_t b_nnz, int num_dims, |
67 | std::vector<T> *a_augmented_values, std::vector<T> *b_augmented_values, |
68 | std::vector<std::pair<bool, int64>> *entries_to_copy) { |
69 | entries_to_copy->reserve(a_nnz + b_nnz); |
70 | a_augmented_values->reserve(a_nnz); |
71 | b_augmented_values->reserve(b_nnz); |
72 | |
73 | int64_t i = 0, j = 0; |
74 | const T kZero = T(0); |
75 | while (i < a_nnz && j < b_nnz) { |
76 | switch (sparse::DimComparator::cmp(a_indices_mat, b_indices_mat, i, j, |
77 | num_dims)) { |
78 | case -1: |
79 | entries_to_copy->emplace_back(true, i); |
80 | a_augmented_values->push_back(a_values(i)); |
81 | b_augmented_values->push_back(kZero); |
82 | ++i; |
83 | break; |
84 | case 0: |
85 | entries_to_copy->emplace_back(true, i); |
86 | a_augmented_values->push_back(a_values(i)); |
87 | b_augmented_values->push_back(b_values(j)); |
88 | ++i; |
89 | ++j; |
90 | break; |
91 | case 1: |
92 | entries_to_copy->emplace_back(false, j); |
93 | a_augmented_values->push_back(kZero); |
94 | b_augmented_values->push_back(b_values(j)); |
95 | ++j; |
96 | break; |
97 | } |
98 | } |
99 | // Handles leftovers; at most one loop runs. |
100 | while (i < a_nnz) { |
101 | entries_to_copy->emplace_back(/* is_a */ true, i); |
102 | a_augmented_values->push_back(a_values(i++)); |
103 | b_augmented_values->push_back(kZero); |
104 | } |
105 | while (j < b_nnz) { |
106 | entries_to_copy->emplace_back(/* is_a */ false, j); |
107 | a_augmented_values->push_back(kZero); |
108 | b_augmented_values->push_back(b_values(j++)); |
109 | } |
110 | } |
111 | } // anonymous namespace |
112 | |
113 | // Device: CPUDevice. GPU kernel is not supported currently. |
114 | // T: dtype of the SparseTensor's. |
115 | // Functor: binary cwise operation to perform on the corresponding operand |
116 | // values. See cwise_ops.h for a list of possible functors to register with. |
117 | template <typename Device, typename T, typename Functor> |
118 | class SparseSparseBinaryOpShared : public OpKernel { |
119 | public: |
120 | explicit SparseSparseBinaryOpShared(OpKernelConstruction *ctx) |
121 | : OpKernel(ctx) {} |
122 | |
123 | void Compute(OpKernelContext *ctx) override { |
124 | const Tensor *a_indices_t, *a_values_t, *a_shape_t, *b_indices_t, |
125 | *b_values_t, *b_shape_t; |
126 | OP_REQUIRES_OK(ctx, ctx->input("a_indices" , &a_indices_t)); |
127 | OP_REQUIRES_OK(ctx, ctx->input("a_values" , &a_values_t)); |
128 | OP_REQUIRES_OK(ctx, ctx->input("a_shape" , &a_shape_t)); |
129 | OP_REQUIRES_OK(ctx, ctx->input("b_indices" , &b_indices_t)); |
130 | OP_REQUIRES_OK(ctx, ctx->input("b_values" , &b_values_t)); |
131 | OP_REQUIRES_OK(ctx, ctx->input("b_shape" , &b_shape_t)); |
132 | |
133 | // Validations. |
134 | OP_REQUIRES( |
135 | ctx, |
136 | TensorShapeUtils::IsMatrix(a_indices_t->shape()) && |
137 | TensorShapeUtils::IsMatrix(b_indices_t->shape()), |
138 | errors::InvalidArgument("Inputs a_indices and b_indices should be " |
139 | "matrices but received shapes: " , |
140 | a_indices_t->shape().DebugString(), ", " , |
141 | b_indices_t->shape().DebugString())); |
142 | OP_REQUIRES(ctx, |
143 | TensorShapeUtils::IsVector(a_values_t->shape()) && |
144 | TensorShapeUtils::IsVector(b_values_t->shape()), |
145 | errors::InvalidArgument( |
146 | "Inputs a_values and b_values should be vectors " |
147 | "but received shapes: " , |
148 | a_values_t->shape().DebugString(), " and " , |
149 | b_values_t->shape().DebugString())); |
150 | |
151 | const int64_t a_nnz = a_indices_t->dim_size(0); |
152 | const int64_t b_nnz = b_indices_t->dim_size(0); |
153 | |
154 | const auto a_values = a_values_t->vec<T>(); |
155 | const auto b_values = b_values_t->vec<T>(); |
156 | |
157 | OP_REQUIRES( |
158 | ctx, a_values.size() == a_nnz && b_values.size() == b_nnz, |
159 | errors::InvalidArgument("Expected " , a_nnz, " and " , b_nnz, |
160 | " non-empty input values, got " , |
161 | a_values.size(), " and " , b_values.size())); |
162 | |
163 | OP_REQUIRES(ctx, |
164 | TensorShapeUtils::IsVector(a_shape_t->shape()) && |
165 | TensorShapeUtils::IsVector(b_shape_t->shape()), |
166 | errors::InvalidArgument( |
167 | "Input shapes should be a vector but received shapes " , |
168 | a_shape_t->shape().DebugString(), " and " , |
169 | b_shape_t->shape().DebugString())); |
170 | const int num_dims = a_indices_t->dim_size(1); |
171 | OP_REQUIRES( |
172 | ctx, a_shape_t->NumElements() == num_dims, |
173 | errors::InvalidArgument("Second dimension of a_indices and length of " |
174 | "a_shape must match, got " , |
175 | num_dims, " and " , a_shape_t->NumElements())); |
176 | OP_REQUIRES(ctx, num_dims > 0, |
177 | errors::InvalidArgument("Tensors must not be empty" )); |
178 | OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t), |
179 | errors::InvalidArgument( |
180 | "Operands do not have the same ranks; got shapes: " , |
181 | a_shape_t->SummarizeValue(10), " and " , |
182 | b_shape_t->SummarizeValue(10))); |
183 | const auto a_shape = a_shape_t->flat<int64_t>(); |
184 | const auto b_shape = b_shape_t->flat<int64_t>(); |
185 | for (int i = 0; i < a_shape_t->NumElements(); ++i) { |
186 | OP_REQUIRES(ctx, a_shape(i) == b_shape(i), |
187 | errors::InvalidArgument("Operands' shapes do not match: got " , |
188 | a_shape(i), " and " , b_shape(i), |
189 | " for dimension " , i)); |
190 | } |
191 | |
192 | const auto a_indices_mat = a_indices_t->matrix<int64_t>(); |
193 | const auto b_indices_mat = b_indices_t->matrix<int64_t>(); |
194 | std::vector<T> a_augmented_values, b_augmented_values; |
195 | std::vector<std::pair<bool, int64>> entries_to_copy; // from_a?, idx |
196 | UnionSparseIndicesAndValues(a_indices_mat, a_values, a_nnz, b_indices_mat, |
197 | b_values, b_nnz, num_dims, &a_augmented_values, |
198 | &b_augmented_values, &entries_to_copy); |
199 | |
200 | // Allocates and fills output tensors. |
201 | const int64_t sum_nnz = a_augmented_values.size(); |
202 | Tensor *output_indices_t, *output_values_t; |
203 | OP_REQUIRES_OK(ctx, |
204 | ctx->allocate_output(0, TensorShape({sum_nnz, num_dims}), |
205 | &output_indices_t)); |
206 | OP_REQUIRES_OK( |
207 | ctx, ctx->allocate_output(1, TensorShape({sum_nnz}), &output_values_t)); |
208 | auto output_indices_mat = output_indices_t->matrix<int64_t>(); |
209 | |
210 | for (int64_t i = 0; i < sum_nnz; ++i) { |
211 | const bool from_a = entries_to_copy[i].first; |
212 | const int64_t idx = entries_to_copy[i].second; |
213 | output_indices_mat.chip<0>(i) = |
214 | from_a ? a_indices_mat.chip<0>(idx) : b_indices_mat.chip<0>(idx); |
215 | } |
216 | |
217 | // Performs the functor operation using Eigen. |
218 | // |
219 | // Note that the two stack-allocated std::vector's may not be aligned. Using |
220 | // allocate_temp() would've given us aligned storage, but we do not know |
221 | // their sizes in advance, so we couldn't use allocate_temp() anyway. |
222 | // |
223 | // TODO(zongheng): measure if it's worthwhile to somehow force alignment. |
224 | using UnalignedTensorMap = |
225 | Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor>, |
226 | Eigen::Unaligned>; |
227 | auto a_augmented_values_t = |
228 | UnalignedTensorMap(a_augmented_values.data(), sum_nnz); |
229 | auto b_augmented_values_t = |
230 | UnalignedTensorMap(b_augmented_values.data(), sum_nnz); |
231 | output_values_t->flat<T>().device(ctx->eigen_device<Device>()) = |
232 | a_augmented_values_t.binaryExpr(b_augmented_values_t, |
233 | typename Functor::func()); |
234 | } |
235 | }; |
236 | |
// Registers CPU kernels for every real numeric dtype T. Each T gets both a
// SparseSparseMinimum and a SparseSparseMaximum kernel; the two differ only
// in the cwise functor applied to the unioned operand values.
#define REGISTER_KERNELS(T)                                                  \
  REGISTER_KERNEL_BUILDER(                                                   \
      Name("SparseSparseMinimum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseSparseBinaryOpShared<CPUDevice, T, functor::minimum<T>>)         \
                                                                             \
  REGISTER_KERNEL_BUILDER(                                                   \
      Name("SparseSparseMaximum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseSparseBinaryOpShared<CPUDevice, T, functor::maximum<T>>)

TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS
248 | |
249 | } // namespace tensorflow |
250 | |