1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/array_ops.cc |
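// Reverse flips a tensor along the dimensions selected by its second input;
// e.g., reversing dimension 0 of [[1, 2], [3, 4]] yields [[3, 4], [1, 2]].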
17 | #define EIGEN_USE_THREADS |
18 | |
19 | #include "tensorflow/core/kernels/reverse_op.h" |
20 | #include <memory> |
21 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
22 | #include "tensorflow/core/framework/bounds_check.h" |
23 | #include "tensorflow/core/framework/op_kernel.h" |
24 | #include "tensorflow/core/framework/register_types.h" |
25 | #include "tensorflow/core/framework/tensor.h" |
26 | #include "tensorflow/core/framework/tensor_shape.h" |
27 | #include "tensorflow/core/framework/type_traits.h" |
28 | #include "tensorflow/core/framework/types.h" |
29 | #include "tensorflow/core/lib/core/status.h" |
30 | #include "tensorflow/core/platform/logging.h" |
31 | #include "tensorflow/core/util/work_sharder.h" |
32 | |
33 | namespace tensorflow { |
34 | |
35 | typedef Eigen::ThreadPoolDevice CPUDevice; |
36 | typedef Eigen::GpuDevice GPUDevice; |
37 | |
38 | namespace { |
39 | |
// Reverses the rows (middle dimension) of a three-dimensional tensor.
// NUM_CHANNELS can be <= 0 to compute the inner size dynamically from
// <input>. Otherwise, it must equal input.dim_size(2) and is used as a
// compile-time constant.
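// For example, with input of shape [2, 3, 1], ReverseRows maps
// [[[1], [2], [3]], [[4], [5], [6]]] to [[[3], [2], [1]], [[6], [5], [4]]].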
44 | template <typename T, int NUM_CHANNELS> |
45 | void ReverseRows(OpKernelContext* context, const Tensor& input, |
46 | Tensor* result) { |
47 | auto work = [&input, result](int64_t start, int64_t end) { |
48 | const int64_t inner_size = |
49 | NUM_CHANNELS > 0 ? NUM_CHANNELS : input.dim_size(2); |
50 | const int64_t middle_size = input.dim_size(1); |
51 | const int64_t row_size = inner_size * middle_size; |
52 | DCHECK_EQ(input.dim_size(2), inner_size); |
53 | |
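    // View both tensors as flat 3-D buffers of T; each row is then a
    // contiguous run of inner_size elements that can be moved with memcpy.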
54 | const T* in_ptr = input.bit_casted_tensor<T, 3>().data(); |
55 | T* out_ptr = result->bit_casted_tensor<T, 3>().data(); |
56 | |
57 | in_ptr += start * row_size; |
58 | out_ptr += start * row_size; |
59 | |
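    // For each outer slice, read rows forward while writing them backward:
    // advance out_ptr to one past the end of the slice, then step it back one
    // row per copy, so input row i lands at output row (middle_size - 1 - i).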
60 | for (int outer_dim = start; outer_dim < end; ++outer_dim) { |
61 | out_ptr += row_size; |
62 | int remaining = middle_size; |
63 | while (remaining > 0) { |
64 | out_ptr -= inner_size; |
65 | memcpy(out_ptr, in_ptr, inner_size * sizeof(T)); |
66 | in_ptr += inner_size; |
67 | --remaining; |
68 | } |
69 | |
70 | out_ptr += row_size; |
71 | } |
72 | }; |
73 | |
74 | // Shard across outer dimension. |
75 | const int64_t N = input.dim_size(0); |
76 | const int64_t cost_per_unit = input.NumElements() / N; |
77 | auto worker_threads = context->device()->tensorflow_cpu_worker_threads(); |
78 | Shard(worker_threads->num_threads, worker_threads->workers, N, cost_per_unit, |
79 | std::move(work)); |
80 | } |
81 | |
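// Trait identifying element types whose bytes can be copied directly with
// memcpy, i.e. trivially copyable types (excludes tstring, variants, etc.).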
82 | template <typename T> |
83 | struct data_type_can_memcpy { |
84 | static constexpr bool value = |
85 | std::is_same<T, uint8>::value || std::is_same<T, int8>::value || |
86 | std::is_same<T, bool>::value || std::is_same<T, uint16>::value || |
87 | std::is_same<T, int16>::value || std::is_same<T, Eigen::half>::value || |
88 | std::is_same<T, int32>::value || std::is_same<T, float>::value || |
89 | std::is_same<T, int64_t>::value || std::is_same<T, double>::value || |
90 | std::is_same<T, complex64>::value || std::is_same<T, complex128>::value; |
91 | }; |
92 | |
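// Dispatches to ReverseRows instantiated on a type of the same byte width as
// T, so a single instantiation serves every memcpy-able type of that size.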
93 | template <typename T, int NUM_CHANNELS> |
94 | typename std::enable_if<data_type_can_memcpy<T>::value>::type |
95 | DoHandleReverseCase(OpKernelContext* context, const Tensor& input, |
96 | Tensor* result) { |
97 | if (sizeof(T) == 1) { |
    static_assert(sizeof(uint8) == 1, "uint8 must be 1 byte.");
99 | ReverseRows<uint8, NUM_CHANNELS>(context, input, result); |
100 | } else if (sizeof(T) == 2) { |
    static_assert(sizeof(uint16) == 2, "uint16 must be 2 bytes");
102 | ReverseRows<uint16, NUM_CHANNELS>(context, input, result); |
103 | } else if (sizeof(T) == 4) { |
    static_assert(sizeof(uint32) == 4, "uint32 must be 4 bytes");
105 | ReverseRows<uint32, NUM_CHANNELS>(context, input, result); |
106 | } else if (sizeof(T) == 8) { |
    static_assert(sizeof(uint64) == 8, "uint64 must be 8 bytes");
108 | ReverseRows<uint64, NUM_CHANNELS>(context, input, result); |
109 | } else if (sizeof(T) == 16) { |
    static_assert(sizeof(complex128) == 16, "complex128 must be 16 bytes");
111 | ReverseRows<complex128, NUM_CHANNELS>(context, input, result); |
112 | } else { |
    context->CtxFailure(errors::InvalidArgument(DataTypeString(input.dtype()),
                                                " has unexpected size of ",
                                                sizeof(T), " bytes"));
116 | } |
117 | } |
118 | |
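// No-op overload for element types that cannot be memcpy'd. Callers check
// data_type_can_memcpy<T>::value before taking the fast path, so this
// overload is never reached at runtime; it only keeps the calls well-formed.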
119 | template <typename T, int NUM_CHANNELS> |
120 | typename std::enable_if<!data_type_can_memcpy<T>::value>::type |
121 | DoHandleReverseCase(OpKernelContext* context, const Tensor& input, |
122 | Tensor* result) {} |
123 | |
124 | } // namespace |
125 | |
126 | template <typename Device, typename T, int NDIMS> |
127 | void HandleReverseCase(OpKernelContext* context, |
128 | typename TTypes<bool, 1>::ConstTensor dims, |
129 | Tensor* result) { |
130 | const Tensor& input = context->input(0); |
131 | |
  // Use the optimized memcpy-based reverse when only the middle dimension of
  // a 3-D CPU tensor is being reversed (e.g., flipping an HWC image
  // horizontally).
133 | if (NDIMS == 3 && std::is_same<Device, CPUDevice>::value && |
134 | data_type_can_memcpy<T>::value && (!dims(0) && dims(1) && !dims(2))) { |
135 | if (input.dim_size(2) == 3) { |
136 | DoHandleReverseCase<T, 3>(context, input, result); |
137 | } else { |
138 | DoHandleReverseCase<T, -1>(context, input, result); |
139 | } |
140 | return; |
141 | } |
142 | typename Eigen::array<bool, NDIMS> axes_di; |
143 | for (int i = 0; i < NDIMS; i++) { |
144 | axes_di[i] = dims(i); |
145 | } |
146 | functor::Reverse<Device, T, NDIMS>()(context->eigen_device<Device>(), |
147 | input.tensor<T, NDIMS>(), axes_di, |
148 | result->tensor<T, NDIMS>()); |
149 | } |
150 | |
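// Kernel for the Reverse op, whose 'dims' input is a bool vector with one
// entry per input dimension marking the dimensions to reverse.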
151 | template <typename Device, typename T> |
152 | class ReverseOp : public OpKernel { |
153 | public: |
154 | explicit ReverseOp(OpKernelConstruction* context) : OpKernel(context) {} |
155 | |
156 | void Compute(OpKernelContext* context) override { |
157 | const Tensor& input = context->input(0); |
    // If the input is non-scalar, make sure its first dimension is non-empty
    // (a zero-sized first dimension would, for example, divide by zero when
    // computing the shard cost in ReverseRows).
159 | if (input.dims() > 0) { |
160 | OP_REQUIRES( |
161 | context, input.dim_size(0) != 0, |
162 | errors::InvalidArgument("Invalid input first dimension. Found 0." )); |
163 | } |
164 | const Tensor& dims = context->input(1); |
165 | |
166 | if (TensorShapeUtils::IsScalar(input.shape())) { |
167 | context->set_output(0, input); |
168 | } else { |
169 | const int input_dims = input.dims(); |
170 | OP_REQUIRES(context, TensorShapeUtils::IsVector(dims.shape()), |
171 | errors::InvalidArgument("'dims' must be 1-dimension, not " , |
172 | dims.dims())); |
173 | |
174 | OP_REQUIRES( |
175 | context, input_dims == dims.dim_size(0), |
176 | errors::InvalidArgument( |
177 | "'dims' must have the same number of values as 'input' has " |
178 | "dimensions. 'input' has " , |
179 | input_dims, "'dims' has " , dims.dim_size(0), " values" )); |
180 | OP_REQUIRES(context, input_dims <= 8, |
181 | errors::Unimplemented( |
182 | "reverse is not implemented for tensors of rank > 8." )); |
183 | |
184 | Tensor* output = nullptr; |
185 | OP_REQUIRES_OK(context, |
186 | context->allocate_output(0, input.shape(), &output)); |
187 | |
188 | #define HANDLE_REVERSE(NDIMS) \ |
189 | case NDIMS: \ |
190 | HandleReverseCase<Device, T, NDIMS>(context, dims.vec<bool>(), output); \ |
191 | return; |
192 | |
193 | switch (input_dims) { |
194 | HANDLE_REVERSE(0); |
195 | HANDLE_REVERSE(1); |
196 | HANDLE_REVERSE(2); |
197 | HANDLE_REVERSE(3); |
198 | HANDLE_REVERSE(4); |
199 | HANDLE_REVERSE(5); |
200 | HANDLE_REVERSE(6); |
201 | HANDLE_REVERSE(7); |
202 | HANDLE_REVERSE(8); |
203 | } |
204 | #undef HANDLE_REVERSE |
205 | } |
206 | } |
207 | }; |
208 | |
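// Same as HandleReverseCase above, but takes the axes as a dense bool slice
// derived from the ReverseV2 'axis' input.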
209 | template <typename Device, typename T, int NDIMS> |
210 | void HandleReverseV2Case(OpKernelContext* context, |
211 | const gtl::ArraySlice<bool> axes, Tensor* result) { |
212 | const Tensor& input = context->input(0); |
213 | |
  // Use the optimized memcpy-based reverse when only the middle dimension of
  // a 3-D CPU tensor is being reversed (e.g., flipping an HWC image
  // horizontally).
215 | if (NDIMS == 3 && std::is_same<Device, CPUDevice>::value && |
216 | data_type_can_memcpy<T>::value && (!axes[0] && axes[1] && !axes[2])) { |
217 | if (input.dim_size(2) == 3) { |
218 | DoHandleReverseCase<T, 3>(context, input, result); |
219 | } else { |
220 | DoHandleReverseCase<T, -1>(context, input, result); |
221 | } |
222 | return; |
223 | } |
224 | |
225 | typename Eigen::array<bool, NDIMS> axes_di; |
226 | for (int i = 0; i < NDIMS; i++) { |
227 | axes_di[i] = axes[i]; |
228 | } |
229 | functor::Reverse<Device, T, NDIMS>()(context->eigen_device<Device>(), |
230 | input.tensor<T, NDIMS>(), axes_di, |
231 | result->tensor<T, NDIMS>()); |
232 | } |
233 | |
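// Kernel for the ReverseV2 op, whose 'axis' input is a list of dimension
// indices (possibly negative) rather than a dense bool mask.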
234 | template <typename Device, typename T, typename Tidx> |
235 | class ReverseV2Op : public OpKernel { |
236 | public: |
237 | explicit ReverseV2Op(OpKernelConstruction* context) : OpKernel(context) {} |
238 | |
239 | void Compute(OpKernelContext* context) override { |
240 | const Tensor& input = context->input(0); |
241 | const Tensor& sparse_dims = context->input(1); |
242 | |
243 | if (TensorShapeUtils::IsScalar(input.shape()) || input.NumElements() == 0) { |
244 | context->set_output(0, input); |
245 | } else { |
246 | const int input_dims = input.dims(); |
247 | const TensorShape& sparse_dims_shape = sparse_dims.shape(); |
248 | const auto& axes_sparse_flat = sparse_dims.flat<Tidx>(); |
249 | |
250 | OP_REQUIRES(context, TensorShapeUtils::IsVector(sparse_dims_shape), |
251 | errors::InvalidArgument("'dims' must be 1-dimension, not " , |
252 | sparse_dims.dims())); |
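      // Expand the sparse axis list into a dense bool mask, canonicalizing
      // negative axes (e.g., axis -1 on a rank-3 tensor means axis 2) and
      // rejecting out-of-range or duplicate entries.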
253 | gtl::InlinedVector<bool, 8> axes_dense(input_dims, false); |
254 | for (int dummy = 0; dummy < axes_sparse_flat.size(); dummy++) { |
255 | Tidx axis = internal::SubtleMustCopy<Tidx>(axes_sparse_flat(dummy)); |
256 | Tidx canonical_axis = axis < 0 ? input_dims + axis : axis; |
257 | OP_REQUIRES(context, canonical_axis >= 0 && canonical_axis < input_dims, |
258 | errors::InvalidArgument("'axis'[" , dummy, "] = " , axis, |
259 | " is out of valid range [" , 0, ", " , |
260 | input_dims - 1)); |
261 | OP_REQUIRES(context, !axes_dense[canonical_axis], |
262 | errors::InvalidArgument("axis " , canonical_axis, |
263 | " specified more than once." )); |
264 | axes_dense[canonical_axis] = true; |
265 | } |
266 | |
267 | OP_REQUIRES(context, input_dims <= 8, |
268 | errors::Unimplemented( |
269 | "reverse is not implemented for tensors of rank > 8." )); |
270 | |
271 | Tensor* output = nullptr; |
272 | OP_REQUIRES_OK(context, |
273 | context->allocate_output(0, input.shape(), &output)); |
274 | |
275 | // TODO(cwhipkey): we can do dimension folding to reduce, e.g., a reverse |
276 | // of a single dimension to the dims=3 or dims=2 case, regardless of the |
277 | // number of dimensions in the tensor. This would let some ops use faster |
278 | // lower-dimension code (and use optimized versions). |
279 | |
280 | #define HANDLE_REVERSE(NDIMS) \ |
281 | case NDIMS: \ |
282 | HandleReverseV2Case<Device, T, NDIMS>(context, axes_dense, output); \ |
283 | return; |
284 | |
285 | switch (input_dims) { |
286 | HANDLE_REVERSE(0); |
287 | HANDLE_REVERSE(1); |
288 | HANDLE_REVERSE(2); |
289 | HANDLE_REVERSE(3); |
290 | HANDLE_REVERSE(4); |
291 | HANDLE_REVERSE(5); |
292 | HANDLE_REVERSE(6); |
293 | HANDLE_REVERSE(7); |
294 | HANDLE_REVERSE(8); |
295 | } |
296 | #undef HANDLE_REVERSE |
297 | } |
298 | } |
299 | }; |
300 | |
301 | #define REGISTER_KERNELS(T) \ |
302 | REGISTER_KERNEL_BUILDER(Name("Reverse") \ |
303 | .Device(DEVICE_CPU) \ |
304 | .TypeConstraint<T>("T") \ |
305 | .HostMemory("dims"), \ |
306 | ReverseOp<CPUDevice, T>) \ |
307 | REGISTER_KERNEL_BUILDER(Name("ReverseV2") \ |
308 | .Device(DEVICE_CPU) \ |
309 | .TypeConstraint<T>("T") \ |
310 | .TypeConstraint<int32>("Tidx") \ |
311 | .HostMemory("axis"), \ |
312 | ReverseV2Op<CPUDevice, T, int32>) \ |
313 | REGISTER_KERNEL_BUILDER(Name("ReverseV2") \ |
314 | .Device(DEVICE_CPU) \ |
315 | .TypeConstraint<T>("T") \ |
316 | .TypeConstraint<int64_t>("Tidx") \ |
317 | .HostMemory("axis"), \ |
318 | ReverseV2Op<CPUDevice, T, int64>) |
319 | TF_CALL_POD_TYPES(REGISTER_KERNELS); |
320 | TF_CALL_tstring(REGISTER_KERNELS); |
321 | #undef REGISTER_KERNELS |
322 | |
323 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
324 | |
// Forward declarations of the function specializations for GPU (to prevent
// building the GPU versions here; they are built when compiling _gpu.cu.cc).
327 | namespace functor { |
328 | #define DECLARE_GPU_SPEC_DIM(T, DIM) \ |
329 | template <> \ |
330 | void Reverse<GPUDevice, T, DIM>::operator()( \ |
331 | const GPUDevice& d, typename TTypes<T, DIM>::ConstTensor input, \ |
332 | const Eigen::array<bool, DIM>& reverse_dims, \ |
333 | typename TTypes<T, DIM>::Tensor output); \ |
334 | extern template struct Reverse<GPUDevice, T, DIM>; |
335 | #define DECLARE_GPU_SPEC(T) \ |
336 | DECLARE_GPU_SPEC_DIM(T, 0) \ |
337 | DECLARE_GPU_SPEC_DIM(T, 1) \ |
338 | DECLARE_GPU_SPEC_DIM(T, 2) \ |
339 | DECLARE_GPU_SPEC_DIM(T, 3) \ |
340 | DECLARE_GPU_SPEC_DIM(T, 4) \ |
341 | DECLARE_GPU_SPEC_DIM(T, 5) \ |
342 | DECLARE_GPU_SPEC_DIM(T, 6) \ |
343 | DECLARE_GPU_SPEC_DIM(T, 7) \ |
344 | DECLARE_GPU_SPEC_DIM(T, 8) |
345 | |
346 | TF_CALL_uint8(DECLARE_GPU_SPEC); |
347 | TF_CALL_int8(DECLARE_GPU_SPEC); |
348 | TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPEC); |
349 | #undef DECLARE_GPU_SPEC |
350 | #undef DECLARE_GPU_SPEC_DIM |
351 | } // namespace functor |
352 | |
353 | // Registration of the GPU implementations. |
354 | #define REGISTER_GPU_KERNELS(T) \ |
355 | REGISTER_KERNEL_BUILDER(Name("Reverse") \ |
356 | .Device(DEVICE_GPU) \ |
357 | .TypeConstraint<T>("T") \ |
358 | .HostMemory("dims"), \ |
359 | ReverseOp<GPUDevice, T>) \ |
360 | REGISTER_KERNEL_BUILDER(Name("ReverseV2") \ |
361 | .Device(DEVICE_GPU) \ |
362 | .TypeConstraint<T>("T") \ |
363 | .TypeConstraint<int32>("Tidx") \ |
364 | .HostMemory("axis"), \ |
365 | ReverseV2Op<GPUDevice, T, int32>) \ |
366 | REGISTER_KERNEL_BUILDER(Name("ReverseV2") \ |
367 | .Device(DEVICE_GPU) \ |
368 | .TypeConstraint<T>("T") \ |
369 | .TypeConstraint<int64_t>("Tidx") \ |
370 | .HostMemory("axis"), \ |
371 | ReverseV2Op<GPUDevice, T, int64>) |
372 | TF_CALL_uint8(REGISTER_GPU_KERNELS); |
373 | TF_CALL_int8(REGISTER_GPU_KERNELS); |
374 | TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); |
#undef REGISTER_GPU_KERNELS
376 | |
377 | // A special GPU kernel for int32. |
378 | // TODO(b/25387198): Also enable int32 in device memory. This kernel |
379 | // registration requires all int32 inputs and outputs to be in host memory. |
380 | REGISTER_KERNEL_BUILDER(Name("Reverse" ) |
381 | .Device(DEVICE_GPU) |
382 | .TypeConstraint<int32>("T" ) |
383 | .HostMemory("tensor" ) |
384 | .HostMemory("dims" ) |
385 | .HostMemory("output" ), |
386 | ReverseOp<CPUDevice, int32>); |
387 | REGISTER_KERNEL_BUILDER(Name("ReverseV2" ) |
388 | .Device(DEVICE_GPU) |
389 | .TypeConstraint<int32>("T" ) |
390 | .TypeConstraint<int32>("Tidx" ) |
391 | .HostMemory("tensor" ) |
392 | .HostMemory("axis" ) |
393 | .HostMemory("output" ), |
394 | ReverseV2Op<CPUDevice, int32, int32>); |
395 | REGISTER_KERNEL_BUILDER(Name("ReverseV2" ) |
396 | .Device(DEVICE_GPU) |
397 | .TypeConstraint<int32>("T" ) |
398 | .TypeConstraint<int64_t>("Tidx" ) |
399 | .HostMemory("tensor" ) |
400 | .HostMemory("axis" ) |
401 | .HostMemory("output" ), |
402 | ReverseV2Op<CPUDevice, int32, int64>); |
403 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
404 | |
405 | } // namespace tensorflow |
406 | |