1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/array_ops.cc. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
21 | #define EIGEN_USE_GPU |
22 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
23 | |
24 | #include <vector> |
25 | |
26 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
27 | |
28 | #include "tensorflow/core/framework/numeric_op.h" |
29 | #include "tensorflow/core/framework/op_kernel.h" |
30 | #include "tensorflow/core/framework/register_types.h" |
31 | #include "tensorflow/core/framework/tensor.h" |
32 | #include "tensorflow/core/framework/tensor_types.h" |
33 | #include "tensorflow/core/framework/type_index.h" |
34 | #include "tensorflow/core/framework/types.h" |
35 | #include "tensorflow/core/lib/core/errors.h" |
36 | #include "tensorflow/core/lib/gtl/array_slice.h" |
37 | #include "tensorflow/core/platform/macros.h" |
38 | #include "tensorflow/core/platform/types.h" |
39 | |
40 | namespace tensorflow { |
41 | |
42 | typedef Eigen::ThreadPoolDevice CPUDevice; |
43 | typedef Eigen::GpuDevice GPUDevice; |
44 | |
45 | // Forward declarations of functors that will be defined in tile_ops_impl.h |
46 | namespace functor { |
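// Tile copies `in` into `out`, repeating it broadcast_array[i] times along
// dimension i (the forward Tile computation).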
47 | template <typename Device, typename T, typename Tmultiple> |
48 | struct Tile { |
49 | void operator()(const Device& d, Tensor* out, const Tensor& in, |
50 | const gtl::ArraySlice<Tmultiple> broadcast_array) const; |
51 | }; |
52 | |
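// TileGrad adds the slice of `in` starting at `indices` with extents `sizes`
// into `out` (assigning rather than accumulating when `first` is true). It is
// invoked once per tile copy by TileGradientOp below.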
53 | template <typename Device, typename T, int NDIM> |
54 | struct TileGrad { |
55 | void operator()(const Device& d, typename TTypes<T, NDIM>::Tensor out, |
56 | typename TTypes<T, NDIM>::ConstTensor in, |
57 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices, |
58 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes, |
59 | bool first) const; |
60 | }; |
61 | |
62 | template <typename Device, typename T> |
63 | struct TileGrad<Device, T, 0> { |
64 | void operator()(const Device& d, typename TTypes<T, 0>::Tensor out, |
65 | typename TTypes<T, 0>::ConstTensor in, |
66 | const Eigen::DSizes<Eigen::DenseIndex, 0>&, |
67 | const Eigen::DSizes<Eigen::DenseIndex, 0>&, bool first) const; |
68 | }; |
69 | |
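// ReduceAndReshape sums `in` over the dimensions in `reduce_dim` and reshapes
// the result to `reshape_dim`, writing it into `out`. It implements the
// reduction-only fast path of TileGradientOp.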
70 | template <typename Device, typename T, int NDIM, int REDUCEDNDIM> |
71 | struct ReduceAndReshape { |
72 | void operator()( |
73 | const Device& d, typename TTypes<T, NDIM>::Tensor out, |
74 | typename TTypes<T, NDIM>::ConstTensor in, |
75 | const Eigen::DSizes<Eigen::DenseIndex, REDUCEDNDIM>& reduce_dim, |
76 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& reshape_dim) const; |
77 | }; |
78 | |
// Explicit instantiations are defined in tile_ops_{cpu,gpu}_impl.*; their
// declarations appear below.
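// The `extern template` declarations suppress implicit instantiation in this
// translation unit and bind to those explicit instantiations instead.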
81 | |
82 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
83 | extern template struct Tile<GPUDevice, bool, int32>; |
84 | extern template struct Tile<GPUDevice, bool, int64_t>; |
85 | extern template struct Tile<GPUDevice, float, int32>; |
86 | extern template struct Tile<GPUDevice, float, int64_t>; |
87 | extern template struct Tile<GPUDevice, double, int32>; |
88 | extern template struct Tile<GPUDevice, double, int64_t>; |
89 | extern template struct Tile<GPUDevice, complex64, int32>; |
90 | extern template struct Tile<GPUDevice, complex64, int64_t>; |
91 | extern template struct Tile<GPUDevice, complex128, int32>; |
92 | extern template struct Tile<GPUDevice, complex128, int64_t>; |
93 | extern template struct Tile<GPUDevice, Eigen::half, int32>; |
94 | extern template struct Tile<GPUDevice, Eigen::half, int64_t>; |
95 | extern template struct Tile<GPUDevice, int16, int32>; |
96 | extern template struct Tile<GPUDevice, int16, int64_t>; |
97 | extern template struct Tile<GPUDevice, int32, int32>; |
98 | extern template struct Tile<GPUDevice, int32, int64_t>; |
99 | extern template struct Tile<GPUDevice, int64_t, int32>; |
100 | extern template struct Tile<GPUDevice, int64_t, int64_t>; |
101 | #define DECLARE_CUDA_DIM(T, NDIM) \ |
102 | extern template struct TileGrad<GPUDevice, T, NDIM>; \ |
103 | extern template struct ReduceAndReshape<GPUDevice, T, NDIM, 1> |
104 | #else // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
105 | #define DECLARE_CUDA_DIM(T, NDIM) |
106 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
107 | |
108 | #define DECLARE_TYPE(T) \ |
109 | extern template struct Tile<CPUDevice, T, int32>; \ |
110 | extern template struct Tile<CPUDevice, T, int64_t>; |
111 | TF_CALL_bool(DECLARE_TYPE); |
112 | TF_CALL_float(DECLARE_TYPE); |
113 | TF_CALL_bfloat16(DECLARE_TYPE); |
114 | TF_CALL_double(DECLARE_TYPE); |
115 | TF_CALL_uint8(DECLARE_TYPE); |
116 | TF_CALL_int32(DECLARE_TYPE); |
117 | TF_CALL_int16(DECLARE_TYPE); |
118 | TF_CALL_int64(DECLARE_TYPE); |
119 | TF_CALL_uint32(DECLARE_TYPE); |
120 | TF_CALL_uint64(DECLARE_TYPE); |
121 | TF_CALL_half(DECLARE_TYPE); |
122 | TF_CALL_complex64(DECLARE_TYPE); |
123 | TF_CALL_complex128(DECLARE_TYPE); |
124 | TF_CALL_tstring(DECLARE_TYPE); |
125 | TF_CALL_variant(DECLARE_TYPE); |
126 | #undef DECLARE_TYPE |
127 | |
128 | #define DECLARE_DIM(T, NDIM) \ |
129 | DECLARE_CUDA_DIM(T, NDIM); \ |
130 | extern template struct TileGrad<CPUDevice, T, NDIM>; \ |
131 | extern template struct ReduceAndReshape<CPUDevice, T, NDIM, 1>; |
132 | |
133 | #define DECLARE_TYPE(T) \ |
134 | DECLARE_DIM(T, 1) \ |
135 | DECLARE_DIM(T, 2) \ |
136 | DECLARE_DIM(T, 3) \ |
137 | DECLARE_DIM(T, 4) \ |
138 | DECLARE_DIM(T, 5) \ |
139 | DECLARE_DIM(T, 6) \ |
140 | DECLARE_DIM(T, 7) |
141 | TF_CALL_float(DECLARE_TYPE); |
142 | TF_CALL_bfloat16(DECLARE_TYPE); |
143 | TF_CALL_double(DECLARE_TYPE); |
144 | TF_CALL_int16(DECLARE_TYPE); |
145 | TF_CALL_int32(DECLARE_TYPE); |
146 | TF_CALL_int64(DECLARE_TYPE); |
147 | TF_CALL_half(DECLARE_TYPE); |
148 | TF_CALL_complex64(DECLARE_TYPE); |
149 | TF_CALL_complex128(DECLARE_TYPE); |
150 | #undef DECLARE_TYPE |
151 | |
152 | #undef DECLARE_DIM |
153 | #undef DECLARE_CUDA_DIM |
154 | |
155 | } // namespace functor |
156 | |
157 | // -------------------------------------------------------------------------- |
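// TileOp replicates `input` along each dimension: output dimension i has size
// input.dim_size(i) * multiples[i]. For example, tiling a [2, 3] tensor with
// multiples [2, 1] produces a [4, 3] tensor containing two copies of the
// input stacked along dimension 0.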
158 | template <typename Device, typename Tmultiples> |
159 | class TileOp : public OpKernel { |
160 | public: |
161 | explicit TileOp(OpKernelConstruction* context) : OpKernel(context) {} |
162 | |
163 | void Compute(OpKernelContext* context) override { |
164 | const Tensor& input = context->input(0); |
165 | const Tensor& multiples = context->input(1); |
166 | |
    OP_REQUIRES(
        context, TensorShapeUtils::IsVector(multiples.shape()),
        errors::InvalidArgument("Expected multiples to be 1-D, but got shape ",
                                multiples.shape().DebugString()));
    OP_REQUIRES(context, input.dims() == multiples.NumElements(),
                errors::InvalidArgument(
                    "Expected multiples argument to be a vector of length ",
                    input.dims(), " but got length ", multiples.dim_size(0)));
175 | const int input_dims = input.dims(); |
176 | |
177 | // Eigen doesn't support scalars on the GPU, so handle 0-D specially |
178 | if (input_dims == 0) { |
179 | context->set_output(0, input); |
180 | return; |
181 | } |
182 | |
183 | const gtl::ArraySlice<Tmultiples> multiples_array( |
184 | multiples.flat<Tmultiples>().data(), input_dims); |
185 | TensorShape output_shape; |
    for (int i = 0; i < input_dims; ++i) {
      OP_REQUIRES(
          context, multiples_array[i] >= 0,
          errors::InvalidArgument("Expected multiples[", i, "] >= 0, but got ",
                                  multiples_array[i]));
      OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(
                                  input.dim_size(i) * multiples_array[i]));
    }
194 | if (output_shape == input.shape()) { |
195 | context->set_output(0, input); |
196 | return; |
197 | } |
198 | Tensor* result = nullptr; |
199 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &result)); |
200 | |
201 | // If there's no output, there's nothing to do. |
202 | if (output_shape.num_elements() == 0) return; |
203 | |
204 | #define HANDLE_TYPE(DT) \ |
205 | if (context->input(0).dtype() == DT) { \ |
206 | HandleCase<DT>(context, multiples_array, result); \ |
207 | return; \ |
208 | } |
209 | |
210 | #define HANDLE_TYPE_NAME(T) HANDLE_TYPE(DataTypeToEnum<T>::value) |
211 | |
212 | // Invoke macro using TF_CALL_* so type-filtering for platform applies. |
213 | TF_CALL_bool(HANDLE_TYPE_NAME); |
214 | TF_CALL_bfloat16(HANDLE_TYPE_NAME); |
215 | TF_CALL_float(HANDLE_TYPE_NAME); |
216 | TF_CALL_double(HANDLE_TYPE_NAME); |
217 | TF_CALL_uint8(HANDLE_TYPE_NAME); |
218 | TF_CALL_int8(HANDLE_TYPE_NAME); |
219 | TF_CALL_int32(HANDLE_TYPE_NAME); |
220 | TF_CALL_int16(HANDLE_TYPE_NAME); |
221 | TF_CALL_int64(HANDLE_TYPE_NAME); |
222 | TF_CALL_uint32(HANDLE_TYPE_NAME); |
223 | TF_CALL_uint64(HANDLE_TYPE_NAME); |
224 | TF_CALL_half(HANDLE_TYPE_NAME); |
225 | TF_CALL_tstring(HANDLE_TYPE_NAME); // when DEVICE=CPUDevice. |
226 | TF_CALL_complex64(HANDLE_TYPE_NAME); |
227 | TF_CALL_complex128(HANDLE_TYPE_NAME); |
228 | TF_CALL_variant(HANDLE_TYPE_NAME); // when DEVICE=CPUDevice |
229 | |
230 | #undef HANDLE_TYPE_NAME |
231 | #undef HANDLE_TYPE |
232 | |
    OP_REQUIRES(
        context, false,
        errors::Unimplemented(
            "TileOp : The input data type is not supported, DataType : ",
            DataTypeString(context->input(0).dtype()),
            ", Dimension : ", input_dims));
239 | } |
240 | |
241 | private: |
242 | template <DataType DT> |
243 | void HandleCaseImpl(OpKernelContext* context, |
244 | const gtl::ArraySlice<Tmultiples> multiples_array, |
245 | Tensor* result) { |
246 | typedef typename EnumToDataType<DT>::Type T; |
247 | functor::Tile<Device, T, Tmultiples>()(context->eigen_device<Device>(), |
248 | result, context->input(0), |
249 | multiples_array); |
250 | } |
251 | |
252 | template <DataType DT> |
253 | void HandleCase(OpKernelContext* context, |
254 | const gtl::ArraySlice<Tmultiples> multiples_array, |
255 | Tensor* result); |
256 | |
257 | TF_DISALLOW_COPY_AND_ASSIGN(TileOp); |
258 | }; |
259 | |
260 | template <typename Device, typename Tmultiples> |
261 | template <DataType DT> |
262 | inline void TileOp<Device, Tmultiples>::HandleCase( |
263 | OpKernelContext* context, const gtl::ArraySlice<Tmultiples> multiples_array, |
264 | Tensor* result) { |
265 | // TODO(vrv): print out the device name if useful. Currently disabled to avoid |
266 | // having to use RTTI. |
267 | LOG(FATAL) << "TileOp: Invalid combination of Device, DT: " |
268 | // << typeid(Device).name() << ", " |
269 | << DataTypeString(DT); |
270 | } |
271 | |
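// HANDLE_CASE generates an explicit specialization of HandleCase that
// dispatches to HandleCaseImpl for each supported (device, dtype, Tmultiples)
// combination; anything else falls through to the LOG(FATAL) definition above.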
272 | #define HANDLE_CASE(device, dtype, Tmultiples) \ |
273 | template <> \ |
274 | template <> \ |
275 | void TileOp<device, Tmultiples>::HandleCase<dtype>( \ |
276 | OpKernelContext * context, \ |
277 | const gtl::ArraySlice<Tmultiples> multiples_array, Tensor* result) { \ |
278 | HandleCaseImpl<dtype>(context, multiples_array, result); \ |
279 | } |
280 | |
281 | #define HANDLE_TYPE_NAME_CPU(T) \ |
282 | HANDLE_CASE(CPUDevice, DataTypeToEnum<T>::value, int32); \ |
283 | HANDLE_CASE(CPUDevice, DataTypeToEnum<T>::value, int64_t); |
284 | |
285 | #define HANDLE_TYPE_NAME_GPU(T) \ |
286 | HANDLE_CASE(GPUDevice, DataTypeToEnum<T>::value, int32); \ |
287 | HANDLE_CASE(GPUDevice, DataTypeToEnum<T>::value, int64_t); |
288 | |
289 | TF_CALL_bool(HANDLE_TYPE_NAME_CPU); |
290 | TF_CALL_float(HANDLE_TYPE_NAME_CPU); |
291 | TF_CALL_bfloat16(HANDLE_TYPE_NAME_CPU); |
292 | TF_CALL_double(HANDLE_TYPE_NAME_CPU); |
293 | TF_CALL_uint8(HANDLE_TYPE_NAME_CPU); |
294 | TF_CALL_int8(HANDLE_TYPE_NAME_CPU); |
295 | TF_CALL_int32(HANDLE_TYPE_NAME_CPU); |
296 | TF_CALL_int16(HANDLE_TYPE_NAME_CPU); |
297 | TF_CALL_int64(HANDLE_TYPE_NAME_CPU); |
298 | TF_CALL_uint32(HANDLE_TYPE_NAME_CPU); |
299 | TF_CALL_uint64(HANDLE_TYPE_NAME_CPU); |
300 | TF_CALL_half(HANDLE_TYPE_NAME_CPU); |
301 | TF_CALL_complex64(HANDLE_TYPE_NAME_CPU); |
302 | TF_CALL_complex128(HANDLE_TYPE_NAME_CPU); |
303 | TF_CALL_tstring(HANDLE_TYPE_NAME_CPU); |
304 | TF_CALL_variant(HANDLE_TYPE_NAME_CPU); |
305 | |
306 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
307 | TF_CALL_bool(HANDLE_TYPE_NAME_GPU); |
308 | TF_CALL_float(HANDLE_TYPE_NAME_GPU); |
309 | TF_CALL_double(HANDLE_TYPE_NAME_GPU); |
310 | TF_CALL_int16(HANDLE_TYPE_NAME_GPU); |
311 | TF_CALL_int32(HANDLE_TYPE_NAME_GPU); |
312 | TF_CALL_int64(HANDLE_TYPE_NAME_GPU); |
313 | TF_CALL_half(HANDLE_TYPE_NAME_GPU); |
314 | TF_CALL_complex64(HANDLE_TYPE_NAME_GPU); |
315 | TF_CALL_complex128(HANDLE_TYPE_NAME_GPU); |
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

319 | #undef HANDLE_TYPE_NAME_CPU |
320 | #undef HANDLE_TYPE_NAME_GPU |
321 | #undef HANDLE_CASE |
322 | |
323 | // -------------------------------------------------------------------------- |
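// TileGradientOp computes the gradient of Tile: the incoming gradient has the
// shape of Tile's output, and the op sums the contributions of all tile
// copies, producing a tensor of the original (pre-tiling) shape, i.e. output
// dimension i has size input.dim_size(i) / multiples[i].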
324 | template <typename Device, typename Tmultiples> |
325 | class TileGradientOp : public OpKernel { |
326 | public: |
327 | explicit TileGradientOp(OpKernelConstruction* context) : OpKernel(context) {} |
328 | |
329 | void Compute(OpKernelContext* context) override { |
330 | const Tensor& input = context->input(0); |
331 | const Tensor& multiples = context->input(1); |
    OP_REQUIRES(
        context, TensorShapeUtils::IsVector(multiples.shape()),
        errors::InvalidArgument("Expected multiples to be 1-D, but got shape ",
                                multiples.shape().DebugString()));
    OP_REQUIRES(context, input.dims() == multiples.NumElements(),
                errors::InvalidArgument(
                    "Expected multiples argument to be a vector of length ",
                    input.dims(), " but got length ", multiples.dim_size(0)));
340 | |
341 | const int input_dims = input.dims(); |
342 | |
343 | // Eigen doesn't support scalars on the GPU, so handle 0-D specially |
344 | if (input_dims == 0) { |
345 | context->set_output(0, input); |
346 | return; |
347 | } |
348 | |
349 | const gtl::ArraySlice<Tmultiples> multiples_array( |
350 | multiples.flat<Tmultiples>().data(), input_dims); |
351 | TensorShape output_shape; |
352 | std::vector<Tmultiples> input_dim_size_vec; |
    for (int i = 0; i < input_dims; ++i) {
      OP_REQUIRES(
          context, multiples_array[i] > 0,
          errors::InvalidArgument("Expected multiples[", i, "] > 0, but got ",
                                  multiples_array[i]));
      OP_REQUIRES(context, input.dim_size(i) % multiples_array[i] == 0,
                  errors::InvalidArgument("Expected input_dim[", i,
                                          "] to be divisible by multiples[", i,
                                          "], but ", input.dim_size(i), " % ",
                                          multiples_array[i], " != 0"));
      output_shape.AddDim(input.dim_size(i) / multiples_array[i]);
      input_dim_size_vec.push_back(input.dim_size(i));
    }
366 | if (output_shape == input.shape()) { |
367 | context->set_output(0, input); |
368 | return; |
369 | } |
370 | Tensor* result = nullptr; |
371 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &result)); |
372 | |
373 | #define HANDLE_DIM(DT, NDIM) \ |
374 | if (context->input(0).dtype() == DT && input_dims == NDIM) { \ |
375 | HandleCase<DT, NDIM>(context, input_dim_size_vec, multiples_array, \ |
376 | result); \ |
377 | return; \ |
378 | } |
379 | |
380 | #define HANDLE_TYPE(T) \ |
381 | HANDLE_DIM(T, 1) \ |
382 | HANDLE_DIM(T, 2) \ |
383 | HANDLE_DIM(T, 3) \ |
384 | HANDLE_DIM(T, 4) \ |
385 | HANDLE_DIM(T, 5) \ |
386 | HANDLE_DIM(T, 6) \ |
387 | HANDLE_DIM(T, 7) |
388 | |
389 | #define HANDLE_TYPE_NAME(T) HANDLE_TYPE(DataTypeToEnum<T>::value) |
390 | |
391 | TF_CALL_float(HANDLE_TYPE_NAME); |
392 | TF_CALL_double(HANDLE_TYPE_NAME); |
393 | TF_CALL_int32(HANDLE_TYPE_NAME); |
394 | TF_CALL_int16(HANDLE_TYPE_NAME); |
395 | TF_CALL_int64(HANDLE_TYPE_NAME); |
396 | TF_CALL_half(HANDLE_TYPE_NAME); |
397 | TF_CALL_bfloat16(HANDLE_TYPE_NAME); |
398 | TF_CALL_complex64(HANDLE_TYPE_NAME); |
399 | TF_CALL_complex128(HANDLE_TYPE_NAME); |
400 | |
401 | #undef HANDLE_TYPE_NAME |
402 | #undef HANDLE_TYPE |
403 | #undef HANDLE_DIM |
404 | |
    OP_REQUIRES(context, false,
                errors::Unimplemented("TileGradientOp : The input data type or "
                                      "dimension is not supported, DataType : ",
                                      DataTypeString(context->input(0).dtype()),
                                      ", Dimension : ", input_dims));
410 | } |
411 | |
412 | private: |
413 | template <DataType DT, int NDIM> |
414 | void HandleCase(OpKernelContext* context, |
415 | const std::vector<Tmultiples>& input_dims, |
416 | const gtl::ArraySlice<Tmultiples> multiples_array, |
417 | Tensor* result); |
418 | |
419 | template <DataType DT, int NDIM> |
420 | void HandleCaseImpl(OpKernelContext* context, |
421 | const std::vector<Tmultiples>& input_dims, |
422 | const gtl::ArraySlice<Tmultiples> multiples_array, |
423 | Tensor* result) { |
424 | typedef typename EnumToDataType<DT>::Type T; |
425 | |
426 | bool reduction_only = true; |
427 | std::vector<Tmultiples> reduction_dims; |
428 | |
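    // The gradient is a pure reduction when every tiled dimension had
    // original size 1 (i.e. multiples_array[i] == input_dims[i]); in that
    // case a single sum-and-reshape suffices and the generic slice loop below
    // can be skipped.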
429 | for (int i = 0; i < NDIM; ++i) { |
430 | if (input_dims[i] > multiples_array[i] && multiples_array[i] > 1) { |
431 | reduction_only = false; |
432 | break; |
433 | } else { |
434 | if (multiples_array[i] == input_dims[i]) { |
435 | reduction_dims.push_back(i); |
436 | } |
437 | } |
438 | } |
439 | |
440 | if (reduction_only) { |
441 | #define HANDLE_DIM(D) \ |
442 | if (reduction_dims.size() == (D)) { \ |
443 | HandleReduce<T, NDIM, (D)>(context, reduction_dims, result); \ |
444 | return; \ |
445 | } |
446 | // NOTE(keveman): Handling the most common case here. |
447 | // Adding more cases here would require more templating and code |
448 | // explosion. For instance, HANDLE_DIM(2) wouldn't make sense for NDIM=1. |
449 | HANDLE_DIM(1); |
450 | |
451 | // Fall through to the unoptimized version. |
452 | #undef HANDLE_DIM |
453 | } |
454 | |
455 | Eigen::DSizes<Eigen::DenseIndex, NDIM> indices; |
456 | Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes; |
457 | |
458 | // Accumulate slices along the dimensions into the output. The number of |
459 | // slices along dimension 'i' is simply the multiple along dimension 'i' |
460 | // passed to the original Tile op. |
461 | for (int i = 0; i < NDIM; ++i) { |
462 | sizes[i] = input_dims[i] / multiples_array[i]; |
463 | indices[i] = 0; |
464 | } |
465 | |
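    // Visit every tile copy: `indices` steps through the input like a
    // mixed-radix counter with stride sizes[i] along dimension i, and each
    // slice is accumulated into the output by the TileGrad functor.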
466 | bool first = true; |
467 | while (true) { |
468 | functor::TileGrad<Device, T, NDIM>()( |
469 | context->eigen_device<Device>(), result->tensor<T, NDIM>(), |
470 | context->input(0).tensor<T, NDIM>(), indices, sizes, first); |
471 | first = false; |
472 | // Increment the begin indices. |
473 | int i = 0; |
474 | while (i < NDIM && indices[i] / sizes[i] == multiples_array[i] - 1) { |
475 | indices[i] = 0; |
476 | ++i; |
477 | } |
478 | // We are finished if we have iterated to the maximum along all |
479 | // dimensions. |
480 | if (i == NDIM) { |
481 | break; |
482 | } |
483 | indices[i] += sizes[i]; |
484 | } |
485 | } |
486 | |
487 | template <typename T, int NDIM, int REDUCENDIM> |
488 | void HandleReduce(OpKernelContext* context, |
489 | const std::vector<Tmultiples>& reduce_dim_in, |
490 | Tensor* result) { |
    static_assert(NDIM >= REDUCENDIM, "Too many reduced dimensions");
492 | Eigen::DSizes<Eigen::DenseIndex, REDUCENDIM> reduce_dim; |
493 | Eigen::DSizes<Eigen::DenseIndex, NDIM> reshape_dim; |
494 | |
495 | for (int i = 0; i < REDUCENDIM; ++i) { |
496 | reduce_dim[i] = reduce_dim_in[i]; |
497 | } |
498 | |
499 | for (int i = 0; i < NDIM; ++i) { |
500 | reshape_dim[i] = result->dim_size(i); |
501 | } |
502 | |
503 | functor::ReduceAndReshape<Device, T, NDIM, REDUCENDIM>()( |
504 | context->eigen_device<Device>(), result->tensor<T, NDIM>(), |
505 | context->input(0).tensor<T, NDIM>(), reduce_dim, reshape_dim); |
506 | } |
507 | |
508 | TF_DISALLOW_COPY_AND_ASSIGN(TileGradientOp); |
509 | }; |
510 | |
511 | template <typename Device, typename Tmultiples> |
512 | template <DataType DT, int NDIM> |
513 | inline void TileGradientOp<Device, Tmultiples>::HandleCase( |
514 | OpKernelContext* context, const std::vector<Tmultiples>& input_dims, |
515 | const gtl::ArraySlice<Tmultiples> multiples_array, Tensor* result) { |
516 | LOG(FATAL) << "TileGradientOp: Invalid combination of Device, DT and NDIM: " |
517 | << TypeIndex::Make<Device>().name() << ", " << DataTypeString(DT) |
518 | << ", " << NDIM; |
519 | } |
520 | |
521 | #define HANDLE_CASE(device, T, dtype, Tmultiples, ndim) \ |
522 | template <> \ |
523 | template <> \ |
524 | void TileGradientOp<device, Tmultiples>::HandleCase<dtype, ndim>( \ |
525 | OpKernelContext * context, const std::vector<Tmultiples>& input_dims, \ |
526 | const gtl::ArraySlice<Tmultiples> multiples_array, Tensor* result) { \ |
527 | HandleCaseImpl<dtype, ndim>(context, input_dims, multiples_array, result); \ |
528 | } |
529 | |
530 | // 0-D handled specially above |
531 | #define HANDLE_CASE_DIM(device, T, dtype) \ |
532 | HANDLE_CASE(device, T, dtype, int32, 1); \ |
533 | HANDLE_CASE(device, T, dtype, int32, 2); \ |
534 | HANDLE_CASE(device, T, dtype, int32, 3); \ |
535 | HANDLE_CASE(device, T, dtype, int32, 4); \ |
536 | HANDLE_CASE(device, T, dtype, int32, 5); \ |
537 | HANDLE_CASE(device, T, dtype, int32, 6); \ |
538 | HANDLE_CASE(device, T, dtype, int32, 7); \ |
539 | HANDLE_CASE(device, T, dtype, int64_t, 1); \ |
540 | HANDLE_CASE(device, T, dtype, int64_t, 2); \ |
541 | HANDLE_CASE(device, T, dtype, int64_t, 3); \ |
542 | HANDLE_CASE(device, T, dtype, int64_t, 4); \ |
543 | HANDLE_CASE(device, T, dtype, int64_t, 5); \ |
544 | HANDLE_CASE(device, T, dtype, int64_t, 6); \ |
545 | HANDLE_CASE(device, T, dtype, int64_t, 7); |
546 | |
547 | #define HANDLE_TYPE_NAME_CPU(T) \ |
548 | HANDLE_CASE_DIM(CPUDevice, T, DataTypeToEnum<T>::value); |
549 | |
550 | #define HANDLE_TYPE_NAME_GPU(T) \ |
551 | HANDLE_CASE_DIM(GPUDevice, T, DataTypeToEnum<T>::value); |
552 | |
553 | TF_CALL_float(HANDLE_TYPE_NAME_CPU); |
554 | TF_CALL_double(HANDLE_TYPE_NAME_CPU); |
555 | TF_CALL_int16(HANDLE_TYPE_NAME_CPU); |
556 | TF_CALL_int32(HANDLE_TYPE_NAME_CPU); |
557 | TF_CALL_int64(HANDLE_TYPE_NAME_CPU); |
558 | TF_CALL_half(HANDLE_TYPE_NAME_CPU); |
559 | TF_CALL_complex64(HANDLE_TYPE_NAME_CPU); |
560 | TF_CALL_complex128(HANDLE_TYPE_NAME_CPU); |
561 | |
562 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
563 | TF_CALL_float(HANDLE_TYPE_NAME_GPU); |
564 | TF_CALL_double(HANDLE_TYPE_NAME_GPU); |
565 | TF_CALL_int16(HANDLE_TYPE_NAME_GPU); |
566 | TF_CALL_int32(HANDLE_TYPE_NAME_GPU); |
567 | TF_CALL_int64(HANDLE_TYPE_NAME_GPU); |
568 | TF_CALL_half(HANDLE_TYPE_NAME_GPU); |
569 | TF_CALL_complex64(HANDLE_TYPE_NAME_GPU); |
570 | TF_CALL_complex128(HANDLE_TYPE_NAME_GPU); |
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

574 | #undef HANDLE_TYPE_NAME_CPU |
575 | #undef HANDLE_TYPE_NAME_GPU |
576 | #undef HANDLE_CASE_DIM |
577 | #undef HANDLE_CASE |
578 | |
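// The `multiples` input is pinned to host memory on all devices: its values
// are read on the host when computing the output shape.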
579 | REGISTER_KERNEL_BUILDER(Name("Tile" ) |
580 | .Device(DEVICE_CPU) |
581 | .HostMemory("multiples" ) |
582 | .TypeConstraint<int32>("Tmultiples" ), |
583 | TileOp<CPUDevice, int32>); |
584 | REGISTER_KERNEL_BUILDER(Name("Tile" ) |
585 | .Device(DEVICE_CPU) |
586 | .HostMemory("multiples" ) |
587 | .TypeConstraint<int64_t>("Tmultiples" ), |
588 | TileOp<CPUDevice, int64>); |
589 | REGISTER_KERNEL_BUILDER(Name("TileGrad" ) |
590 | .Device(DEVICE_CPU) |
591 | .HostMemory("multiples" ) |
592 | .TypeConstraint<int32>("Tmultiples" ), |
593 | TileGradientOp<CPUDevice, int32>); |
594 | REGISTER_KERNEL_BUILDER(Name("TileGrad" ) |
595 | .Device(DEVICE_CPU) |
596 | .HostMemory("multiples" ) |
597 | .TypeConstraint<int64_t>("Tmultiples" ), |
598 | TileGradientOp<CPUDevice, int64>); |
599 | |
600 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
601 | #define REGISTER_GPU_TILE(type) \ |
602 | REGISTER_KERNEL_BUILDER(Name("Tile") \ |
603 | .Device(DEVICE_GPU) \ |
604 | .TypeConstraint<type>("T") \ |
605 | .TypeConstraint<int32>("Tmultiples") \ |
606 | .HostMemory("multiples"), \ |
607 | TileOp<GPUDevice, int32>); \ |
608 | REGISTER_KERNEL_BUILDER(Name("Tile") \ |
609 | .Device(DEVICE_GPU) \ |
610 | .TypeConstraint<type>("T") \ |
611 | .TypeConstraint<int64_t>("Tmultiples") \ |
612 | .HostMemory("multiples"), \ |
613 | TileOp<GPUDevice, int64>); |
614 | |
615 | #define REGISTER_GPU_TILE_GRAD(type) \ |
616 | REGISTER_KERNEL_BUILDER(Name("TileGrad") \ |
617 | .Device(DEVICE_GPU) \ |
618 | .TypeConstraint<type>("T") \ |
619 | .TypeConstraint<int32>("Tmultiples") \ |
620 | .HostMemory("multiples"), \ |
621 | TileGradientOp<GPUDevice, int32>); \ |
622 | REGISTER_KERNEL_BUILDER(Name("TileGrad") \ |
623 | .Device(DEVICE_GPU) \ |
624 | .TypeConstraint<type>("T") \ |
625 | .TypeConstraint<int64_t>("Tmultiples") \ |
626 | .HostMemory("multiples"), \ |
627 | TileGradientOp<GPUDevice, int64>); |
628 | |
629 | #define REGISTER_GPU(type) \ |
630 | REGISTER_GPU_TILE(type); \ |
631 | REGISTER_GPU_TILE_GRAD(type); |
632 | |
633 | TF_CALL_bool(REGISTER_GPU_TILE); |
634 | TF_CALL_float(REGISTER_GPU); |
635 | TF_CALL_double(REGISTER_GPU); |
636 | TF_CALL_half(REGISTER_GPU); |
637 | TF_CALL_int16(REGISTER_GPU); |
638 | TF_CALL_int32(REGISTER_GPU); |
639 | TF_CALL_int64(REGISTER_GPU); |
640 | TF_CALL_complex64(REGISTER_GPU); |
TF_CALL_complex128(REGISTER_GPU);
642 | |
643 | #undef REGISTER_GPU_TILE |
644 | #undef REGISTER_GPU_TILE_GRAD |
645 | #undef REGISTER_GPU |
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

649 | } // namespace tensorflow |
650 | |