dilation_ops.cc source code [tensorflow/tensorflow/core/kernels/dilation_ops.cc]

/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16	// See docs in ../ops/nn_ops.cc.
17
18	#define EIGEN_USE_THREADS
19
20	#include "tensorflow/core/kernels/dilation_ops.h"
21
22	#include <cfloat>
23	#include <vector>
24
25	#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
26	#include "tensorflow/core/common_runtime/device.h"
27	#include "tensorflow/core/framework/kernel_shape_util.h"
28	#include "tensorflow/core/framework/numeric_op.h"
29	#include "tensorflow/core/framework/op_kernel.h"
30	#include "tensorflow/core/framework/register_types.h"
31	#include "tensorflow/core/framework/tensor.h"
32	#include "tensorflow/core/framework/tensor_shape.h"
33	#include "tensorflow/core/framework/tensor_slice.h"
34	#include "tensorflow/core/lib/core/errors.h"
35	#include "tensorflow/core/lib/gtl/array_slice.h"
36	#include "tensorflow/core/util/determinism.h"
37	#include "tensorflow/core/util/padding.h"
38
39	namespace tensorflow {
40
41	typedef Eigen::ThreadPoolDevice CPUDevice;
42	typedef Eigen::GpuDevice GPUDevice;
43
44	void ParseAttributes(OpKernelConstruction* context, std::vector<int32>* strides,
45	std::vector<int32>* rates, Padding* padding) {
46	OP_REQUIRES_OK(context, context->GetAttr("strides", strides));
47	OP_REQUIRES(context, strides->size() == `4`,
48	errors::InvalidArgument("Sliding window stride field must "
49	"specify 4 dimensions"));
50	OP_REQUIRES(context, (strides)[`0`] == `1` && (strides)[`3`] == `1`,
51	errors::Unimplemented(
52	"Stride is only supported across spatial dimensions."));
53
54	OP_REQUIRES_OK(context, context->GetAttr("rates", rates));
55	OP_REQUIRES(context, rates->size() == `4`,
56	errors::InvalidArgument("Input stride (atrous rate) field "
57	"must specify 4 dimensions"));
58	OP_REQUIRES(context, (rates)[`0`] == `1` && (rates)[`3`] == `1`,
59	errors::Unimplemented(
60	"Rate is only supported across spatial dimensions."));
61
62	OP_REQUIRES_OK(context, context->GetAttr("padding", padding));
63	}
64
65	void ParseSizes(OpKernelContext* context, const std::vector<int32>& strides,
66	const std::vector<int32>& rates, const Padding& padding,
67	int* stride_rows, int* stride_cols, int* rate_rows,
68	int* rate_cols, int64_t* pad_top, int64_t* pad_left,
69	int64_t* out_rows, int64_t* out_cols) {
70	// Input tensor is of the following dimensions:
71	// [ batch, input_rows, input_cols, depth ]
72	const Tensor& input = context->input(`0`);
73	OP_REQUIRES(context, input.dims() == `4`,
74	errors::InvalidArgument("input must be 4-dimensional",
75	input.shape().DebugString()));
76	const int input_rows = input.dim_size(`1`);
77	const int input_cols = input.dim_size(`2`);
78	const int depth = input.dim_size(`3`);
79
80	// For now we take the stride and rate from the second and third dimensions
81	// only (we do not support striding on the batch or depth dimension).
82	*stride_rows = strides [`1`];
83	*stride_cols = strides [`2`];
84	*rate_rows = rates [`1`];
85	*rate_cols = rates [`2`];
86
87	// Input filter is of the following dimensions:
88	// [ filter_rows, filter_cols, depth ]
89	const Tensor& filter = context->input(`1`);
90	OP_REQUIRES(context, filter.dims() == `3`,
91	errors::InvalidArgument("filter must be 3-dimensional: ",
92	filter.shape().DebugString()));
93	const int filter_rows = filter.dim_size(`0`);
94	const int filter_cols = filter.dim_size(`1`);
95	OP_REQUIRES(context, depth == filter.dim_size(`2`),
96	errors::InvalidArgument(
97	"input and filter must have the same depth: ", depth, " vs ",
98	filter.dim_size(`2`)));
99
100	// Effective filter size, after introducing rate - 1 zeros between each
101	// non-zero filter element.
102	const int filter_rows_eff =
103	filter_rows + (filter_rows - `1`) * (*rate_rows - `1`);
104	const int filter_cols_eff =
105	filter_cols + (filter_cols - `1`) * (*rate_cols - `1`);
106
107	OP_REQUIRES_OK(
108	context, GetWindowedOutputSize(input_rows, filter_rows_eff, *stride_rows,
109	padding, out_rows, pad_top));
110	OP_REQUIRES_OK(
111	context, GetWindowedOutputSize(input_cols, filter_cols_eff, *stride_cols,
112	padding, out_cols, pad_left));
113	}
114
115	template <typename Device, typename T>
116	class DilationOp : public OpKernel {
117	public:
118	explicit DilationOp(OpKernelConstruction* context) : OpKernel(context) {
119	ParseAttributes(context, &strides_, &rates_, &padding_);
120	}
121
122	void Compute(OpKernelContext* context) override {
123	const Tensor& input = context->input(`0`);
124	const Tensor& filter = context->input(`1`);
125
126	// Determine relevant sizes from input and filters.
127	int stride_rows = `0`, stride_cols = `0`;
128	int rate_rows = `0`, rate_cols = `0`;
129	int64_t pad_top = `0`, pad_left = `0`;
130	int64_t out_rows = `0`, out_cols = `0`;
131	ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols,
132	&rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows,
133	&out_cols);
134	if (!context->status().ok()) return;
135
136	// Output tensor is of the following dimensions:
137	// [ batch, out_rows, out_cols, depth ]
138	const int batch = input.dim_size(`0`);
139	const int depth = input.dim_size(`3`);
140	const std::vector<int64_t> out_sizes = {batch, out_rows, out_cols, depth};
141	TensorShape out_shape(out_sizes);
142
143	Tensor* output = nullptr;
144	OP_REQUIRES_OK(context, context->allocate_output(`0`, out_shape, &output));
145
146	// If there is nothing to compute, return.
147	if (out_shape.num_elements() == `0`) {
148	return;
149	}
150
151	functor::Dilation<Device, T>()(
152	context->eigen_device<Device>(), input.tensor<T, `4`>(),
153	filter.tensor<T, `3`>(), stride_rows, stride_cols, rate_rows, rate_cols,
154	pad_top, pad_left, output->tensor<T, `4`>());
155	}
156
157	std::vector<int32> strides_;
158	std::vector<int32> rates_;
159	Padding padding_;
160	};
161
162	// Partial specialization of Dilation functor for a CPUDevice.
163	namespace functor {
164	template <typename T>
165	struct Dilation<CPUDevice, T> {
166	void operator()(const CPUDevice& d, typename TTypes<T, `4`>::ConstTensor input,
167	typename TTypes<T, `3`>::ConstTensor filter, int stride_rows,
168	int stride_cols, int rate_rows, int rate_cols, int pad_top,
169	int pad_left, typename TTypes<T, `4`>::Tensor output) {
170	const int batch = input.dimension(`0`);
171	const int input_rows = input.dimension(`1`);
172	const int input_cols = input.dimension(`2`);
173	const int depth = input.dimension(`3`);
174
175	const int filter_rows = filter.dimension(`0`);
176	const int filter_cols = filter.dimension(`1`);
177
178	const int output_rows = output.dimension(`1`);
179	const int output_cols = output.dimension(`2`);
180
181	// This is a reference implementation, likely to be slow.
182	// TODO(gpapan): Write multi-threaded implementation.
183	for (int b = `0`; b < batch; ++b) {
184	for (int h_out = `0`; h_out < output_rows; ++h_out) {
185	int h_beg = h_out * stride_rows - pad_top;
186	for (int w_out = `0`; w_out < output_cols; ++w_out) {
187	int w_beg = w_out * stride_cols - pad_left;
188	for (int d = `0`; d < depth; ++d) {
189	T cur_val = Eigen::NumTraits<T>::lowest();
190	for (int h = `0`; h < filter_rows; ++h) {
191	const int h_in = h_beg + h * rate_rows;
192	if (h_in >= `0` && h_in < input_rows) {
193	for (int w = `0`; w < filter_cols; ++w) {
194	const int w_in = w_beg + w * rate_cols;
195	if (w_in >= `0` && w_in < input_cols) {
196	const T val = input(b, h_in, w_in, d) + filter(h, w, d);
197	if (val > cur_val) {
198	cur_val = val;
199	}
200	}
201	}
202	}
203	}
204	output(b, h_out, w_out, d) = cur_val;
205	}
206	}
207	}
208	}
209	}
210	};
211	} // namespace functor
212
213	template <typename Device, typename T>
214	class DilationBackpropInputOp : public OpKernel {
215	public:
216	explicit DilationBackpropInputOp(OpKernelConstruction* context)
217	: OpKernel(context) {
218	ParseAttributes(context, &strides_, &rates_, &padding_);
219	}
220
221	void Compute(OpKernelContext* context) override {
222	const Tensor& input = context->input(`0`);
223	const Tensor& filter = context->input(`1`);
224	const Tensor& out_backprop = context->input(`2`);
225
226	if (std::is_same<Device, GPUDevice>::value) {
227	OP_REQUIRES(context, !tensorflow::OpDeterminismRequired(),
228	errors::Unimplemented("Determinism is not yet supported "
229	"for Dilation2DBackpropInput."));
230	}
231	// Determine relevant sizes from input and filters.
232	int stride_rows = `0`, stride_cols = `0`;
233	int rate_rows = `0`, rate_cols = `0`;
234	int64_t pad_top = `0`, pad_left = `0`;
235	int64_t out_rows = `0`, out_cols = `0`;
236	ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols,
237	&rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows,
238	&out_cols);
239	if (!context->status().ok()) return;
240
241	// Verify that the incoming gradient tensor has the expected size
242	// [ batch, out_rows, out_cols, depth ]
243	const int batch = input.dim_size(`0`);
244	const int depth = input.dim_size(`3`);
245	OP_REQUIRES(context,
246	batch == out_backprop.dim_size(`0`) &&
247	out_rows == out_backprop.dim_size(`1`) &&
248	out_cols == out_backprop.dim_size(`2`) &&
249	depth == out_backprop.dim_size(`3`),
250	errors::InvalidArgument("out_backprop has incompatible size."));
251
252	// The computed in_backprop has the same dimensions as the input:
253	// [ batch, input_rows, input_cols, depth ]
254	Tensor* in_backprop = nullptr;
255	OP_REQUIRES_OK(context,
256	context->allocate_output(`0`, input.shape(), &in_backprop));
257
258	// If there is nothing to compute, return.
259	if (input.shape().num_elements() == `0`) {
260	return;
261	}
262
263	functor::DilationBackpropInput<Device, T>()(
264	context->eigen_device<Device>(), input.tensor<T, `4`>(),
265	filter.tensor<T, `3`>(), out_backprop.tensor<T, `4`>(), stride_rows,
266	stride_cols, rate_rows, rate_cols, pad_top, pad_left,
267	in_backprop->tensor<T, `4`>());
268	}
269
270	std::vector<int32> strides_;
271	std::vector<int32> rates_;
272	Padding padding_;
273	};
274
275	// Partial specialization of DilationBackpropInput functor for a CPUDevice.
276	namespace functor {
277	template <typename T>
278	struct DilationBackpropInput<CPUDevice, T> {
279	void operator()(const CPUDevice& d, typename TTypes<T, `4`>::ConstTensor input,
280	typename TTypes<T, `3`>::ConstTensor filter,
281	typename TTypes<T, `4`>::ConstTensor out_backprop,
282	int stride_rows, int stride_cols, int rate_rows,
283	int rate_cols, int pad_top, int pad_left,
284	typename TTypes<T, `4`>::Tensor in_backprop) {
285	const int batch = input.dimension(`0`);
286	const int input_rows = input.dimension(`1`);
287	const int input_cols = input.dimension(`2`);
288	const int depth = input.dimension(`3`);
289
290	const int filter_rows = filter.dimension(`0`);
291	const int filter_cols = filter.dimension(`1`);
292
293	const int output_rows = out_backprop.dimension(`1`);
294	const int output_cols = out_backprop.dimension(`2`);
295
296	// Initialize gradient with all zeros.
297	in_backprop.setZero();
298
299	// This is a reference implementation, likely to be slow.
300	// TODO(gpapan): Write multi-threaded implementation.
301	// In the case of multiple argmax branches, we only back-propagate along the
302	// last branch, i.e., the one with largest value of `h filter_cols + w`,*
303	// similarly to the max-pooling backward routines.
304	for (int b = `0`; b < batch; ++b) {
305	for (int h_out = `0`; h_out < output_rows; ++h_out) {
306	int h_beg = h_out * stride_rows - pad_top;
307	for (int w_out = `0`; w_out < output_cols; ++w_out) {
308	int w_beg = w_out * stride_cols - pad_left;
309	for (int d = `0`; d < depth; ++d) {
310	T cur_val = Eigen::NumTraits<T>::lowest();
311	int h_in_max = (h_beg < `0`) ? `0` : h_beg;
312	int w_in_max = (w_beg < `0`) ? `0` : w_beg;
313	for (int h = `0`; h < filter_rows; ++h) {
314	const int h_in = h_beg + h * rate_rows;
315	if (h_in >= `0` && h_in < input_rows) {
316	for (int w = `0`; w < filter_cols; ++w) {
317	const int w_in = w_beg + w * rate_cols;
318	if (w_in >= `0` && w_in < input_cols) {
319	const T val = input(b, h_in, w_in, d) + filter(h, w, d);
320	if (val > cur_val) {
321	cur_val = val;
322	h_in_max = h_in;
323	w_in_max = w_in;
324	}
325	}
326	}
327	}
328	}
329	if (h_in_max < input_rows && w_in_max < input_cols) {
330	in_backprop(b, h_in_max, w_in_max, d) +=
331	out_backprop(b, h_out, w_out, d);
332	}
333	}
334	}
335	}
336	}
337	}
338	};
339	} // namespace functor
340
341	template <typename Device, typename T>
342	class DilationBackpropFilterOp : public OpKernel {
343	public:
344	explicit DilationBackpropFilterOp(OpKernelConstruction* context)
345	: OpKernel(context) {
346	ParseAttributes(context, &strides_, &rates_, &padding_);
347	}
348
349	void Compute(OpKernelContext* context) override {
350	if (std::is_same<Device, GPUDevice>::value) {
351	OP_REQUIRES(context, !tensorflow::OpDeterminismRequired(),
352	errors::Unimplemented("Determinism is not yet supported "
353	"for Dilation2DBackpropFilter."));
354	}
355	const Tensor& input = context->input(`0`);
356	const Tensor& filter = context->input(`1`);
357	const Tensor& out_backprop = context->input(`2`);
358
359	// Determine relevant sizes from input and filters.
360	int stride_rows = `0`, stride_cols = `0`;
361	int rate_rows = `0`, rate_cols = `0`;
362	int64_t pad_top = `0`, pad_left = `0`;
363	int64_t out_rows = `0`, out_cols = `0`;
364	ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols,
365	&rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows,
366	&out_cols);
367	if (!context->status().ok()) return;
368
369	// Verify that the incoming gradient tensor has the expected size
370	// [ batch, out_rows, out_cols, depth ]
371	const int batch = input.dim_size(`0`);
372	const int depth = input.dim_size(`3`);
373	OP_REQUIRES(context,
374	batch == out_backprop.dim_size(`0`) &&
375	out_rows == out_backprop.dim_size(`1`) &&
376	out_cols == out_backprop.dim_size(`2`) &&
377	depth == out_backprop.dim_size(`3`),
378	errors::InvalidArgument("out_backprop has incompatible size."));
379
380	// The computed filter_backprop has the same dimensions as the filter:
381	// [ batch, input_rows, input_cols, depth ]
382	Tensor* filter_backprop = nullptr;
383	OP_REQUIRES_OK(
384	context, context->allocate_output(`0`, filter.shape(), &filter_backprop));
385
386	// If there is nothing to compute, return.
387	if (filter.shape().num_elements() == `0`) {
388	return;
389	}
390
391	functor::DilationBackpropFilter<Device, T>()(
392	context->eigen_device<Device>(), input.tensor<T, `4`>(),
393	filter.tensor<T, `3`>(), out_backprop.tensor<T, `4`>(), stride_rows,
394	stride_cols, rate_rows, rate_cols, pad_top, pad_left,
395	filter_backprop->tensor<T, `3`>());
396	}
397
398	std::vector<int32> strides_;
399	std::vector<int32> rates_;
400	Padding padding_;
401	};
402
403	// Partial specialization of DilationBackpropFilter functor for a CPUDevice.
404	namespace functor {
405	template <typename T>
406	struct DilationBackpropFilter<CPUDevice, T> {
407	void operator()(const CPUDevice& d, typename TTypes<T, `4`>::ConstTensor input,
408	typename TTypes<T, `3`>::ConstTensor filter,
409	typename TTypes<T, `4`>::ConstTensor out_backprop,
410	int stride_rows, int stride_cols, int rate_rows,
411	int rate_cols, int pad_top, int pad_left,
412	typename TTypes<T, `3`>::Tensor filter_backprop) {
413	const int batch = input.dimension(`0`);
414	const int input_rows = input.dimension(`1`);
415	const int input_cols = input.dimension(`2`);
416	const int depth = input.dimension(`3`);
417
418	const int filter_rows = filter.dimension(`0`);
419	const int filter_cols = filter.dimension(`1`);
420
421	const int output_rows = out_backprop.dimension(`1`);
422	const int output_cols = out_backprop.dimension(`2`);
423
424	// Initialize gradient with all zeros.
425	filter_backprop.setZero();
426
427	// This is a reference implementation, likely to be slow.
428	// TODO(gpapan): Write multi-threaded implementation.
429	// In the case of multiple argmax branches, we only back-propagate along the
430	// last branch, i.e., the one with largest value of `h filter_cols + w`,*
431	// similarly to the max-pooling backward routines.
432	for (int b = `0`; b < batch; ++b) {
433	for (int h_out = `0`; h_out < output_rows; ++h_out) {
434	int h_beg = h_out * stride_rows - pad_top;
435	for (int w_out = `0`; w_out < output_cols; ++w_out) {
436	int w_beg = w_out * stride_cols - pad_left;
437	for (int d = `0`; d < depth; ++d) {
438	T cur_val = Eigen::NumTraits<T>::lowest();
439	int h_max = `0`;
440	int w_max = `0`;
441	for (int h = `0`; h < filter_rows; ++h) {
442	const int h_in = h_beg + h * rate_rows;
443	if (h_in >= `0` && h_in < input_rows) {
444	for (int w = `0`; w < filter_cols; ++w) {
445	const int w_in = w_beg + w * rate_cols;
446	if (w_in >= `0` && w_in < input_cols) {
447	const T val = input(b, h_in, w_in, d) + filter(h, w, d);
448	if (val > cur_val) {
449	cur_val = val;
450	h_max = h;
451	w_max = w;
452	}
453	}
454	}
455	}
456	}
457	if (h_max < filter_rows && w_max < filter_cols) {
458	filter_backprop(h_max, w_max, d) +=
459	out_backprop(b, h_out, w_out, d);
460	}
461	}
462	}
463	}
464	}
465	}
466	};
467	} // namespace functor
468
469	#define REGISTER(T) \
470	REGISTER_KERNEL_BUILDER( \
471	Name("Dilation2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
472	DilationOp<CPUDevice, T>); \
473	\
474	REGISTER_KERNEL_BUILDER(Name("Dilation2DBackpropInput") \
475	.Device(DEVICE_CPU) \
476	.TypeConstraint<T>("T"), \
477	DilationBackpropInputOp<CPUDevice, T>); \
478	\
479	REGISTER_KERNEL_BUILDER(Name("Dilation2DBackpropFilter") \
480	.Device(DEVICE_CPU) \
481	.TypeConstraint<T>("T"), \
482	DilationBackpropFilterOp<CPUDevice, T>);
483
484	TF_CALL_REAL_NUMBER_TYPES(REGISTER);
485
486	#undef REGISTER
487
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// Registers the three dilation kernels (forward, input gradient, filter
// gradient) on DEVICE_GPU for element type T. Only compiled when building
// with CUDA or ROCm. The macro is expanded once per type by
// TF_CALL_GPU_NUMBER_TYPES and undefined right afterwards.
#define REGISTER_DILATION_GPU_KERNELS(T)                             \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("Dilation2D").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
      DilationOp<GPUDevice, T>);                                     \
                                                                     \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("Dilation2DBackpropInput")                                \
          .Device(DEVICE_GPU)                                        \
          .TypeConstraint<T>("T"),                                   \
      DilationBackpropInputOp<GPUDevice, T>);                        \
                                                                     \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("Dilation2DBackpropFilter")                               \
          .Device(DEVICE_GPU)                                        \
          .TypeConstraint<T>("T"),                                   \
      DilationBackpropFilterOp<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER_DILATION_GPU_KERNELS);

#undef REGISTER_DILATION_GPU_KERNELS

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
510
511	} // namespace tensorflow
512

Browse the source code of tensorflow/tensorflow/core/kernels/dilation_ops.cc