/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <algorithm>
#include <cmath>
#include <random>
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/kernels/fractional_pool_common.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/util/guarded_philox_random.h"
#include "tensorflow/core/util/overflow.h"

namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;

template <typename T>
class FractionalAvgPoolOp : public OpKernel {
 public:
  explicit FractionalAvgPoolOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio", &pooling_ratio_));
    OP_REQUIRES_OK(context, context->GetAttr("pseudo_random", &pseudo_random_));
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
    OP_REQUIRES(context, pooling_ratio_.size() == 4,
                errors::InvalidArgument(
                    "pooling_ratio field must specify 4 dimensions"));
    OP_REQUIRES(
        context, pooling_ratio_[0] == 1 && pooling_ratio_[3] == 1,
        errors::Unimplemented("Fractional average pooling is not yet "
                              "supported on the batch or channel dimension."));
    OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
    OP_REQUIRES_OK(context, context->GetAttr("seed", &seed_));
    OP_REQUIRES_OK(context, context->GetAttr("seed2", &seed2_));
    if (deterministic_) {
      // If deterministic_ is true but both seeds are unset, force-set the
      // seeds.
      if ((seed_ == 0) && (seed2_ == 0)) {
        seed_ = random::New64();
        seed2_ = random::New64();
      }
    } else {
      OP_REQUIRES(
          context, (seed_ == 0) && (seed2_ == 0),
          errors::InvalidArgument(
              "Both seed and seed2 should be 0 if deterministic is false."));
    }
  }

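  // Computes fractional average pooling over the row and col dimensions of a
  // 4D NHWC input: pseudo-random (or random) cumulative pooling boundaries
  // are generated per row and per col, every input element inside a pooling
  // cell is summed into the corresponding output element, and the sums are
  // finally divided by the per-cell element counts.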
  void Compute(OpKernelContext* context) override {
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;

    constexpr int tensor_in_and_out_dims = 4;

    const Tensor& tensor_in = context->input(0);
    OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));

    std::vector<int> input_size(tensor_in_and_out_dims);
    std::vector<int> output_size(tensor_in_and_out_dims);
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      input_size[i] = tensor_in.dim_size(i);
      OP_REQUIRES(
          context, pooling_ratio_[i] <= input_size[i],
          errors::InvalidArgument(
              "Pooling ratio cannot be bigger than input tensor dim size."));
    }
    // Output size.
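    // e.g. 10 input rows with pooling_ratio 1.5 yield floor(10 / 1.5) = 6
    // output rows.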
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      output_size[i] =
          static_cast<int>(std::floor(input_size[i] / pooling_ratio_[i]));
      DCHECK_GT(output_size[i], 0);
    }

    // Generate pooling sequence.
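    // row_cum_seq and col_cum_seq hold cumulative pooling boundaries with one
    // more entry than the corresponding output dimension; pooling cell `n`
    // starts at seq[n] and ends at seq[n + 1] - 1 (or at seq[n + 1] when
    // `overlapping_` is true).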
    std::vector<int64_t> row_cum_seq;
    std::vector<int64_t> col_cum_seq;
    GuardedPhiloxRandom generator;
    generator.Init(seed_, seed2_);
    row_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1],
                                          &generator, pseudo_random_);
    col_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2],
                                          &generator, pseudo_random_);

    // Prepare output.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                0,
                                TensorShape({output_size[0], output_size[1],
                                             output_size[2], output_size[3]}),
                                &output_tensor));
    Tensor* output_row_seq_tensor = nullptr;
    OP_REQUIRES_OK(
        context, context->allocate_output(
                     1, TensorShape({static_cast<int64_t>(row_cum_seq.size())}),
                     &output_row_seq_tensor));
    Tensor* output_col_seq_tensor = nullptr;
    OP_REQUIRES_OK(
        context, context->allocate_output(
                     2, TensorShape({static_cast<int64_t>(col_cum_seq.size())}),
                     &output_col_seq_tensor));

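    // View the NHWC tensors as 2D matrices: each column is the depth vector
    // of one (batch, row, col) location, so pooling reduces to summing
    // columns.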
    ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3],
                               input_size[2] * input_size[1] * input_size[0]);

    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3],
                           output_size[2] * output_size[1] * output_size[0]);
    // out_count corresponds to number of elements in each pooling cell.
    Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols());

    // Initializes the output tensor and out_count with 0.
    out_mat.setZero();
    out_count.setZero();

    auto output_row_seq_flat = output_row_seq_tensor->flat<int64_t>();
    auto output_col_seq_flat = output_col_seq_tensor->flat<int64_t>();

    // Set output tensors.
    for (int i = 0; i < row_cum_seq.size(); ++i) {
      output_row_seq_flat(i) = row_cum_seq[i];
    }

    for (int i = 0; i < col_cum_seq.size(); ++i) {
      output_col_seq_flat(i) = col_cum_seq[i];
    }

    // For both input and output,
    // 0: batch
    // 1: row / row
    // 2: col / col
    // 3: depth / channel
    const int64_t row_max = input_size[1] - 1;
    const int64_t col_max = input_size[2] - 1;
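    // Accumulate input values into their pooling cells. When `overlapping_`
    // is true, adjacent cells share their boundary row / col.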
    for (int64_t b = 0; b < input_size[0]; ++b) {
      // row sequence.
      for (int64_t hs = 0; hs < row_cum_seq.size() - 1; ++hs) {
        // row start and end.
        const int64_t row_start = row_cum_seq[hs];
        int64_t row_end =
            overlapping_ ? row_cum_seq[hs + 1] : row_cum_seq[hs + 1] - 1;
        row_end = std::min(row_end, row_max);

        // col sequence.
        for (int64_t ws = 0; ws < col_cum_seq.size() - 1; ++ws) {
          const int64_t out_offset =
              (b * output_size[1] + hs) * output_size[2] + ws;
          // col start and end.
          const int64_t col_start = col_cum_seq[ws];
          int64_t col_end =
              overlapping_ ? col_cum_seq[ws + 1] : col_cum_seq[ws + 1] - 1;
          col_end = std::min(col_end, col_max);
          for (int64_t h = row_start; h <= row_end; ++h) {
            for (int64_t w = col_start; w <= col_end; ++w) {
              const int64_t in_offset =
                  (b * input_size[1] + h) * input_size[2] + w;
              out_mat.col(out_offset) += in_mat.col(in_offset);
              out_count(out_offset)++;
            }
          }
        }
      }
    }
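    // Every pooling cell covers at least one input element, so it is safe to
    // turn the accumulated sums into averages by dividing each output column
    // by its cell's element count.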
    DCHECK_GT(out_count.minCoeff(), 0);
    out_mat.array().rowwise() /= out_count.transpose().array();
  }

 private:
  bool deterministic_;
  int64_t seed_;
  int64_t seed2_;
  std::vector<float> pooling_ratio_;
  bool pseudo_random_;
  bool overlapping_;
};

#define REGISTER_FRACTIONALAVGPOOL(type)                                      \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("FractionalAvgPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      FractionalAvgPoolOp<type>)

REGISTER_FRACTIONALAVGPOOL(int32);
REGISTER_FRACTIONALAVGPOOL(int64_t);
REGISTER_FRACTIONALAVGPOOL(float);
REGISTER_FRACTIONALAVGPOOL(double);

#undef REGISTER_FRACTIONALAVGPOOL

template <class T>
class FractionalAvgPoolGradOp : public OpKernel {
 public:
  explicit FractionalAvgPoolGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
  }

  void Compute(OpKernelContext* context) override {
    // Here's the basic idea:
    // The batch and depth dimensions are independent of the row and col
    // dimensions, and because FractionalAvgPool currently only supports
    // pooling along rows and cols, we can treat the backpropagation through
    // this 4D tensor as an operation over a series of 2D planes.
    //
    // For each element of a 'slice' (2D plane) of output_backprop, we need to
    // figure out its contributors in the forward FractionalAvgPool operation.
    // This can be done based on row_pooling_sequence, col_pooling_sequence
    // and overlapping.
    // Once we figure out the original contributors, we just need to evenly
    // divide the value of this element among these contributors.
    //
    // Internally, we distribute the out_backprop values into a temporary
    // tensor of double type, then cast it back to the corresponding type T.
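    //
    // For example, with overlapping = false and row_pooling_sequence
    // [0, 2, 5], output row 0 was averaged over input rows 0..1 and output
    // row 1 over input rows 2..4, so each out_backprop element is divided by
    // its cell size (rows * cols in the cell) and added to every input
    // position inside that cell.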
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>>
        EigenDoubleMatrixMap;

    // Grab the inputs.
    const Tensor& orig_input_tensor_shape = context->input(0);
    OP_REQUIRES(context,
                orig_input_tensor_shape.dims() == 1 &&
                    orig_input_tensor_shape.NumElements() == 4,
                errors::InvalidArgument("original input tensor shape must be "
                                        "1-dimensional with 4 elements"));
    int64_t num_elements = 1;
    for (int i = 0; i < orig_input_tensor_shape.dims(); i++) {
      OP_REQUIRES(context, orig_input_tensor_shape.dim_size(i) > 0,
                  errors::InvalidArgument(
                      "orig_input_tensor_shape must be positive, got: ",
                      orig_input_tensor_shape.dim_size(i)));
      num_elements = MultiplyWithoutOverflow(
          num_elements, orig_input_tensor_shape.dim_size(i));
      OP_REQUIRES(
          context, num_elements > 0,
          errors::InvalidArgument(
              "The total elements specified by orig_input_tensor_shape",
              " is too large. Encountered overflow after multiplying ",
              orig_input_tensor_shape.dim_size(i), ", result: ", num_elements));
    }

    const Tensor& out_backprop = context->input(1);
    OP_REQUIRES(context, out_backprop.dims() == 4,
                errors::InvalidArgument("out_backprop must be 4-dimensional"));
    for (int i = 0; i < out_backprop.dims(); i++) {
      OP_REQUIRES(context, out_backprop.dim_size(i) > 0,
                  errors::InvalidArgument(
                      "out_backprop must be positive for all dimensions, got: ",
                      out_backprop.dim_size(i)));
    }

    const Tensor& row_seq_tensor = context->input(2);
    const Tensor& col_seq_tensor = context->input(3);

    const int64_t out_batch = out_backprop.dim_size(0);
    const int64_t out_rows = out_backprop.dim_size(1);
    const int64_t out_cols = out_backprop.dim_size(2);
    const int64_t out_depth = out_backprop.dim_size(3);

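    // The pooling sequences store cumulative cell boundaries, so each must
    // contain at least one more entry than the corresponding output
    // dimension.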
    OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows,
                errors::InvalidArgument("Given out_backprop shape ",
                                        out_backprop.shape().DebugString(),
                                        ", row_seq_tensor must have at least ",
                                        out_rows + 1, " elements, but got ",
                                        row_seq_tensor.NumElements()));
    OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols,
                errors::InvalidArgument("Given out_backprop shape ",
                                        out_backprop.shape().DebugString(),
                                        ", col_seq_tensor must have at least ",
                                        out_cols + 1, " elements, but got ",
                                        col_seq_tensor.NumElements()));

    auto row_seq_tensor_flat = row_seq_tensor.flat<int64_t>();
    auto col_seq_tensor_flat = col_seq_tensor.flat<int64_t>();
    auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64_t>();

    const int64_t in_batch = orig_input_tensor_shape_flat(0);
    const int64_t in_rows = orig_input_tensor_shape_flat(1);
    const int64_t in_cols = orig_input_tensor_shape_flat(2);
    const int64_t in_depth = orig_input_tensor_shape_flat(3);
    OP_REQUIRES(
        context, in_batch != 0,
        errors::InvalidArgument("Batch dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_rows != 0,
        errors::InvalidArgument("Rows dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_cols != 0,
        errors::InvalidArgument("Columns dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_depth != 0,
        errors::InvalidArgument("Depth dimension of input must not be 0"));

    constexpr int tensor_in_and_out_dims = 4;
    // Transform orig_input_tensor_shape into TensorShape.
    TensorShape in_shape;
    for (auto i = 0; i < tensor_in_and_out_dims; ++i) {
      in_shape.AddDim(orig_input_tensor_shape_flat(i));
    }

    // Create intermediate in_backprop.
    Tensor in_backprop_tensor_temp;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
                                {0}, DataTypeToEnum<double>::v(), in_shape,
                                &in_backprop_tensor_temp));
    in_backprop_tensor_temp.flat<double>().setZero();
    // Transform 4D tensor to 2D matrix.
    EigenDoubleMatrixMap in_backprop_tensor_temp_mat(
        in_backprop_tensor_temp.flat<double>().data(), in_depth,
        in_cols * in_rows * in_batch);
    ConstEigenMatrixMap out_backprop_mat(out_backprop.flat<T>().data(),
                                         out_depth,
                                         out_cols * out_rows * out_batch);
    // Loop through each element of out_backprop and evenly distribute the
    // element to the corresponding pooling cell.
    const int64_t in_max_row_index = in_rows - 1;
    const int64_t in_max_col_index = in_cols - 1;
    for (int64_t b = 0; b < out_batch; ++b) {
      for (int64_t r = 0; r < out_rows; ++r) {
        const int64_t in_row_start = row_seq_tensor_flat(r);

        int64_t in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1)
                                          : row_seq_tensor_flat(r + 1) - 1;
        in_row_end = std::min(in_row_end, in_max_row_index);
        OP_REQUIRES(context, in_row_start >= 0 && in_row_end >= 0,
                    errors::InvalidArgument(
                        "Row sequence tensor values must not be negative, got ",
                        row_seq_tensor_flat));

        for (int64_t c = 0; c < out_cols; ++c) {
          const int64_t in_col_start = col_seq_tensor_flat(c);
          int64_t in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1)
                                            : col_seq_tensor_flat(c + 1) - 1;
          in_col_end = std::min(in_col_end, in_max_col_index);

          OP_REQUIRES(
              context, in_col_start >= 0 && in_col_end >= 0,
              errors::InvalidArgument(
                  "Column sequence tensor values must not be negative, got ",
                  col_seq_tensor_flat));
          const int64_t num_elements_in_pooling_cell =
              (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1);
          const int64_t out_index = (b * out_rows + r) * out_cols + c;
          // Now we can evenly distribute out_backprop(b, h, w, *) to
          // in_backprop(b, hs:he, ws:we, *).
          for (int64_t in_r = in_row_start; in_r <= in_row_end; ++in_r) {
            for (int64_t in_c = in_col_start; in_c <= in_col_end; ++in_c) {
              const int64_t in_index = (b * in_rows + in_r) * in_cols + in_c;
              // Walk through each channel (depth).
              for (int64_t d = 0; d < out_depth; ++d) {
                const double out_backprop_element = static_cast<double>(
                    out_backprop_mat.coeffRef(d, out_index));
                double& in_backprop_ref =
                    in_backprop_tensor_temp_mat.coeffRef(d, in_index);
                in_backprop_ref +=
                    out_backprop_element / num_elements_in_pooling_cell;
              }
            }
          }
        }
      }
    }

    // Depending on the type, cast double to type T.
    Tensor* in_backprop_tensor = nullptr;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                {0}, 0, in_shape, &in_backprop_tensor));
    auto in_backprop_tensor_flat = in_backprop_tensor->flat<T>();
    auto in_backprop_tensor_temp_flat = in_backprop_tensor_temp.flat<double>();
    for (int64_t i = 0; i < in_backprop_tensor_flat.size(); ++i) {
      in_backprop_tensor_flat(i) =
          static_cast<T>(in_backprop_tensor_temp_flat(i));
    }
  }

 private:
  bool overlapping_;
};

#define REGISTER_FRACTIONALAVGPOOLGRAD(type)              \
  REGISTER_KERNEL_BUILDER(Name("FractionalAvgPoolGrad")   \
                              .Device(DEVICE_CPU)         \
                              .TypeConstraint<type>("T"), \
                          FractionalAvgPoolGradOp<type>)

REGISTER_FRACTIONALAVGPOOLGRAD(int32);
REGISTER_FRACTIONALAVGPOOLGRAD(int64_t);
REGISTER_FRACTIONALAVGPOOLGRAD(float);
REGISTER_FRACTIONALAVGPOOLGRAD(double);

#undef REGISTER_FRACTIONALAVGPOOLGRAD
}  // namespace tensorflow