1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #define EIGEN_USE_THREADS |
17 | |
18 | #include <algorithm> |
19 | #include <cmath> |
20 | #include <random> |
21 | #include <vector> |
22 | |
23 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
24 | #include "tensorflow/core/framework/numeric_op.h" |
25 | #include "tensorflow/core/framework/op_kernel.h" |
26 | #include "tensorflow/core/kernels/fractional_pool_common.h" |
27 | #include "tensorflow/core/lib/random/random.h" |
28 | #include "tensorflow/core/platform/logging.h" |
29 | #include "tensorflow/core/platform/mutex.h" |
30 | #include "tensorflow/core/util/guarded_philox_random.h" |
31 | #include "tensorflow/core/util/overflow.h" |
32 | |
33 | namespace tensorflow { |
34 | typedef Eigen::ThreadPoolDevice CPUDevice; |
35 | |
36 | template <typename T> |
37 | class FractionalAvgPoolOp : public OpKernel { |
38 | public: |
39 | explicit FractionalAvgPoolOp(OpKernelConstruction* context) |
40 | : OpKernel(context) { |
41 | OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio" , &pooling_ratio_)); |
42 | OP_REQUIRES_OK(context, context->GetAttr("pseudo_random" , &pseudo_random_)); |
43 | OP_REQUIRES_OK(context, context->GetAttr("overlapping" , &overlapping_)); |
44 | OP_REQUIRES(context, pooling_ratio_.size() == 4, |
45 | errors::InvalidArgument( |
46 | "pooling_ratio field must specify 4 dimensions" )); |
47 | OP_REQUIRES( |
48 | context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1, |
49 | errors::Unimplemented("Fractional average pooling is not yet " |
50 | "supported on the batch nor channel dimension." )); |
51 | OP_REQUIRES_OK(context, context->GetAttr("deterministic" , &deterministic_)); |
52 | OP_REQUIRES_OK(context, context->GetAttr("seed" , &seed_)); |
53 | OP_REQUIRES_OK(context, context->GetAttr("seed2" , &seed2_)); |
54 | if (deterministic_) { |
      // If deterministic_ is true but neither seed was given, draw both
      // here. The generator is re-seeded with these values on every call to
      // Compute(), so the pooling sequence stays fixed across calls.
56 | if ((seed_ == 0) && (seed2_ == 0)) { |
57 | seed_ = random::New64(); |
58 | seed2_ = random::New64(); |
59 | } |
60 | } else { |
61 | OP_REQUIRES( |
62 | context, (seed_ == 0) && (seed2_ == 0), |
63 | errors::InvalidArgument( |
64 | "Both seed and seed2 should be 0 if deterministic is false." )); |
65 | } |
66 | } |
67 | |
68 | void Compute(OpKernelContext* context) override { |
69 | typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> |
70 | ConstEigenMatrixMap; |
71 | typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> |
72 | EigenMatrixMap; |
73 | |
74 | constexpr int tensor_in_and_out_dims = 4; |
75 | |
76 | const Tensor& tensor_in = context->input(0); |
77 | OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims, |
78 | errors::InvalidArgument("tensor_in must be 4-dimensional" )); |
79 | |
80 | std::vector<int> input_size(tensor_in_and_out_dims); |
81 | std::vector<int> output_size(tensor_in_and_out_dims); |
82 | for (int i = 0; i < tensor_in_and_out_dims; ++i) { |
83 | input_size[i] = tensor_in.dim_size(i); |
84 | OP_REQUIRES( |
85 | context, pooling_ratio_[i] <= input_size[i], |
86 | errors::InvalidArgument( |
87 | "Pooling ratio cannot be bigger than input tensor dim size." )); |
88 | } |
89 | // Output size. |
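    // For example, an input with 10 rows and a row pooling ratio of 1.44
    // yields static_cast<int>(std::floor(10 / 1.44)) = 6 output rows.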
90 | for (int i = 0; i < tensor_in_and_out_dims; ++i) { |
91 | output_size[i] = |
92 | static_cast<int>(std::floor(input_size[i] / pooling_ratio_[i])); |
93 | DCHECK_GT(output_size[i], 0); |
94 | } |
95 | |
96 | // Generate pooling sequence. |
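    // Each sequence is cumulative: entry i is the start index of pooling
    // cell i and the last entry equals the input size, so a sequence of
    // length n describes n - 1 cells. As an illustrative example (the actual
    // boundaries are drawn randomly), pooling 10 rows down to 5 might yield
    // [0, 2, 4, 6, 8, 10], where cell 0 covers rows [0, 1] (rows [0, 2] when
    // overlapping_ is true). pseudo_random_ selects between the pseudorandom
    // and random generation schemes of the fractional pooling paper
    // (http://arxiv.org/abs/1412.6071).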
97 | std::vector<int64_t> row_cum_seq; |
98 | std::vector<int64_t> col_cum_seq; |
99 | GuardedPhiloxRandom generator; |
100 | generator.Init(seed_, seed2_); |
101 | row_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1], |
102 | &generator, pseudo_random_); |
103 | col_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2], |
104 | &generator, pseudo_random_); |
105 | |
106 | // Prepare output. |
107 | Tensor* output_tensor = nullptr; |
108 | OP_REQUIRES_OK(context, context->allocate_output( |
109 | 0, |
110 | TensorShape({output_size[0], output_size[1], |
111 | output_size[2], output_size[3]}), |
112 | &output_tensor)); |
113 | Tensor* output_row_seq_tensor = nullptr; |
114 | OP_REQUIRES_OK( |
115 | context, context->allocate_output( |
116 | 1, TensorShape({static_cast<int64_t>(row_cum_seq.size())}), |
117 | &output_row_seq_tensor)); |
118 | Tensor* output_col_seq_tensor = nullptr; |
119 | OP_REQUIRES_OK( |
120 | context, context->allocate_output( |
121 | 2, TensorShape({static_cast<int64_t>(col_cum_seq.size())}), |
122 | &output_col_seq_tensor)); |
123 | |
124 | ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3], |
125 | input_size[2] * input_size[1] * input_size[0]); |
126 | |
127 | EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3], |
128 | output_size[2] * output_size[1] * output_size[0]); |
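    // Both maps view the 4D NHWC tensor as a 2D matrix with depth running
    // down the rows and one column per (batch, row, col) spatial location,
    // so pooling a cell reduces to summing a set of columns into one column.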
    // out_count(i) holds the number of input elements summed into output
    // column i; it is used below to turn the sums into averages.
130 | Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols()); |
131 | |
    // Initialize the output tensor and out_count to 0.
133 | out_mat.setZero(); |
134 | out_count.setZero(); |
135 | |
136 | auto output_row_seq_flat = output_row_seq_tensor->flat<int64_t>(); |
137 | auto output_col_seq_flat = output_col_seq_tensor->flat<int64_t>(); |
138 | |
139 | // Set output tensors. |
140 | for (int i = 0; i < row_cum_seq.size(); ++i) { |
141 | output_row_seq_flat(i) = row_cum_seq[i]; |
142 | } |
143 | |
144 | for (int i = 0; i < col_cum_seq.size(); ++i) { |
145 | output_col_seq_flat(i) = col_cum_seq[i]; |
146 | } |
147 | |
    // For both input and output,
    // 0: batch
    // 1: height / row
    // 2: width / col
    // 3: depth / channel
153 | const int64_t row_max = input_size[1] - 1; |
154 | const int64_t col_max = input_size[2] - 1; |
155 | for (int64_t b = 0; b < input_size[0]; ++b) { |
156 | // row sequence. |
157 | for (int64_t hs = 0; hs < row_cum_seq.size() - 1; ++hs) { |
158 | // row start and end. |
159 | const int64_t row_start = row_cum_seq[hs]; |
160 | int64_t row_end = |
161 | overlapping_ ? row_cum_seq[hs + 1] : row_cum_seq[hs + 1] - 1; |
162 | row_end = std::min(row_end, row_max); |
163 | |
164 | // col sequence. |
165 | for (int64_t ws = 0; ws < col_cum_seq.size() - 1; ++ws) { |
166 | const int64_t out_offset = |
167 | (b * output_size[1] + hs) * output_size[2] + ws; |
168 | // col start and end. |
169 | const int64_t col_start = col_cum_seq[ws]; |
170 | int64_t col_end = |
171 | overlapping_ ? col_cum_seq[ws + 1] : col_cum_seq[ws + 1] - 1; |
172 | col_end = std::min(col_end, col_max); |
173 | for (int64_t h = row_start; h <= row_end; ++h) { |
174 | for (int64_t w = col_start; w <= col_end; ++w) { |
175 | const int64_t in_offset = |
176 | (b * input_size[1] + h) * input_size[2] + w; |
177 | out_mat.col(out_offset) += in_mat.col(in_offset); |
178 | out_count(out_offset)++; |
179 | } |
180 | } |
181 | } |
182 | } |
183 | } |
184 | DCHECK_GT(out_count.minCoeff(), 0); |
185 | out_mat.array().rowwise() /= out_count.transpose().array(); |
186 | } |
187 | |
188 | private: |
189 | bool deterministic_; |
190 | int64_t seed_; |
191 | int64_t seed2_; |
192 | std::vector<float> pooling_ratio_; |
193 | bool pseudo_random_; |
194 | bool overlapping_; |
195 | }; |
196 | |
197 | #define REGISTER_FRACTIONALAVGPOOL(type) \ |
198 | REGISTER_KERNEL_BUILDER( \ |
199 | Name("FractionalAvgPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
200 | FractionalAvgPoolOp<type>) |
201 | |
202 | REGISTER_FRACTIONALAVGPOOL(int32); |
203 | REGISTER_FRACTIONALAVGPOOL(int64_t); |
204 | REGISTER_FRACTIONALAVGPOOL(float); |
205 | REGISTER_FRACTIONALAVGPOOL(double); |
206 | |
207 | #undef REGISTER_FRACTIONALAVGPOOL |
208 | |
209 | template <class T> |
210 | class FractionalAvgPoolGradOp : public OpKernel { |
211 | public: |
212 | explicit FractionalAvgPoolGradOp(OpKernelConstruction* context) |
213 | : OpKernel(context) { |
214 | OP_REQUIRES_OK(context, context->GetAttr("overlapping" , &overlapping_)); |
215 | } |
216 | |
217 | void Compute(OpKernelContext* context) override { |
    // Here's the basic idea:
    // The batch and depth dimensions are independent of the row and col
    // dimensions, and because FractionalAvgPool currently only supports
    // pooling along rows and cols, we can treat this 4D tensor
    // backpropagation as an operation over a series of 2D planes.
    //
    // For each element of a 'slice' (2D plane) of out_backprop, we need to
    // figure out which input elements contributed to it in the forward
    // FractionalAvgPool operation. This can be determined from
    // row_pooling_sequence, col_pooling_sequence, and overlapping.
    // Once the original contributors are known, we simply divide the value
    // of this element evenly among them.
    //
    // Internally, the divided values are accumulated in a temporary tensor
    // of double type, which is then cast back to the requested type T.
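    // As a hypothetical example: with overlapping_ false, suppose one
    // pooling cell covered rows [2, 3] and cols [4, 6] of the input, i.e.
    // 2 x 3 = 6 elements. An out_backprop value of 6.0 at that cell would
    // then add 6.0 / 6 = 1.0 to each of those six positions of in_backprop
    // (independently for every depth channel d).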
233 | typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> |
234 | ConstEigenMatrixMap; |
235 | typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>> |
236 | EigenDoubleMatrixMap; |
237 | |
238 | // Grab the inputs. |
239 | const Tensor& orig_input_tensor_shape = context->input(0); |
240 | OP_REQUIRES(context, |
241 | orig_input_tensor_shape.dims() == 1 && |
242 | orig_input_tensor_shape.NumElements() == 4, |
243 | errors::InvalidArgument("original input tensor shape must be" |
244 | "1-dimensional and 4 elements" )); |
245 | int64_t num_elements = 1; |
246 | for (int i = 0; i < orig_input_tensor_shape.dims(); i++) { |
247 | OP_REQUIRES(context, orig_input_tensor_shape.dim_size(i) > 0, |
248 | errors::InvalidArgument( |
249 | "orig_input_tensor_shape must be positive, got: " , |
250 | orig_input_tensor_shape.dim_size(i))); |
251 | num_elements = MultiplyWithoutOverflow( |
252 | num_elements, orig_input_tensor_shape.dim_size(i)); |
253 | OP_REQUIRES( |
254 | context, num_elements > 0, |
255 | errors::InvalidArgument( |
256 | "The total elements specified by orig_input_tensor_shape" , |
257 | " is too large. Encountered overflow after multiplying " , |
258 | orig_input_tensor_shape.dim_size(i), ", result: " , num_elements)); |
259 | } |
260 | |
261 | const Tensor& out_backprop = context->input(1); |
262 | OP_REQUIRES(context, out_backprop.dims() == 4, |
263 | errors::InvalidArgument("out_backprop must be 4-dimensional" )); |
264 | for (int i = 0; i < out_backprop.dims(); i++) { |
265 | OP_REQUIRES(context, out_backprop.dim_size(i) > 0, |
                  errors::InvalidArgument(
                      "out_backprop must be positive for all dimensions, "
                      "got: ",
                      out_backprop.dim_size(i)));
269 | } |
270 | |
271 | const Tensor& row_seq_tensor = context->input(2); |
272 | const Tensor& col_seq_tensor = context->input(3); |
273 | |
274 | const int64_t out_batch = out_backprop.dim_size(0); |
275 | const int64_t out_rows = out_backprop.dim_size(1); |
276 | const int64_t out_cols = out_backprop.dim_size(2); |
277 | const int64_t out_depth = out_backprop.dim_size(3); |
278 | |
279 | OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows, |
280 | errors::InvalidArgument("Given out_backprop shape " , |
281 | out_backprop.shape().DebugString(), |
282 | ", row_seq_tensor must have at least " , |
283 | out_rows + 1, " elements, but got " , |
284 | row_seq_tensor.NumElements())); |
285 | OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols, |
286 | errors::InvalidArgument("Given out_backprop shape " , |
287 | out_backprop.shape().DebugString(), |
288 | ", col_seq_tensor must have at least " , |
289 | out_cols + 1, " elements, but got " , |
290 | col_seq_tensor.NumElements())); |
291 | |
292 | auto row_seq_tensor_flat = row_seq_tensor.flat<int64_t>(); |
293 | auto col_seq_tensor_flat = col_seq_tensor.flat<int64_t>(); |
294 | auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64_t>(); |
295 | |
296 | const int64_t in_batch = orig_input_tensor_shape_flat(0); |
297 | const int64_t in_rows = orig_input_tensor_shape_flat(1); |
298 | const int64_t in_cols = orig_input_tensor_shape_flat(2); |
299 | const int64_t in_depth = orig_input_tensor_shape_flat(3); |
    OP_REQUIRES(
        context, in_batch != 0,
        errors::InvalidArgument("Batch dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_rows != 0,
        errors::InvalidArgument("Rows dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_cols != 0,
        errors::InvalidArgument("Columns dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_depth != 0,
        errors::InvalidArgument("Depth dimension of input must not be 0"));
312 | |
313 | constexpr int tensor_in_and_out_dims = 4; |
    // Transform orig_input_tensor_shape into a TensorShape.
315 | TensorShape in_shape; |
316 | for (auto i = 0; i < tensor_in_and_out_dims; ++i) { |
317 | in_shape.AddDim(orig_input_tensor_shape_flat(i)); |
318 | } |
319 | |
320 | // Create intermediate in_backprop. |
321 | Tensor in_backprop_tensor_temp; |
322 | OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp( |
323 | {0}, DataTypeToEnum<double>::v(), in_shape, |
324 | &in_backprop_tensor_temp)); |
325 | in_backprop_tensor_temp.flat<double>().setZero(); |
326 | // Transform 4D tensor to 2D matrix. |
327 | EigenDoubleMatrixMap in_backprop_tensor_temp_mat( |
328 | in_backprop_tensor_temp.flat<double>().data(), in_depth, |
329 | in_cols * in_rows * in_batch); |
330 | ConstEigenMatrixMap out_backprop_mat(out_backprop.flat<T>().data(), |
331 | out_depth, |
332 | out_cols * out_rows * out_batch); |
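    // As in the forward op, both matrices view their 4D NHWC tensors with
    // depth running down the rows and one column per (batch, row, col)
    // location.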
333 | // Loop through each element of out_backprop and evenly distribute the |
334 | // element to the corresponding pooling cell. |
335 | const int64_t in_max_row_index = in_rows - 1; |
336 | const int64_t in_max_col_index = in_cols - 1; |
337 | for (int64_t b = 0; b < out_batch; ++b) { |
338 | for (int64_t r = 0; r < out_rows; ++r) { |
339 | const int64_t in_row_start = row_seq_tensor_flat(r); |
340 | |
341 | int64_t in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1) |
342 | : row_seq_tensor_flat(r + 1) - 1; |
343 | in_row_end = std::min(in_row_end, in_max_row_index); |
        OP_REQUIRES(context, in_row_start >= 0 && in_row_end >= 0,
                    errors::InvalidArgument(
                        "Row sequence tensor values must not be negative, "
                        "got ",
                        row_seq_tensor.DebugString()));
348 | |
349 | for (int64_t c = 0; c < out_cols; ++c) { |
350 | const int64_t in_col_start = col_seq_tensor_flat(c); |
351 | int64_t in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1) |
352 | : col_seq_tensor_flat(c + 1) - 1; |
353 | in_col_end = std::min(in_col_end, in_max_col_index); |
354 | |
          OP_REQUIRES(
              context, in_col_start >= 0 && in_col_end >= 0,
              errors::InvalidArgument(
                  "Column sequence tensor values must not be negative, got ",
                  col_seq_tensor.DebugString()));
360 | const int64_t num_elements_in_pooling_cell = |
361 | (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1); |
362 | const int64_t out_index = (b * out_rows + r) * out_cols + c; |
          // Now we can evenly distribute out_backprop(b, r, c, *) over
          // in_backprop(b, in_row_start:in_row_end,
          //             in_col_start:in_col_end, *).
365 | for (int64_t in_r = in_row_start; in_r <= in_row_end; ++in_r) { |
366 | for (int64_t in_c = in_col_start; in_c <= in_col_end; ++in_c) { |
367 | const int64_t in_index = (b * in_rows + in_r) * in_cols + in_c; |
368 | // Walk through each channel (depth). |
369 | for (int64_t d = 0; d < out_depth; ++d) { |
370 | const double out_backprop_element = static_cast<double>( |
371 | out_backprop_mat.coeffRef(d, out_index)); |
372 | double& in_backprop_ref = |
373 | in_backprop_tensor_temp_mat.coeffRef(d, in_index); |
374 | in_backprop_ref += |
375 | out_backprop_element / num_elements_in_pooling_cell; |
376 | } |
377 | } |
378 | } |
379 | } |
380 | } |
381 | } |
382 | |
    // Cast the double intermediate values back to the requested type T.
384 | Tensor* in_backprop_tensor = nullptr; |
385 | OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( |
386 | {0}, 0, in_shape, &in_backprop_tensor)); |
387 | auto in_backprop_tensor_flat = in_backprop_tensor->flat<T>(); |
388 | auto in_backprop_tensor_temp_flat = in_backprop_tensor_temp.flat<double>(); |
389 | for (int64_t i = 0; i < in_backprop_tensor_flat.size(); ++i) { |
390 | in_backprop_tensor_flat(i) = |
391 | static_cast<T>(in_backprop_tensor_temp_flat(i)); |
392 | } |
393 | } |
394 | |
395 | private: |
396 | bool overlapping_; |
397 | }; |
398 | |
399 | #define REGISTER_FRACTIONALAVGPOOLGRAD(type) \ |
400 | REGISTER_KERNEL_BUILDER(Name("FractionalAvgPoolGrad") \ |
401 | .Device(DEVICE_CPU) \ |
402 | .TypeConstraint<type>("T"), \ |
403 | FractionalAvgPoolGradOp<type>) |
404 | |
405 | REGISTER_FRACTIONALAVGPOOLGRAD(int32); |
406 | REGISTER_FRACTIONALAVGPOOLGRAD(int64_t); |
407 | REGISTER_FRACTIONALAVGPOOLGRAD(float); |
408 | REGISTER_FRACTIONALAVGPOOLGRAD(double); |
409 | |
410 | #undef REGISTER_FRACTIONALAVGPOOLGRAD |
411 | } // namespace tensorflow |
412 | |