1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_ |
17 | #define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_ |
18 | |
19 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
20 | |
21 | namespace Eigen { |
22 | |
23 | /** SpatialMaxPooling |
24 | * \ingroup CXX11_NeuralNetworks_Module |
25 | * |
26 | * \brief Applies a max-pooling over a multichannel input image. |
27 | * |
 * The input parameter is expected to be a tensor with a rank of 4 (channels, height,
29 | * width, others in col-major, and the reverse of that in row-major). |
30 | * |
31 | * The result can be assigned to a tensor of rank equal to the rank of the |
32 | * input. The dimensions of the result will be channels, height, width, and |
33 | * others (in col-major, and the reverse of that if the input was row-major). |
34 | * |
35 | * The order of the width and height dimensions can be swapped if needed. |
36 | * |
37 | */ |
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<
            std::remove_const_t<typename internal::traits<Input>::Scalar>>,
        std::conditional_t<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
            const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input>>>
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial max pooling is only defined for rank-4 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  // TensorRef gives cheap access to the input's dimensions without forcing an
  // evaluation of the (possibly lazy) input expression.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extent once the in-patch (dilation) strides are taken
  // into account: a patch of size p with dilation d spans p + (p-1)*(d-1)
  // input elements.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  // The spatial dimensions live at indices 1 and 2 in col-major layouts and
  // are mirrored to 2 and 1 in row-major layouts.
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input
    // contribute to the output.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // SAME padding: the output covers ceil(input_extent / stride) positions.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  std::conditional_t<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
      const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>
      reduction_dims;

  // Padding entries are filled with the scalar type's lowest value so they
  // can never win the max reduction.
  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          Eigen::NumTraits<std::remove_const_t<
              typename internal::traits<Input>::Scalar>>::lowest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
113 | |
114 | /** CuboidMaxPooling |
115 | * \ingroup CXX11_NeuralNetworks_Module |
116 | * |
117 | * \brief Applies a max-pooling over a multichannel input volume. |
118 | * |
119 | * The input parameter is expected to be a tensor with a rank of 5 (channels, |
120 | * depth, height, width, others in col-major, and the reverse of that in |
121 | * row-major). |
122 | * |
123 | * The result can be assigned to a tensor of rank equal to the rank of the |
124 | * input. The dimensions of the result will be channels, depth, height, width, |
125 | * and others (in col-major, and the reverse of that if the input was |
126 | * row-major). |
127 | * |
128 | * The order of the depth, width and height dimensions can be swapped if |
129 | * needed. |
130 | * |
131 | */ |
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid max pooling is only defined for rank-5 inputs.
  // NOTE(review): the reducer in the return type and the padding value below
  // are hard-coded to float (unlike SpatialMaxPooling, which uses the input's
  // scalar type) — presumably only float inputs are supported here; confirm
  // before using with other scalar types.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  // TensorRef gives cheap access to the input's dimensions without forcing an
  // evaluation of the (possibly lazy) input expression.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // The plane/row/col dimensions live at indices 1/2/3 in col-major layouts
  // and are mirrored to 3/2/1 in row-major layouts.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input
    // contribute to the output.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // SAME padding: the output covers ceil(input_extent / stride) positions.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Flatten the extracted volume patches to rank 3 so that the max reduction
  // runs over a single axis (index 1, the per-patch elements).
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
  // Padding entries are filled with -highest() so they can never win the max
  // reduction.
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
213 | |
214 | /** SpatialAvgPooling |
215 | * \ingroup CXX11_NeuralNetworks_Module |
216 | * |
217 | * \brief Applies an average pooling over a multichannel input image. |
218 | * |
219 | * The input parameter is expected to be a tensor with a rank of 4 (channels, |
220 | * height, width, others in col-major, and the reverse of that in row-major). |
221 | * |
222 | * The result can be assigned to a tensor of rank equal to the rank of the |
223 | * input. The dimensions of the result will be channels, height, width, and |
224 | * others (in col-major, and the reverse of that if the input was row-major). |
225 | * |
226 | * The order of the width and height dimensions can be swapped if needed. |
227 | * |
228 | */ |
229 | namespace internal { |
230 | |
// Mean reducer that ignores the padding sentinel. The average-pooling
// functions in this file pad patches with -NumTraits&lt;T&gt;::highest(); this
// reducer skips values equal to that sentinel so padded entries contribute
// to neither the sum nor the divisor of the mean.
template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
  // We only support packet access for floats.
  static constexpr bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  // Stateful: the element counts live in the reducer instance itself, so an
  // instance must not be shared across concurrent reductions.
  static constexpr bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
#if defined(__HIPCC__)
    packetCount_ = 0;
#else
    packetCount_ = pset1<Packet>(T(0.0));
#endif
  }

  // Adds t to *accum unless it equals the padding sentinel; counts how many
  // real values have been accumulated in scalarCount_.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  // The running sum starts at zero.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  // Converts the accumulated sum into a mean over the non-sentinel values.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
// pequal(a, b): per-lane equality mask (all-ones where equal).
// psel(a, b, false_mask): per-lane select returning a where the mask is
// clear and b where it is set.
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b) \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

// The ternarylogic function immediate determines the values in the result
// In the case below, 0xd8 implies (false_mask) ? (b) : (a)
// For details, refer to the vpternlogd instruction table at
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask) \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32( \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  // Vector analogue of reduce(): lanes holding the padding sentinel are
  // masked out of both the lane sums and the per-lane element counts.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType(
      T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  // Per-lane mean: divide each lane's sum by that lane's element count.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  // Combines the scalar and vector accumulators into a single overall mean.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  // Count of non-sentinel values folded in through the scalar reduce() path.
  int scalarCount_;
#if defined(__HIPCC__)
  int packetCount_;
#else
  // Per-lane counts of non-sentinel values folded in through the packet path.
  Packet packetCount_;
#endif
};
334 | |
// Reduction traits for the average-pooling reducer on non-GPU devices.
template <typename Device>
struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
  enum {
    Cost = 1,
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
    // We only support packet access for floats.
    PacketAccess = true,
#else
    PacketAccess = false,
#endif
    IsStateful = true,
    // Floating-point addition is not exactly associative, so different
    // reduction orders may yield slightly different results.
    IsExactlyAssociative = false
  };
};
350 | |
// On GPU, packet access is disabled for the stateful mean reducer.
template <>
struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
  enum {
    Cost = 1,
    PacketAccess = false,
    IsStateful = true,
    IsExactlyAssociative = false
  };
};
360 | |
361 | } // namespace internal |
362 | |
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<
            std::remove_const_t<typename internal::traits<Input>::Scalar>>,
        std::conditional_t<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
            const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input>>>
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial average pooling is only defined for rank-4 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  // TensorRef gives cheap access to the input's dimensions without forcing an
  // evaluation of the (possibly lazy) input expression.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extent once the in-patch (dilation) strides are taken
  // into account: a patch of size p with dilation d spans p + (p-1)*(d-1)
  // input elements.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  // The spatial dimensions live at indices 1 and 2 in col-major layouts and
  // are mirrored to 2 and 1 in row-major layouts.
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input
    // contribute to the output.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // SAME padding: the output covers ceil(input_extent / stride) positions.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  // The reducer skips any value equal to -NumTraits&lt;T&gt;::highest(), which is
  // exactly the padding value passed to extract_image_patches below, so
  // padded entries do not contribute to the mean.
  typedef std::remove_const_t<typename internal::traits<Input>::Scalar>
      CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  std::conditional_t<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
      const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>
      reduction_dims;
  return input
      .extract_image_patches(patchRows, patchCols, strideRows, strideCols,
                             in_strideRows, in_strideCols, padding_type,
                             -Eigen::NumTraits<CoeffReturnType>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
439 | |
440 | /** CuboidAvgPooling |
441 | * \ingroup CXX11_NeuralNetworks_Module |
442 | * |
443 | * \brief Applies an average pooling over a multichannel input volume. |
444 | * |
445 | * The input parameter is expected to be a tensor with a rank of 5 (channels, |
 * depth, height, width, others in col-major, and the reverse of that in row-major).
447 | * |
448 | * The result can be assigned to a tensor of rank equal to the rank of the |
 * input. The dimensions of the result will be channels, depth, height, width, and
450 | * others (in col-major, and the reverse of that if the input was row-major). |
451 | * |
452 | * The order of the depth, width and height dimensions can be swapped if |
453 | * needed. |
454 | * |
455 | */ |
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid average pooling is only defined for rank-5 inputs.
  // NOTE(review): the return type hard-codes AvgPoolMeanReducer&lt;float&gt; and
  // the padding value below uses NumTraits&lt;float&gt;, while the reducer
  // instantiated in the body uses the input's scalar type — presumably only
  // float inputs are supported here; confirm before reusing with other
  // scalar types.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  // TensorRef gives cheap access to the input's dimensions without forcing an
  // evaluation of the (possibly lazy) input expression.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // The plane/row/col dimensions live at indices 1/2/3 in col-major layouts
  // and are mirrored to 3/2/1 in row-major layouts.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;
  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only windows that fit entirely inside the input
    // contribute to the output.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // SAME padding: the output covers ceil(input_extent / stride) positions.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Flatten the extracted volume patches to rank 3 so that the mean reduction
  // runs over a single axis (index 1, the per-patch elements).
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  // The reducer skips any value equal to -NumTraits&lt;T&gt;::highest(), which is
  // the padding value passed to extract_volume_patches below, so padded
  // entries do not contribute to the mean.
  typedef std::remove_const_t<typename internal::traits<Input>::Scalar>
      CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
540 | |
541 | } // end namespace Eigen |
542 | |
543 | #endif // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_ |
544 | |