1 | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ |
17 | #define TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ |
18 | |
19 | #include <array> |
20 | |
21 | #include "tensorflow/core/platform/status.h" |
22 | #include "tensorflow/core/util/padding.h" |
23 | |
24 | namespace tensorflow { |
25 | // GetWindowedOutputSize(): Given the input size, kernel size, stride and |
26 | // padding type of one dimension, computes *output_size and *padding_size. |
27 | // |
28 | // For example, ignoring batches or multiple features, a 1D convolution |
29 | // takes as input a 1D tensor of shape (H), and convolves it with a filter of |
30 | // shape (K). |
31 | // |
32 | // It also takes in a few additional parameters: |
33 | // |
34 | // Stride (S): the stride with which we apply the filters. This is the offset |
35 | // between locations where we apply the filters. A larger stride |
36 | // means that the output will be spatially smaller. |
37 | // |
38 | // Padding (P): the padding we apply to the input tensor along each |
39 | // dimension. This is usually used to make sure that the spatial dimensions |
40 | // do not shrink when we progress with convolutions. This function supports two |
41 | // types of padding: |
42 | // SAME: the pad value is computed so that the output has size ceil(H/S). |
43 | // VALID: no padding is carried out. |
44 | // If you want to use EXPLICIT padding, GetWindowedOutputSizeVerbose must be |
45 | // called instead. Note the padded area is zero-filled. |
46 | // |
47 | // The output dimensions for convolution and many other operations, when given |
48 | // all the parameters above, are as follows: |
49 | // - When Padding = SAME: the output size is (H'), where |
50 | // H' = ceil(float(H) / float(S)) |
51 | // where ceil is the ceiling function. The number of padded cells |
52 | // is computed as: |
53 | // Pc = ((H' - 1) * S + K - H) / 2 |
54 | // When the stride is 1, the expression simplifies to |
55 | // H' = H, Pc = (K-1)/2. |
56 | // This is where SAME comes from - the output has the same size as the input |
57 | // has. |
58 | // |
59 | // - When Padding = VALID: the output size is computed as |
60 | // H' = ceil(float(H - K + 1) / float(S)) |
61 | // and the number of padded cells is always zero. |
62 | // When the stride is 1, the expression simplifies to |
63 | // H' = H-K+1. |
64 | // |
65 | // For convolution, mathematically, the output value at location (r') |
66 | // is the inner product of two vectors: the chunk of input at |
67 | // ((r'*S-Pc) : (r'*S-Pc+K)), |
68 | // and the filter. |
69 | // |
70 | // For 2D and 3D convolutions, the spatial dimensions are orthogonal, so the |
71 | // size and padding of each spatial dimension can be computed by calling |
72 | // GetWindowedOutputSize separately for each dimension. |
73 | // |
74 | Status GetWindowedOutputSize(int64_t input_size, int64_t filter_size, |
75 | int64_t stride, Padding padding_type, |
76 | int64_t* output_size, int64_t* padding_size); |
77 | |
78 | // The V2 version computes the same outputs as GetWindowedOutputSize, but with |
79 | // an arbitrary dilation_rate. The output dimensions are computed as follows: |
80 | // - When adding dilation_rate (D), we compute an effective filter size (K'): |
81 | // K' = (K - 1) * D + 1 (so K' = K when D = 1) |
82 | // - When Padding = SAME: the output size is (H'), where |
83 | // H' = ceil(float(H) / float(S)) |
84 | // where ceil is the ceiling function. The number of padded cells |
85 | // is computed as: |
86 | // Pc = ((H' - 1) * S + K' - H) / 2 |
87 | // When the stride is 1, the expression simplifies to |
88 | // H' = H, Pc = (K'-1)/2. |
89 | // This is where SAME comes from - the output has the same size as the input |
90 | // has. |
91 | // |
92 | // - When Padding = VALID: the output size is computed as |
93 | // H' = ceil(float(H - K' + 1) / float(S)) |
94 | // and the number of padded cells is always zero. |
95 | // When the stride is 1, the expression simplifies to |
96 | // H' = H-K'+1. |
97 | // |
98 | // If you want to use EXPLICIT padding, GetWindowedOutputSizeVerboseV2 must be |
99 | // called instead. |
100 | // |
101 | // TODO(b/67112639): Merge V2 versions and the original versions eventually. |
102 | Status GetWindowedOutputSizeV2(int64_t input_size, int64_t filter_size, |
103 | int64_t dilation_rate, int64_t stride, |
104 | Padding padding_type, int64_t* output_size, |
105 | int64_t* padding_size); |
106 | |
107 | // Computes the same output dimensions as GetWindowedOutputSize, but reports |
108 | // the padding separately for each side (before/after) and also supports |
109 | // EXPLICIT padding. When padding_type is EXPLICIT, padding_before and |
110 | // padding_after must already point to initialized integers holding the padding |
111 | // amounts. Otherwise, *padding_before and *padding_after are set by this |
112 | // function, and any excess padding (caused by an odd total padding size) is |
113 | // added to the 'padding_after' dimension. |
114 | Status GetWindowedOutputSizeVerbose(int64_t input_size, int64_t filter_size, |
115 | int64_t stride, Padding padding_type, |
116 | int64_t* output_size, |
117 | int64_t* padding_before, |
118 | int64_t* padding_after); |
119 | |
120 | // Dilation-aware variant of GetWindowedOutputSizeVerbose(): same outputs, with |
121 | // arbitrary dilation_rate. For equations, see GetWindowedOutputSizeV2(). |
122 | Status GetWindowedOutputSizeVerboseV2(int64_t input_size, int64_t filter_size, |
123 | int64_t dilation_rate, int64_t stride, |
124 | Padding padding_type, |
125 | int64_t* output_size, |
126 | int64_t* padding_before, |
127 | int64_t* padding_after); |
128 | |
129 | // Given 3D input, window (kernel) and stride sizes plus a padding type, fills |
130 | // *output_ptr with the 3D output size and *padding_ptr with the padding applied |
131 | // to the input at the lower end of every dimension. Use for 3D convolutions, |
132 | // where the input is zero-padded, as well as for 3D avg/max pooling, where the |
133 | // input is padded with invalid values that are not considered for pooling. |
134 | // EXPLICIT padding is not supported. |
135 | Status Get3dOutputSize(const std::array<int64_t, 3>& input, |
136 | const std::array<int64_t, 3>& window, |
137 | const std::array<int64_t, 3>& strides, |
138 | Padding padding_type, std::array<int64_t, 3>* output_ptr, |
139 | std::array<int64_t, 3>* padding_ptr); |
140 | |
141 | // Dilation-aware variant of Get3dOutputSize(): same outputs, with arbitrary |
142 | // per-dimension dilations. For equations, see GetWindowedOutputSizeV2(). |
143 | Status Get3dOutputSizeV2(const std::array<int64_t, 3>& input, |
144 | const std::array<int64_t, 3>& window, |
145 | const std::array<int64_t, 3>& dilations, |
146 | const std::array<int64_t, 3>& strides, |
147 | Padding padding_type, |
148 | std::array<int64_t, 3>* output_ptr, |
149 | std::array<int64_t, 3>* padding_ptr); |
150 | |
151 | } // namespace tensorflow |
152 | #endif // TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ |
153 | |