1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // This is a helper struct to package up the input and output |
17 | // parameters of an image resizer (the height, widths, etc.). To |
18 | // reduce code duplication and ensure consistency across the different |
19 | // resizers, it performs the input validation. |
20 | |
21 | #ifndef TENSORFLOW_CORE_UTIL_IMAGE_RESIZER_STATE_H_ |
22 | #define TENSORFLOW_CORE_UTIL_IMAGE_RESIZER_STATE_H_ |
23 | |
24 | #define EIGEN_USE_THREADS |
#include <math.h>

#include <algorithm>
#include <array>
#include <cstdint>
#include <limits>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
37 | |
38 | namespace tensorflow { |
39 | |
// Returns the float factor that relates an output extent to an input extent
// along one axis. When `align_corners` is set and the output has more than one
// element, the factor is (in_size - 1) / (out_size - 1); in every other case
// it is in_size / out_size.
inline float CalculateResizeScale(int64_t in_size, int64_t out_size,
                                  bool align_corners) {
  if (align_corners && out_size > 1) {
    return (in_size - 1) / static_cast<float>(out_size - 1);
  }
  return in_size / static_cast<float>(out_size);
}
47 | |
// Coordinate mapper for the convention in which pixel centers sit at
// half-integer positions, i.e. the top-left pixel is centered at (0.5, 0.5).
struct HalfPixelScaler {
  HalfPixelScaler() {}
  inline float operator()(const int x, const float scale) const {
    // Move to the pixel center, scale, then take the 0.5 back off again: the
    // existing bilinear sampling code etc. assumes pixels are in the old
    // coordinate system.
    const float center = static_cast<float>(x) + 0.5f;
    return center * scale - 0.5f;
  }
};
58 | |
// Older, incorrect scaling method: every resize picks up a slight translation,
// which leads to inconsistent results. For example, a flip followed by a
// resize gives a different result than a resize followed by a flip.
struct LegacyScaler {
  LegacyScaler() {}
  inline float operator()(const int x, const float scale) const {
    const float position = static_cast<float>(x);
    return position * scale;
  }
};
68 | |
69 | struct ImageResizerState { |
70 | explicit ImageResizerState(bool align_corners, bool half_pixel_centers) |
71 | : align_corners_(align_corners), |
72 | half_pixel_centers_(half_pixel_centers) {} |
73 | |
74 | // ValidateAndCalculateOutputSize checks the bounds on the input tensors |
75 | // and requested size, sets up some of the resizing state such as the |
76 | // height_scale and width_scale, and calculates the output size. |
77 | // If any of these operations fails, it sets an error status in |
78 | // the context, which the caller must check. |
79 | void ValidateAndCalculateOutputSize(OpKernelContext* context) { |
80 | OP_REQUIRES( |
81 | context, |
82 | !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_), |
83 | errors::InvalidArgument("If half_pixel_centers is True, " |
84 | "align_corners must be False." )); |
85 | |
86 | const TensorShape& input_shape = context->input(0).shape(); |
87 | OP_REQUIRES(context, input_shape.dims() == 4, |
88 | errors::InvalidArgument("input must be 4-dimensional" , |
89 | input_shape.DebugString())); |
90 | batch_size = input_shape.dim_size(0); |
91 | channels = input_shape.dim_size(3); |
92 | OP_REQUIRES( |
93 | context, channels > 0, |
94 | errors::InvalidArgument("image must have at least one channel" )); |
95 | |
96 | // Verify and assign `in_height` and `in_width`. |
97 | OP_REQUIRES( |
98 | context, input_shape.dim_size(1) > 0 && input_shape.dim_size(2) > 0, |
99 | errors::InvalidArgument("input image must be of non-zero size" )); |
100 | OP_REQUIRES( |
101 | context, |
102 | FastBoundsCheck(input_shape.dim_size(1), |
103 | std::numeric_limits<int32>::max()) && |
104 | FastBoundsCheck(input_shape.dim_size(2), |
105 | std::numeric_limits<int32>::max()), |
106 | errors::InvalidArgument("input sizes must be between 0 and max int32" )); |
107 | in_height = static_cast<int32>(input_shape.dim_size(1)); |
108 | in_width = static_cast<int32>(input_shape.dim_size(2)); |
109 | |
110 | // Verify the output tensor's shape. |
111 | const Tensor& shape_t = context->input(1); |
112 | OP_REQUIRES(context, shape_t.dims() == 1, |
113 | errors::InvalidArgument("shape_t must be 1-dimensional" , |
114 | shape_t.shape().DebugString())); |
115 | OP_REQUIRES(context, shape_t.NumElements() == 2, |
116 | errors::InvalidArgument("shape_t must have two elements" , |
117 | shape_t.shape().DebugString())); |
118 | |
119 | // Verify and assign `out_height` and `out_width`. |
120 | auto Svec = shape_t.vec<int32>(); |
121 | out_height = internal::SubtleMustCopy(Svec(0)); |
122 | out_width = internal::SubtleMustCopy(Svec(1)); |
123 | OP_REQUIRES(context, out_height > 0 && out_width > 0, |
124 | errors::InvalidArgument("output dimensions must be positive" )); |
125 | |
126 | height_scale = CalculateResizeScale(in_height, out_height, align_corners_); |
127 | width_scale = CalculateResizeScale(in_width, out_width, align_corners_); |
128 | |
129 | // Guard against overflows |
130 | OP_REQUIRES(context, |
131 | ceilf((out_height - 1) * height_scale) <= |
132 | static_cast<float>(std::numeric_limits<int64_t>::max()), |
133 | errors::InvalidArgument( |
134 | "input image height scale would cause an overflow" )); |
135 | OP_REQUIRES( |
136 | context, |
137 | ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX), |
138 | errors::InvalidArgument( |
139 | "input image width scale would cause an overflow" )); |
140 | } |
141 | |
142 | // Calculates all the required variables, and allocates the output. |
143 | void ValidateAndCreateOutput(OpKernelContext* context) { |
144 | ValidateAndCalculateOutputSize(context); |
145 | if (!context->status().ok()) return; |
146 | |
147 | TensorShape shape; |
148 | // Guard against shape overflow |
149 | OP_REQUIRES_OK(context, shape.AddDimWithStatus(batch_size)); |
150 | OP_REQUIRES_OK(context, shape.AddDimWithStatus(out_height)); |
151 | OP_REQUIRES_OK(context, shape.AddDimWithStatus(out_width)); |
152 | OP_REQUIRES_OK(context, shape.AddDimWithStatus(channels)); |
153 | |
154 | OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output)); |
155 | } |
156 | |
157 | int64_t batch_size; |
158 | int64_t out_height; |
159 | int64_t out_width; |
160 | int64_t in_height; |
161 | int64_t in_width; |
162 | int64_t channels; |
163 | float height_scale; |
164 | float width_scale; |
165 | Tensor* output = nullptr; |
166 | |
167 | private: |
168 | bool align_corners_; |
169 | bool half_pixel_centers_; |
170 | }; |
171 | |
172 | struct ImageResizerGradientState { |
173 | explicit ImageResizerGradientState(bool align_corners, |
174 | bool half_pixel_centers) |
175 | : align_corners_(align_corners), |
176 | half_pixel_centers_(half_pixel_centers) {} |
177 | |
178 | void ValidateAndCreateOutput(OpKernelContext* context) { |
179 | OP_REQUIRES( |
180 | context, |
181 | !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_), |
182 | errors::InvalidArgument("If half_pixel_centers is True, " |
183 | "align_corners must be False." )); |
184 | |
185 | const Tensor& input = context->input(0); |
186 | OP_REQUIRES(context, input.dims() == 4, |
187 | errors::InvalidArgument("input_grad must be 4-dimensional" , |
188 | input.shape().DebugString())); |
189 | |
190 | // Resizers always produce float images, so input gradient must |
191 | // always be a float. |
192 | OP_REQUIRES(context, input.dtype() == DT_FLOAT, |
193 | errors::InvalidArgument("input_grad must be of type float" , |
194 | DataTypeString(input.dtype()))); |
195 | |
196 | batch_size = input.dim_size(0); |
197 | channels = input.dim_size(3); |
198 | |
199 | resized_height = input.dim_size(1); |
200 | resized_width = input.dim_size(2); |
201 | |
202 | // The following check is also carried out for the forward op. It is added |
203 | // here to prevent a divide-by-zero exception when either height_scale or |
204 | // width_scale is being calculated. |
205 | OP_REQUIRES(context, resized_height > 0 && resized_width > 0, |
206 | errors::InvalidArgument("resized dimensions must be positive" )); |
207 | |
208 | const TensorShape& output_shape = context->input(1).shape(); |
209 | OP_REQUIRES(context, output_shape.dims() == 4, |
210 | errors::InvalidArgument("original_image must be 4-dimensional" , |
211 | output_shape.DebugString())); |
212 | original_height = output_shape.dim_size(1); |
213 | original_width = output_shape.dim_size(2); |
214 | |
215 | // The following check is also carried out for the forward op. It is added |
216 | // here to prevent either height_scale or width_scale from being set to |
217 | // zero, which would cause a divide-by-zero exception in the deterministic |
218 | // back-prop path. |
219 | OP_REQUIRES( |
220 | context, original_height > 0 && original_width > 0, |
221 | errors::InvalidArgument("original dimensions must be positive" )); |
222 | |
223 | OP_REQUIRES( |
224 | context, |
225 | FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) && |
226 | FastBoundsCheck(original_width, std::numeric_limits<int32>::max()), |
227 | errors::InvalidArgument( |
228 | "original sizes must be between 0 and max int32" )); |
229 | |
230 | height_scale = |
231 | CalculateResizeScale(original_height, resized_height, align_corners_); |
232 | width_scale = |
233 | CalculateResizeScale(original_width, resized_width, align_corners_); |
234 | |
235 | OP_REQUIRES_OK(context, context->allocate_output( |
236 | 0, |
237 | TensorShape({batch_size, original_height, |
238 | original_width, channels}), |
239 | &output)); |
240 | } |
241 | |
242 | int64_t batch_size; |
243 | int64_t channels; |
244 | int64_t resized_height; |
245 | int64_t resized_width; |
246 | int64_t original_height; |
247 | int64_t original_width; |
248 | float height_scale; |
249 | float width_scale; |
250 | Tensor* output = nullptr; |
251 | |
252 | private: |
253 | bool align_corners_; |
254 | bool half_pixel_centers_; |
255 | }; |
256 | |
257 | } // namespace tensorflow |
258 | |
259 | #endif // TENSORFLOW_CORE_UTIL_IMAGE_RESIZER_STATE_H_ |
260 | |