1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// This is a helper struct to package up the input and output
17// parameters of an image resizer (the height, widths, etc.). To
18// reduce code duplication and ensure consistency across the different
19// resizers, it performs the input validation.
20
21#ifndef TENSORFLOW_CORE_UTIL_IMAGE_RESIZER_STATE_H_
22#define TENSORFLOW_CORE_UTIL_IMAGE_RESIZER_STATE_H_
23
24#define EIGEN_USE_THREADS
25#include <math.h>
26
27#include <algorithm>
28#include <array>
29
30#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
31#include "tensorflow/core/framework/bounds_check.h"
32#include "tensorflow/core/framework/op_kernel.h"
33#include "tensorflow/core/framework/register_types.h"
34#include "tensorflow/core/framework/tensor.h"
35#include "tensorflow/core/framework/tensor_shape.h"
36#include "tensorflow/core/framework/types.h"
37
38namespace tensorflow {
39
// Computes the float ratio used to map an output coordinate back onto the
// input axis.
//
// When `align_corners` is set and the output has more than one element, the
// ratio is (in_size - 1) / (out_size - 1); otherwise it is in_size / out_size.
// The division is carried out in float.
inline float CalculateResizeScale(int64_t in_size, int64_t out_size,
                                  bool align_corners) {
  const bool corner_aligned = align_corners && out_size > 1;
  if (corner_aligned) {
    return static_cast<float>(in_size - 1) / static_cast<float>(out_size - 1);
  }
  return static_cast<float>(in_size) / static_cast<float>(out_size);
}
47
// Scaler that treats pixel centers as lying at half-integer coordinates, i.e.
// the top-left pixel is centered at (0.5, 0.5).
struct HalfPixelScaler {
  HalfPixelScaler() {}

  // Maps output index `x` to an input coordinate using `scale`.
  inline float operator()(const int x, const float scale) const {
    // Shift to the pixel center, scale, then shift back by 0.5: the existing
    // bilinear sampling code etc. assumes pixels are in the old coordinate
    // system.
    const float center = static_cast<float>(x) + 0.5f;
    return center * scale - 0.5f;
  }
};
58
// Older, incorrect scaling method. It gives every resize a slight translation,
// which leads to inconsistent results: for example, a flip followed by a
// resize gives different results than a resize followed by a flip.
struct LegacyScaler {
  LegacyScaler() {}

  // Maps output index `x` to an input coordinate by plain multiplication.
  inline float operator()(const int x, const float scale) const {
    const float index = static_cast<float>(x);
    return index * scale;
  }
};
68
69struct ImageResizerState {
70 explicit ImageResizerState(bool align_corners, bool half_pixel_centers)
71 : align_corners_(align_corners),
72 half_pixel_centers_(half_pixel_centers) {}
73
74 // ValidateAndCalculateOutputSize checks the bounds on the input tensors
75 // and requested size, sets up some of the resizing state such as the
76 // height_scale and width_scale, and calculates the output size.
77 // If any of these operations fails, it sets an error status in
78 // the context, which the caller must check.
79 void ValidateAndCalculateOutputSize(OpKernelContext* context) {
80 OP_REQUIRES(
81 context,
82 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
83 errors::InvalidArgument("If half_pixel_centers is True, "
84 "align_corners must be False."));
85
86 const TensorShape& input_shape = context->input(0).shape();
87 OP_REQUIRES(context, input_shape.dims() == 4,
88 errors::InvalidArgument("input must be 4-dimensional",
89 input_shape.DebugString()));
90 batch_size = input_shape.dim_size(0);
91 channels = input_shape.dim_size(3);
92 OP_REQUIRES(
93 context, channels > 0,
94 errors::InvalidArgument("image must have at least one channel"));
95
96 // Verify and assign `in_height` and `in_width`.
97 OP_REQUIRES(
98 context, input_shape.dim_size(1) > 0 && input_shape.dim_size(2) > 0,
99 errors::InvalidArgument("input image must be of non-zero size"));
100 OP_REQUIRES(
101 context,
102 FastBoundsCheck(input_shape.dim_size(1),
103 std::numeric_limits<int32>::max()) &&
104 FastBoundsCheck(input_shape.dim_size(2),
105 std::numeric_limits<int32>::max()),
106 errors::InvalidArgument("input sizes must be between 0 and max int32"));
107 in_height = static_cast<int32>(input_shape.dim_size(1));
108 in_width = static_cast<int32>(input_shape.dim_size(2));
109
110 // Verify the output tensor's shape.
111 const Tensor& shape_t = context->input(1);
112 OP_REQUIRES(context, shape_t.dims() == 1,
113 errors::InvalidArgument("shape_t must be 1-dimensional",
114 shape_t.shape().DebugString()));
115 OP_REQUIRES(context, shape_t.NumElements() == 2,
116 errors::InvalidArgument("shape_t must have two elements",
117 shape_t.shape().DebugString()));
118
119 // Verify and assign `out_height` and `out_width`.
120 auto Svec = shape_t.vec<int32>();
121 out_height = internal::SubtleMustCopy(Svec(0));
122 out_width = internal::SubtleMustCopy(Svec(1));
123 OP_REQUIRES(context, out_height > 0 && out_width > 0,
124 errors::InvalidArgument("output dimensions must be positive"));
125
126 height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
127 width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
128
129 // Guard against overflows
130 OP_REQUIRES(context,
131 ceilf((out_height - 1) * height_scale) <=
132 static_cast<float>(std::numeric_limits<int64_t>::max()),
133 errors::InvalidArgument(
134 "input image height scale would cause an overflow"));
135 OP_REQUIRES(
136 context,
137 ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX),
138 errors::InvalidArgument(
139 "input image width scale would cause an overflow"));
140 }
141
142 // Calculates all the required variables, and allocates the output.
143 void ValidateAndCreateOutput(OpKernelContext* context) {
144 ValidateAndCalculateOutputSize(context);
145 if (!context->status().ok()) return;
146
147 TensorShape shape;
148 // Guard against shape overflow
149 OP_REQUIRES_OK(context, shape.AddDimWithStatus(batch_size));
150 OP_REQUIRES_OK(context, shape.AddDimWithStatus(out_height));
151 OP_REQUIRES_OK(context, shape.AddDimWithStatus(out_width));
152 OP_REQUIRES_OK(context, shape.AddDimWithStatus(channels));
153
154 OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output));
155 }
156
157 int64_t batch_size;
158 int64_t out_height;
159 int64_t out_width;
160 int64_t in_height;
161 int64_t in_width;
162 int64_t channels;
163 float height_scale;
164 float width_scale;
165 Tensor* output = nullptr;
166
167 private:
168 bool align_corners_;
169 bool half_pixel_centers_;
170};
171
172struct ImageResizerGradientState {
173 explicit ImageResizerGradientState(bool align_corners,
174 bool half_pixel_centers)
175 : align_corners_(align_corners),
176 half_pixel_centers_(half_pixel_centers) {}
177
178 void ValidateAndCreateOutput(OpKernelContext* context) {
179 OP_REQUIRES(
180 context,
181 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
182 errors::InvalidArgument("If half_pixel_centers is True, "
183 "align_corners must be False."));
184
185 const Tensor& input = context->input(0);
186 OP_REQUIRES(context, input.dims() == 4,
187 errors::InvalidArgument("input_grad must be 4-dimensional",
188 input.shape().DebugString()));
189
190 // Resizers always produce float images, so input gradient must
191 // always be a float.
192 OP_REQUIRES(context, input.dtype() == DT_FLOAT,
193 errors::InvalidArgument("input_grad must be of type float",
194 DataTypeString(input.dtype())));
195
196 batch_size = input.dim_size(0);
197 channels = input.dim_size(3);
198
199 resized_height = input.dim_size(1);
200 resized_width = input.dim_size(2);
201
202 // The following check is also carried out for the forward op. It is added
203 // here to prevent a divide-by-zero exception when either height_scale or
204 // width_scale is being calculated.
205 OP_REQUIRES(context, resized_height > 0 && resized_width > 0,
206 errors::InvalidArgument("resized dimensions must be positive"));
207
208 const TensorShape& output_shape = context->input(1).shape();
209 OP_REQUIRES(context, output_shape.dims() == 4,
210 errors::InvalidArgument("original_image must be 4-dimensional",
211 output_shape.DebugString()));
212 original_height = output_shape.dim_size(1);
213 original_width = output_shape.dim_size(2);
214
215 // The following check is also carried out for the forward op. It is added
216 // here to prevent either height_scale or width_scale from being set to
217 // zero, which would cause a divide-by-zero exception in the deterministic
218 // back-prop path.
219 OP_REQUIRES(
220 context, original_height > 0 && original_width > 0,
221 errors::InvalidArgument("original dimensions must be positive"));
222
223 OP_REQUIRES(
224 context,
225 FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) &&
226 FastBoundsCheck(original_width, std::numeric_limits<int32>::max()),
227 errors::InvalidArgument(
228 "original sizes must be between 0 and max int32"));
229
230 height_scale =
231 CalculateResizeScale(original_height, resized_height, align_corners_);
232 width_scale =
233 CalculateResizeScale(original_width, resized_width, align_corners_);
234
235 OP_REQUIRES_OK(context, context->allocate_output(
236 0,
237 TensorShape({batch_size, original_height,
238 original_width, channels}),
239 &output));
240 }
241
242 int64_t batch_size;
243 int64_t channels;
244 int64_t resized_height;
245 int64_t resized_width;
246 int64_t original_height;
247 int64_t original_width;
248 float height_scale;
249 float width_scale;
250 Tensor* output = nullptr;
251
252 private:
253 bool align_corners_;
254 bool half_pixel_centers_;
255};
256
257} // namespace tensorflow
258
259#endif // TENSORFLOW_CORE_UTIL_IMAGE_RESIZER_STATE_H_
260