1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/math_ops.cc. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #include "tensorflow/core/framework/op.h" |
21 | #include "tensorflow/core/framework/op_kernel.h" |
22 | #include "tensorflow/core/framework/type_traits.h" |
23 | #include "tensorflow/core/framework/types.h" |
24 | #include "tensorflow/core/kernels/cwise_ops.h" |
25 | #include "tensorflow/core/kernels/meta_support.h" |
26 | #include "tensorflow/core/kernels/quantization_utils.h" |
27 | #include "tensorflow/core/lib/core/errors.h" |
28 | |
namespace {
// Quantization modes selectable through the "mode" attribute. The kernel
// stores the chosen enumerator in an int member.
enum {
  QUANTIZE_MODE_MIN_COMBINED,
  QUANTIZE_MODE_MIN_FIRST,
  QUANTIZE_MODE_SCALED,
};
// Rounding modes selectable through the "round_mode" attribute.
enum {
  // Round half away from zero: if the fraction of y is exactly 0.5, then
  // round(y) = y + 0.5 if y > 0
  // round(y) = y - 0.5 if y < 0
  // E.g., -5.5 gets rounded to -6, -5.4 goes to -5,
  // 5.4 goes to 5, and 5.5 goes to 6.
  ROUND_HALF_AWAY_FROM_ZERO,
  // Round half to even: if the fraction of y is exactly 0.5, then round(y) is
  // the nearest even integer to y.
  // E.g., 23.5 gets rounded to 24, 24.5 gets rounded to 24, while -23.5 becomes
  // -24, and -24.5 gets rounded to -24.
  ROUND_HALF_TO_EVEN,
};
}  // namespace
49 | |
50 | namespace tensorflow { |
51 | |
52 | typedef Eigen::ThreadPoolDevice CPUDevice; |
53 | |
54 | // Quantize a tensor from float to T, with user-specified min_range and |
55 | // max_range. |
56 | // TODO(xbing): Add a new QuantizeOp just taking scale, |
57 | // rather than min_range and max_range. |
58 | template <typename Device, typename T> |
59 | class QuantizeV2Op : public OpKernel { |
60 | public: |
61 | explicit QuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) { |
62 | half_range_ = |
63 | !std::is_signed<T>::value |
64 | ? 0.0f |
65 | : (static_cast<double>(std::numeric_limits<T>::max()) - |
66 | static_cast<double>(std::numeric_limits<T>::min()) + 1) / |
67 | 2.0f; |
68 | string mode_string; |
69 | OP_REQUIRES_OK(ctx, ctx->GetAttr("mode" , &mode_string)); |
70 | OP_REQUIRES(ctx, |
71 | (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST" || |
72 | mode_string == "SCALED" ), |
73 | errors::InvalidArgument("Mode string must be 'MIN_COMBINED'," |
74 | " 'MIN_FIRST', or 'SCALED', is '" + |
75 | mode_string + "'" )); |
76 | if (mode_string == "MIN_COMBINED" ) { |
77 | mode_ = QUANTIZE_MODE_MIN_COMBINED; |
78 | } else if (mode_string == "MIN_FIRST" ) { |
79 | mode_ = QUANTIZE_MODE_MIN_FIRST; |
80 | } else if (mode_string == "SCALED" ) { |
81 | mode_ = QUANTIZE_MODE_SCALED; |
82 | } |
83 | |
84 | string round_mode_string; |
85 | OP_REQUIRES_OK(ctx, ctx->GetAttr("round_mode" , &round_mode_string)); |
86 | OP_REQUIRES(ctx, |
87 | (round_mode_string == "HALF_AWAY_FROM_ZERO" || |
88 | round_mode_string == "HALF_TO_EVEN" ), |
89 | errors::InvalidArgument("Round mode string must be " |
90 | "'HALF_AWAY_FROM_ZERO' or " |
91 | "'HALF_TO_EVEN', is '" + |
92 | round_mode_string + "'" )); |
93 | if (round_mode_string == "HALF_AWAY_FROM_ZERO" ) { |
94 | round_mode_ = ROUND_HALF_AWAY_FROM_ZERO; |
95 | } else if (round_mode_string == "HALF_TO_EVEN" ) { |
96 | OP_REQUIRES(ctx, mode_string == "SCALED" , |
97 | errors::InvalidArgument("Round mode 'HALF_TO_EVEN' " |
98 | "only supported for mode 'SCALED', " |
99 | "b ut mode is '" + |
100 | mode_string + "'." )); |
101 | round_mode_ = ROUND_HALF_TO_EVEN; |
102 | } |
103 | OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range" , &narrow_range_)); |
104 | OP_REQUIRES_OK(ctx, ctx->GetAttr("axis" , &axis_)); |
105 | OP_REQUIRES_OK( |
106 | ctx, ctx->GetAttr("ensure_minimum_range" , &ensure_minimum_range_)); |
107 | } |
108 | |
109 | void Compute(OpKernelContext* ctx) override { |
110 | const Tensor& input = ctx->input(0); |
111 | const Tensor& input_min_range = ctx->input(1); |
112 | const Tensor& input_max_range = ctx->input(2); |
113 | |
114 | int num_slices = 1; |
115 | if (axis_ > -1) { |
116 | OP_REQUIRES( |
117 | ctx, input.dims() > axis_, |
118 | errors::InvalidArgument( |
119 | "Axis is on a zero-based index, so its value must always be less " |
120 | "than number of input's dims, but given axis value was " , |
121 | axis_, " and input's dims was " , input.dims())); |
122 | num_slices = input.dim_size(axis_); |
123 | OP_REQUIRES(ctx, input_min_range.dims() == 1, |
124 | errors::InvalidArgument( |
125 | "If axis is specified, min_range must be a 1-D tensor " |
126 | "whose size matches the axis dimension of the input and " |
127 | "output tensors, but min_range dims are " , |
128 | input_min_range.dims())); |
129 | OP_REQUIRES(ctx, input_min_range.dim_size(0) == num_slices, |
130 | errors::InvalidArgument( |
131 | "If axis is specified, min_range must be a 1-D tensor " |
132 | "whose size matches the axis dimension of the input and " |
133 | "output tensors, but min_range is a 1-D tensor of size " , |
134 | input_min_range.dim_size(0), |
135 | " and input's axis dimension is of size " , num_slices)); |
136 | OP_REQUIRES(ctx, input_max_range.dims() == 1, |
137 | errors::InvalidArgument( |
138 | "If axis is specified, max_range must be a 1-D tensor " |
139 | "whose size matches the axis dimension of the input and " |
140 | "output tensors, but max_range dims are " , |
141 | input_max_range.dims())); |
142 | OP_REQUIRES(ctx, input_max_range.dim_size(0) == num_slices, |
143 | errors::InvalidArgument( |
144 | "If axis is specified, max_range must be a 1-D tensor " |
145 | "whose size matches the axis dimension of the input and " |
146 | "output tensors, but max_range is a 1-D tensor of size " , |
147 | input_max_range.dim_size(0), |
148 | " and input's axis dimension is of size " , num_slices)); |
149 | } else { |
150 | OP_REQUIRES(ctx, input_min_range.NumElements() == 1, |
151 | errors::InvalidArgument( |
152 | "If axis is not specified, min_range must contain a " |
153 | "single float element, but it contains " , |
154 | input_min_range.NumElements(), " elements" )); |
155 | OP_REQUIRES(ctx, input_max_range.NumElements() == 1, |
156 | errors::InvalidArgument( |
157 | "If axis is not specified, max_range must contain a " |
158 | "single float element, but it contains " , |
159 | input_max_range.NumElements(), " elements" )); |
160 | } |
161 | |
162 | const TensorShape& minmax_shape = ctx->input(1).shape(); |
163 | Tensor* output = nullptr; |
164 | OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); |
165 | |
166 | Tensor* output_min_tensor = nullptr; |
167 | Tensor* output_max_tensor = nullptr; |
168 | |
169 | if (num_slices == 1) { |
170 | OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &output_min_tensor)); |
171 | OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &output_max_tensor)); |
172 | const float min_range = input_min_range.template flat<float>()(0); |
173 | const float max_range = input_max_range.template flat<float>()(0); |
174 | QuantizeTensor(ctx, input, min_range, max_range, output, |
175 | output_min_tensor, output_max_tensor); |
176 | return; |
177 | } |
178 | |
179 | OP_REQUIRES(ctx, mode_ != QUANTIZE_MODE_MIN_FIRST, |
180 | errors::Unimplemented("MIN_FIRST mode is not implemented for " |
181 | "Quantize with axis != -1." )); |
182 | OP_REQUIRES_OK(ctx, |
183 | ctx->allocate_output(1, minmax_shape, &output_min_tensor)); |
184 | OP_REQUIRES_OK(ctx, |
185 | ctx->allocate_output(2, minmax_shape, &output_max_tensor)); |
186 | |
187 | auto input_tensor = |
188 | input.template flat_inner_outer_dims<float, 3>(axis_ - 1); |
189 | int64_t pre_dim = 1, post_dim = 1; |
190 | for (int i = 0; i < axis_; ++i) { |
191 | pre_dim *= output->dim_size(i); |
192 | } |
193 | for (int i = axis_ + 1; i < output->dims(); ++i) { |
194 | post_dim *= output->dim_size(i); |
195 | } |
196 | auto output_tensor = output->template bit_casted_shaped<T, 3>( |
197 | {pre_dim, num_slices, post_dim}); |
198 | auto min_ranges = input_min_range.template vec<float>(); |
199 | auto max_ranges = input_max_range.template vec<float>(); |
200 | for (int i = 0; i < num_slices; ++i) { |
201 | QuantizeSlice(ctx->eigen_device<Device>(), ctx, |
202 | input_tensor.template chip<1>(i), min_ranges(i), |
203 | max_ranges(i), output_tensor.template chip<1>(i), |
204 | &output_min_tensor->flat<float>()(i), |
205 | &output_max_tensor->flat<float>()(i)); |
206 | } |
207 | } |
208 | |
209 | void QuantizeTensor(OpKernelContext* ctx, const Tensor& input, |
210 | const float input_min_range, const float input_max_range, |
211 | Tensor* output, Tensor* output_min_tensor, |
212 | Tensor* output_max_tensor) { |
213 | OP_REQUIRES(ctx, !(input_max_range < input_min_range), |
214 | errors::InvalidArgument( |
215 | "input_max_range must be larger than input_min_range." )); |
216 | |
217 | // When the minimum and maximum ranges are too close together, nudge them |
218 | // apart by a small value so that they are slightly different. This helps |
219 | // us avoid creating ill-formed buffers where all quantized values map to |
220 | // the same float number. These kinds of buffers cause problems for |
221 | // downstream ops when they need to do calculations on them. |
222 | // We pick the value by making sure that zero is not more than 100x the |
223 | // overall range from the maximum, so that the value can be easily |
224 | // represented when we promote the quantized value to a higher |
225 | // intermediate bit depth, since that's a common requirement. |
226 | float min_range = std::min(0.0f, input_min_range); |
227 | const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range), |
228 | fabsf(input_max_range))) * |
229 | ensure_minimum_range_; |
230 | float max_range = |
231 | std::max(0.0f, std::max(input_max_range, min_range + epsilon)); |
232 | |
233 | if (mode_ == QUANTIZE_MODE_MIN_FIRST) { |
234 | if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) { |
235 | TTypes<const float>::Vec input_array = input.flat<float>(); |
236 | |
237 | meta::Quantize(ctx, input_array.data(), input_array.size(), min_range, |
238 | max_range, output->flat<quint8>().data()); |
239 | } else { |
240 | FloatTensorToQuantizedInPlaceUsingEigen<T>( |
241 | ctx->template eigen_device<Device>(), input, min_range, max_range, |
242 | output); |
243 | } |
244 | output_min_tensor->flat<float>()(0) = min_range; |
245 | output_max_tensor->flat<float>()(0) = max_range; |
246 | } else { |
247 | QuantizeSlice(ctx->eigen_device<Device>(), ctx, input.flat<float>(), |
248 | input_min_range, input_max_range, |
249 | output->template flat<T>(), |
250 | &output_min_tensor->flat<float>()(0), |
251 | &output_max_tensor->flat<float>()(0)); |
252 | } |
253 | } |
254 | |
255 | template <typename ConstVec, typename Vec> |
256 | void QuantizeSlice(const Device& d, OpKernelContext* ctx, |
257 | const ConstVec& input, float input_min_range, |
258 | float input_max_range, Vec output, float* output_min_range, |
259 | float* output_max_range) { |
260 | OP_REQUIRES(ctx, !(input_max_range < input_min_range), |
261 | errors::InvalidArgument( |
262 | "input_max_range must be larger than input_min_range." )); |
263 | float min_range = std::min(0.0f, input_min_range); |
264 | const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range), |
265 | fabsf(input_max_range))) * |
266 | ensure_minimum_range_; |
267 | float max_range = |
268 | std::max(0.0f, std::max(input_max_range, min_range + epsilon)); |
269 | |
270 | if (mode_ == QUANTIZE_MODE_MIN_COMBINED) { |
271 | const float scale_factor = |
272 | (static_cast<double>(std::numeric_limits<T>::max()) - |
273 | static_cast<double>(std::numeric_limits<T>::min())) / |
274 | (max_range - min_range); |
275 | |
276 | // Quantize: |
277 | // Make input in range of [min_range, max_range], then |
278 | // subtract min_range to be in range of [0, max_range - min_range] |
279 | // Divide by (max_range - min_range) to get to [0, 1.0] |
280 | // Multiply by range of T, after that shift left 1/2 range of T if |
281 | // T is signed. |
282 | // Note that the number is rounded before the cast. Rounding follows the |
283 | // semantic of std::round, which implements "round-half-away-zero", |
284 | // e.g., -5.5 gets rounded to -6, -5.4 goes to -5, 5.4 goes to 5, |
285 | // and 5.5 goes to 6. |
286 | bool is_signed = std::is_signed<T>::value; |
287 | if (is_signed) { |
288 | // The slow path. |
289 | // TODO(xbing,yonghui): Speedup this path as well. |
290 | output.device(d) = |
291 | ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) * |
292 | scale_factor - |
293 | half_range_) |
294 | .round() |
295 | .template cast<T>(); |
296 | } else { |
297 | // The fast path that avoids unaryExpr |
298 | // According to the micro-benchmark, adding device here doesn't help. |
299 | output.device(d) = |
300 | ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) * |
301 | scale_factor + |
302 | 0.5f) |
303 | .template cast<T>(); |
304 | } |
305 | } else if (mode_ == QUANTIZE_MODE_SCALED) { |
306 | const int min_output_value = |
307 | std::numeric_limits<T>::min() + (narrow_range_ ? 1 : 0); |
308 | const int max_output_value = std::numeric_limits<T>::max(); |
309 | const float scale_factor_from_min_side = |
310 | (min_output_value * min_range > 0) |
311 | ? min_output_value / min_range |
312 | : std::numeric_limits<float>::max(); |
313 | const float scale_factor_from_max_side = |
314 | (max_output_value * max_range > 0) |
315 | ? max_output_value / max_range |
316 | : std::numeric_limits<float>::max(); |
317 | const float scale_factor = |
318 | std::min(scale_factor_from_min_side, scale_factor_from_max_side); |
319 | min_range = min_output_value / scale_factor; |
320 | max_range = max_output_value / scale_factor; |
321 | if (round_mode_ == ROUND_HALF_TO_EVEN) { |
322 | output.device(d) = |
323 | (input.cwiseMin(max_range).cwiseMax(min_range) * scale_factor) |
324 | .unaryExpr( |
325 | Eigen::internal::scalar_round_half_to_even_op<float>()) |
326 | .template cast<T>(); |
327 | } else if (round_mode_ == ROUND_HALF_AWAY_FROM_ZERO) { |
328 | output.device(d) = |
329 | (input.cwiseMin(max_range).cwiseMax(min_range) * scale_factor) |
330 | .round() |
331 | .template cast<T>(); |
332 | } |
333 | } |
334 | |
335 | *output_min_range = min_range; |
336 | *output_max_range = max_range; |
337 | } |
338 | |
339 | private: |
340 | float half_range_; |
341 | float ensure_minimum_range_; |
342 | int mode_; |
343 | int round_mode_; |
344 | int axis_; |
345 | bool narrow_range_; |
346 | }; |
347 | |
348 | REGISTER_KERNEL_BUILDER( |
349 | Name("QuantizeV2" ).Device(DEVICE_CPU).TypeConstraint<quint8>("T" ), |
350 | QuantizeV2Op<CPUDevice, quint8>); |
351 | REGISTER_KERNEL_BUILDER( |
352 | Name("QuantizeV2" ).Device(DEVICE_CPU).TypeConstraint<qint8>("T" ), |
353 | QuantizeV2Op<CPUDevice, qint8>); |
354 | REGISTER_KERNEL_BUILDER( |
355 | Name("QuantizeV2" ).Device(DEVICE_CPU).TypeConstraint<quint16>("T" ), |
356 | QuantizeV2Op<CPUDevice, quint16>); |
357 | REGISTER_KERNEL_BUILDER( |
358 | Name("QuantizeV2" ).Device(DEVICE_CPU).TypeConstraint<qint16>("T" ), |
359 | QuantizeV2Op<CPUDevice, qint16>); |
360 | REGISTER_KERNEL_BUILDER( |
361 | Name("QuantizeV2" ).Device(DEVICE_CPU).TypeConstraint<qint32>("T" ), |
362 | QuantizeV2Op<CPUDevice, qint32>); |
363 | } // namespace tensorflow |
364 | |