/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/math_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
#include "tensorflow/core/kernels/meta_support.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/errors.h"

namespace {
enum {
  QUANTIZE_MODE_MIN_COMBINED,
  QUANTIZE_MODE_MIN_FIRST,
  QUANTIZE_MODE_SCALED,
};
enum {
  // Round half away from zero: if the fraction of y is exactly 0.5, then
  // round(y) = y + 0.5 if y > 0
  // round(y) = y - 0.5 if y < 0
  // E.g., -5.5 gets rounded to -6, -5.4 goes to -5,
  // 5.4 goes to 5, and 5.5 goes to 6.
  ROUND_HALF_AWAY_FROM_ZERO,
  // Round half to even: if the fraction of y is exactly 0.5, then round(y) is
  // the nearest even integer to y.
  // E.g., 23.5 gets rounded to 24, 24.5 gets rounded to 24, while -23.5
  // becomes -24, and -24.5 gets rounded to -24.
  ROUND_HALF_TO_EVEN,
};
}  // namespace

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

// Quantize a tensor from float to T, with user-specified min_range and
// max_range.
// TODO(xbing): Add a new QuantizeOp just taking scale,
// rather than min_range and max_range.
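// In MIN_COMBINED mode the mapping implemented below is, as a rough sketch:
//   out[i] = round((in[i] - min_range) * range(T) / (max_range - min_range))
//   if T is signed: out[i] -= (range(T) + 1) / 2   (i.e. half_range_)
// where range(T) = numeric_limits<T>::max() - numeric_limits<T>::min().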
template <typename Device, typename T>
class QuantizeV2Op : public OpKernel {
 public:
  explicit QuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) {
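    // For signed T, quantized values are shifted down by half the range of T
    // so that min_range maps to std::numeric_limits<T>::min() rather than 0;
    // e.g. for qint8 this shift is (127 - (-128) + 1) / 2 = 128.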
    half_range_ =
        !std::is_signed<T>::value
            ? 0.0f
            : (static_cast<double>(std::numeric_limits<T>::max()) -
               static_cast<double>(std::numeric_limits<T>::min()) + 1) /
                  2.0f;
    string mode_string;
    OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string));
    OP_REQUIRES(ctx,
                (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST" ||
                 mode_string == "SCALED"),
                errors::InvalidArgument("Mode string must be 'MIN_COMBINED',"
                                        " 'MIN_FIRST', or 'SCALED', is '" +
                                        mode_string + "'"));
    if (mode_string == "MIN_COMBINED") {
      mode_ = QUANTIZE_MODE_MIN_COMBINED;
    } else if (mode_string == "MIN_FIRST") {
      mode_ = QUANTIZE_MODE_MIN_FIRST;
    } else if (mode_string == "SCALED") {
      mode_ = QUANTIZE_MODE_SCALED;
    }

    string round_mode_string;
    OP_REQUIRES_OK(ctx, ctx->GetAttr("round_mode", &round_mode_string));
    OP_REQUIRES(ctx,
                (round_mode_string == "HALF_AWAY_FROM_ZERO" ||
                 round_mode_string == "HALF_TO_EVEN"),
                errors::InvalidArgument("Round mode string must be "
                                        "'HALF_AWAY_FROM_ZERO' or "
                                        "'HALF_TO_EVEN', is '" +
                                        round_mode_string + "'"));
    if (round_mode_string == "HALF_AWAY_FROM_ZERO") {
      round_mode_ = ROUND_HALF_AWAY_FROM_ZERO;
    } else if (round_mode_string == "HALF_TO_EVEN") {
      OP_REQUIRES(ctx, mode_string == "SCALED",
                  errors::InvalidArgument("Round mode 'HALF_TO_EVEN' "
                                          "only supported for mode 'SCALED', "
                                          "but mode is '" +
                                          mode_string + "'."));
      round_mode_ = ROUND_HALF_TO_EVEN;
    }
    OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range", &narrow_range_));
    OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &axis_));
    OP_REQUIRES_OK(
        ctx, ctx->GetAttr("ensure_minimum_range", &ensure_minimum_range_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Tensor& input_min_range = ctx->input(1);
    const Tensor& input_max_range = ctx->input(2);

    int num_slices = 1;
    if (axis_ > -1) {
      OP_REQUIRES(
          ctx, input.dims() > axis_,
          errors::InvalidArgument(
              "Axis must be a zero-based index that is less than the number "
              "of dimensions of the input, but the given axis value was ",
              axis_, " and the input has ", input.dims(), " dimensions"));
      num_slices = input.dim_size(axis_);
      OP_REQUIRES(ctx, input_min_range.dims() == 1,
                  errors::InvalidArgument(
                      "If axis is specified, min_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but min_range dims are ",
                      input_min_range.dims()));
      OP_REQUIRES(ctx, input_min_range.dim_size(0) == num_slices,
                  errors::InvalidArgument(
                      "If axis is specified, min_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but min_range is a 1-D tensor of size ",
                      input_min_range.dim_size(0),
                      " and input's axis dimension is of size ", num_slices));
      OP_REQUIRES(ctx, input_max_range.dims() == 1,
                  errors::InvalidArgument(
                      "If axis is specified, max_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but max_range dims are ",
                      input_max_range.dims()));
      OP_REQUIRES(ctx, input_max_range.dim_size(0) == num_slices,
                  errors::InvalidArgument(
                      "If axis is specified, max_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but max_range is a 1-D tensor of size ",
                      input_max_range.dim_size(0),
                      " and input's axis dimension is of size ", num_slices));
    } else {
      OP_REQUIRES(ctx, input_min_range.NumElements() == 1,
                  errors::InvalidArgument(
                      "If axis is not specified, min_range must contain a "
                      "single float element, but it contains ",
                      input_min_range.NumElements(), " elements"));
      OP_REQUIRES(ctx, input_max_range.NumElements() == 1,
                  errors::InvalidArgument(
                      "If axis is not specified, max_range must contain a "
                      "single float element, but it contains ",
                      input_max_range.NumElements(), " elements"));
    }

    const TensorShape& minmax_shape = ctx->input(1).shape();
    Tensor* output = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output));

    Tensor* output_min_tensor = nullptr;
    Tensor* output_max_tensor = nullptr;

    if (num_slices == 1) {
      OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &output_min_tensor));
      OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &output_max_tensor));
      const float min_range = input_min_range.template flat<float>()(0);
      const float max_range = input_max_range.template flat<float>()(0);
      QuantizeTensor(ctx, input, min_range, max_range, output,
                     output_min_tensor, output_max_tensor);
      return;
    }

    OP_REQUIRES(ctx, mode_ != QUANTIZE_MODE_MIN_FIRST,
                errors::Unimplemented("MIN_FIRST mode is not implemented for "
                                      "Quantize with axis != -1."));
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, minmax_shape, &output_min_tensor));
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(2, minmax_shape, &output_max_tensor));

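    // Per-axis case: view the input as a rank-3 tensor of shape
    // {pre_dim, num_slices, post_dim}, where num_slices is the size of the
    // quantization axis, and quantize each slice along the middle dimension
    // with its own min/max pair.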
    auto input_tensor =
        input.template flat_inner_outer_dims<float, 3>(axis_ - 1);
    int64_t pre_dim = 1, post_dim = 1;
    for (int i = 0; i < axis_; ++i) {
      pre_dim *= output->dim_size(i);
    }
    for (int i = axis_ + 1; i < output->dims(); ++i) {
      post_dim *= output->dim_size(i);
    }
    auto output_tensor = output->template bit_casted_shaped<T, 3>(
        {pre_dim, num_slices, post_dim});
    auto min_ranges = input_min_range.template vec<float>();
    auto max_ranges = input_max_range.template vec<float>();
    for (int i = 0; i < num_slices; ++i) {
      QuantizeSlice(ctx->eigen_device<Device>(), ctx,
                    input_tensor.template chip<1>(i), min_ranges(i),
                    max_ranges(i), output_tensor.template chip<1>(i),
                    &output_min_tensor->flat<float>()(i),
                    &output_max_tensor->flat<float>()(i));
    }
  }

  void QuantizeTensor(OpKernelContext* ctx, const Tensor& input,
                      const float input_min_range, const float input_max_range,
                      Tensor* output, Tensor* output_min_tensor,
                      Tensor* output_max_tensor) {
    OP_REQUIRES(ctx, !(input_max_range < input_min_range),
                errors::InvalidArgument(
                    "input_max_range must be larger than or equal to "
                    "input_min_range."));

    // When the minimum and maximum ranges are too close together, nudge them
    // apart by a small value so that they are slightly different. This helps
    // us avoid creating ill-formed buffers where all quantized values map to
    // the same float number. These kinds of buffers cause problems for
    // downstream ops when they need to do calculations on them.
    // We pick the value by making sure that zero is not more than 100x the
    // overall range from the maximum, so that the value can be easily
    // represented when we promote the quantized value to a higher
    // intermediate bit depth, since that's a common requirement.
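    // For example (assuming the op's default ensure_minimum_range of 0.01):
    // an input range of [0.0f, 0.0f] becomes [0.0f, 0.01f], so the quantized
    // buffer still spans a non-degenerate float range.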
    float min_range = std::min(0.0f, input_min_range);
    const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
                                                  fabsf(input_max_range))) *
                          ensure_minimum_range_;
    float max_range =
        std::max(0.0f, std::max(input_max_range, min_range + epsilon));

    if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
        TTypes<const float>::Vec input_array = input.flat<float>();

        meta::Quantize(ctx, input_array.data(), input_array.size(), min_range,
                       max_range, output->flat<quint8>().data());
      } else {
        FloatTensorToQuantizedInPlaceUsingEigen<T>(
            ctx->template eigen_device<Device>(), input, min_range, max_range,
            output);
      }
      output_min_tensor->flat<float>()(0) = min_range;
      output_max_tensor->flat<float>()(0) = max_range;
    } else {
      QuantizeSlice(ctx->eigen_device<Device>(), ctx, input.flat<float>(),
                    input_min_range, input_max_range,
                    output->template flat<T>(),
                    &output_min_tensor->flat<float>()(0),
                    &output_max_tensor->flat<float>()(0));
    }
  }

  template <typename ConstVec, typename Vec>
  void QuantizeSlice(const Device& d, OpKernelContext* ctx,
                     const ConstVec& input, float input_min_range,
                     float input_max_range, Vec output, float* output_min_range,
                     float* output_max_range) {
    OP_REQUIRES(ctx, !(input_max_range < input_min_range),
                errors::InvalidArgument(
                    "input_max_range must be larger than or equal to "
                    "input_min_range."));
    float min_range = std::min(0.0f, input_min_range);
    const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
                                                  fabsf(input_max_range))) *
                          ensure_minimum_range_;
    float max_range =
        std::max(0.0f, std::max(input_max_range, min_range + epsilon));

    if (mode_ == QUANTIZE_MODE_MIN_COMBINED) {
      const float scale_factor =
          (static_cast<double>(std::numeric_limits<T>::max()) -
           static_cast<double>(std::numeric_limits<T>::min())) /
          (max_range - min_range);

      // Quantize:
      // Make input in range of [min_range, max_range], then
      // subtract min_range to be in range of [0, max_range - min_range]
      // Divide by (max_range - min_range) to get to [0, 1.0]
      // Multiply by the range of T, and then shift the result down by half the
      // range of T if T is signed.
      // Note that the number is rounded before the cast. Rounding follows the
      // semantic of std::round, which implements "round-half-away-from-zero",
      // e.g., -5.5 gets rounded to -6, -5.4 goes to -5, 5.4 goes to 5,
      // and 5.5 goes to 6.
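      // Illustrative example (unsigned fast path, T = quint8, min_range = 0.0f,
      // max_range = 1.0f): scale_factor = 255, so an input of 0.5f becomes
      // static_cast<quint8>(0.5f * 255 + 0.5f) = 128.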
      bool is_signed = std::is_signed<T>::value;
      if (is_signed) {
        // The slow path.
        // TODO(xbing,yonghui): Speedup this path as well.
        output.device(d) =
            ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) *
                 scale_factor -
             half_range_)
                .round()
                .template cast<T>();
      } else {
        // The fast path that avoids unaryExpr
        // According to the micro-benchmark, adding device here doesn't help.
        output.device(d) =
            ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) *
                 scale_factor +
             0.5f)
                .template cast<T>();
      }
    } else if (mode_ == QUANTIZE_MODE_SCALED) {
      const int min_output_value =
          std::numeric_limits<T>::min() + (narrow_range_ ? 1 : 0);
      const int max_output_value = std::numeric_limits<T>::max();
      const float scale_factor_from_min_side =
          (min_output_value * min_range > 0)
              ? min_output_value / min_range
              : std::numeric_limits<float>::max();
      const float scale_factor_from_max_side =
          (max_output_value * max_range > 0)
              ? max_output_value / max_range
              : std::numeric_limits<float>::max();
      const float scale_factor =
          std::min(scale_factor_from_min_side, scale_factor_from_max_side);
      min_range = min_output_value / scale_factor;
      max_range = max_output_value / scale_factor;
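      // Illustrative example (assuming T = qint8, narrow_range_ = false, and an
      // input range of [-3.0f, 6.0f]): the candidate scales are -128 / -3.0
      // (about 42.7) and 127 / 6.0 (about 21.2); the smaller one wins, so
      // scale_factor is about 21.2 and the reported range widens to roughly
      // [-6.05f, 6.0f].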
      if (round_mode_ == ROUND_HALF_TO_EVEN) {
        output.device(d) =
            (input.cwiseMin(max_range).cwiseMax(min_range) * scale_factor)
                .unaryExpr(
                    Eigen::internal::scalar_round_half_to_even_op<float>())
                .template cast<T>();
      } else if (round_mode_ == ROUND_HALF_AWAY_FROM_ZERO) {
        output.device(d) =
            (input.cwiseMin(max_range).cwiseMax(min_range) * scale_factor)
                .round()
                .template cast<T>();
      }
    }

    *output_min_range = min_range;
    *output_max_range = max_range;
  }

 private:
  float half_range_;
  float ensure_minimum_range_;
  int mode_;
  int round_mode_;
  int axis_;
  bool narrow_range_;
};

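// The registrations below expose these kernels as the QuantizeV2 op on CPU;
// in the Python API this op is typically surfaced as tf.quantization.quantize.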
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint8>("T"),
    QuantizeV2Op<CPUDevice, quint8>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint8>("T"),
    QuantizeV2Op<CPUDevice, qint8>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint16>("T"),
    QuantizeV2Op<CPUDevice, quint16>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint16>("T"),
    QuantizeV2Op<CPUDevice, qint16>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint32>("T"),
    QuantizeV2Op<CPUDevice, qint32>);
}  // namespace tensorflow