/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/math_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/cwise_ops.h"
#include "tensorflow/core/kernels/meta_support.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/errors.h"

namespace {
enum {
  QUANTIZE_MODE_MIN_COMBINED,
  QUANTIZE_MODE_MIN_FIRST,
  QUANTIZE_MODE_SCALED,
};
enum {
  // Round half away from zero: if the fraction of y is exactly 0.5, then
  // round(y) = y + 0.5 if y > 0
  // round(y) = y - 0.5 if y < 0
  // E.g., -5.5 gets rounded to -6, -5.4 goes to -5,
  // 5.4 goes to 5, and 5.5 goes to 6.
  ROUND_HALF_AWAY_FROM_ZERO,
  // Round half to even: if the fraction of y is exactly 0.5, then round(y) is
  // the nearest even integer to y.
  // E.g., 23.5 gets rounded to 24, 24.5 gets rounded to 24, while -23.5
  // becomes -24, and -24.5 gets rounded to -24.
  ROUND_HALF_TO_EVEN,
};
}  // namespace

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

// Quantize a tensor from float to T, with user-specified min_range and
// max_range.
// TODO(xbing): Add a new QuantizeOp just taking scale,
// rather than min_range and max_range.
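// In MIN_COMBINED mode the mapping implemented below is, as a rough sketch:
//   out[i] = round((in[i] - min_range) * range(T) / (max_range - min_range))
//   if T is signed: out[i] -= (range(T) + 1) / 2   (i.e. half_range_)
// where range(T) = numeric_limits<T>::max() - numeric_limits<T>::min().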
template <typename Device, typename T>
class QuantizeV2Op : public OpKernel {
 public:
  explicit QuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) {
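    // For signed T, quantized values are shifted down by half the range of T
    // so that min_range maps to std::numeric_limits<T>::min() rather than 0;
    // e.g. for qint8 this shift is (127 - (-128) + 1) / 2 = 128.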
    half_range_ =
        !std::is_signed<T>::value
            ? 0.0f
            : (static_cast<double>(std::numeric_limits<T>::max()) -
               static_cast<double>(std::numeric_limits<T>::min()) + 1) /
                  2.0f;
    string mode_string;
    OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string));
    OP_REQUIRES(ctx,
                (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST" ||
                 mode_string == "SCALED"),
                errors::InvalidArgument("Mode string must be 'MIN_COMBINED',"
                                        " 'MIN_FIRST', or 'SCALED', is '" +
                                        mode_string + "'"));
    if (mode_string == "MIN_COMBINED") {
      mode_ = QUANTIZE_MODE_MIN_COMBINED;
    } else if (mode_string == "MIN_FIRST") {
      mode_ = QUANTIZE_MODE_MIN_FIRST;
    } else if (mode_string == "SCALED") {
      mode_ = QUANTIZE_MODE_SCALED;
    }

    string round_mode_string;
    OP_REQUIRES_OK(ctx, ctx->GetAttr("round_mode", &round_mode_string));
    OP_REQUIRES(ctx,
                (round_mode_string == "HALF_AWAY_FROM_ZERO" ||
                 round_mode_string == "HALF_TO_EVEN"),
                errors::InvalidArgument("Round mode string must be "
                                        "'HALF_AWAY_FROM_ZERO' or "
                                        "'HALF_TO_EVEN', is '" +
                                        round_mode_string + "'"));
    if (round_mode_string == "HALF_AWAY_FROM_ZERO") {
      round_mode_ = ROUND_HALF_AWAY_FROM_ZERO;
    } else if (round_mode_string == "HALF_TO_EVEN") {
      OP_REQUIRES(ctx, mode_string == "SCALED",
                  errors::InvalidArgument("Round mode 'HALF_TO_EVEN' "
                                          "only supported for mode 'SCALED', "
                                          "but mode is '" +
                                          mode_string + "'."));
      round_mode_ = ROUND_HALF_TO_EVEN;
    }
    OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range", &narrow_range_));
    OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &axis_));
    OP_REQUIRES_OK(
        ctx, ctx->GetAttr("ensure_minimum_range", &ensure_minimum_range_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Tensor& input_min_range = ctx->input(1);
    const Tensor& input_max_range = ctx->input(2);

    int num_slices = 1;
    if (axis_ > -1) {
      OP_REQUIRES(
          ctx, input.dims() > axis_,
          errors::InvalidArgument(
              "Axis must be a zero-based index that is less than the number "
              "of dimensions of the input, but the given axis value was ",
              axis_, " and the input has ", input.dims(), " dimensions"));
      num_slices = input.dim_size(axis_);
      OP_REQUIRES(ctx, input_min_range.dims() == 1,
                  errors::InvalidArgument(
                      "If axis is specified, min_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but min_range dims are ",
                      input_min_range.dims()));
      OP_REQUIRES(ctx, input_min_range.dim_size(0) == num_slices,
                  errors::InvalidArgument(
                      "If axis is specified, min_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but min_range is a 1-D tensor of size ",
                      input_min_range.dim_size(0),
                      " and input's axis dimension is of size ", num_slices));
      OP_REQUIRES(ctx, input_max_range.dims() == 1,
                  errors::InvalidArgument(
                      "If axis is specified, max_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but max_range dims are ",
                      input_max_range.dims()));
      OP_REQUIRES(ctx, input_max_range.dim_size(0) == num_slices,
                  errors::InvalidArgument(
                      "If axis is specified, max_range must be a 1-D tensor "
                      "whose size matches the axis dimension of the input and "
                      "output tensors, but max_range is a 1-D tensor of size ",
                      input_max_range.dim_size(0),
                      " and input's axis dimension is of size ", num_slices));
    } else {
      OP_REQUIRES(ctx, input_min_range.NumElements() == 1,
                  errors::InvalidArgument(
                      "If axis is not specified, min_range must contain a "
                      "single float element, but it contains ",
                      input_min_range.NumElements(), " elements"));
      OP_REQUIRES(ctx, input_max_range.NumElements() == 1,
                  errors::InvalidArgument(
                      "If axis is not specified, max_range must contain a "
                      "single float element, but it contains ",
                      input_max_range.NumElements(), " elements"));
    }

    const TensorShape& minmax_shape = ctx->input(1).shape();
    Tensor* output = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output));

    Tensor* output_min_tensor = nullptr;
    Tensor* output_max_tensor = nullptr;

    if (num_slices == 1) {
      OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &output_min_tensor));
      OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &output_max_tensor));
      const float min_range = input_min_range.template flat<float>()(0);
      const float max_range = input_max_range.template flat<float>()(0);
      QuantizeTensor(ctx, input, min_range, max_range, output,
                     output_min_tensor, output_max_tensor);
      return;
    }

    OP_REQUIRES(ctx, mode_ != QUANTIZE_MODE_MIN_FIRST,
                errors::Unimplemented("MIN_FIRST mode is not implemented for "
                                      "Quantize with axis != -1."));
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, minmax_shape, &output_min_tensor));
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(2, minmax_shape, &output_max_tensor));

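    // Per-axis case: view the input as a rank-3 tensor of shape
    // {pre_dim, num_slices, post_dim}, where num_slices is the size of the
    // quantization axis, and quantize each slice along the middle dimension
    // with its own min/max pair.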
    auto input_tensor =
        input.template flat_inner_outer_dims<float, 3>(axis_ - 1);
    int64_t pre_dim = 1, post_dim = 1;
    for (int i = 0; i < axis_; ++i) {
      pre_dim *= output->dim_size(i);
    }
    for (int i = axis_ + 1; i < output->dims(); ++i) {
      post_dim *= output->dim_size(i);
    }
    auto output_tensor = output->template bit_casted_shaped<T, 3>(
        {pre_dim, num_slices, post_dim});
    auto min_ranges = input_min_range.template vec<float>();
    auto max_ranges = input_max_range.template vec<float>();
    for (int i = 0; i < num_slices; ++i) {
      QuantizeSlice(ctx->eigen_device<Device>(), ctx,
                    input_tensor.template chip<1>(i), min_ranges(i),
                    max_ranges(i), output_tensor.template chip<1>(i),
                    &output_min_tensor->flat<float>()(i),
                    &output_max_tensor->flat<float>()(i));
    }
  }

  void QuantizeTensor(OpKernelContext* ctx, const Tensor& input,
                      const float input_min_range, const float input_max_range,
                      Tensor* output, Tensor* output_min_tensor,
                      Tensor* output_max_tensor) {
    OP_REQUIRES(ctx, !(input_max_range < input_min_range),
                errors::InvalidArgument(
                    "input_max_range must be larger than or equal to "
                    "input_min_range."));

    // When the minimum and maximum ranges are too close together, nudge them
    // apart by a small value so that they are slightly different. This helps
    // us avoid creating ill-formed buffers where all quantized values map to
    // the same float number. These kinds of buffers cause problems for
    // downstream ops when they need to do calculations on them.
    // We pick the value by making sure that zero is not more than 100x the
    // overall range from the maximum, so that the value can be easily
    // represented when we promote the quantized value to a higher
    // intermediate bit depth, since that's a common requirement.
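    // For example (assuming the op's default ensure_minimum_range of 0.01):
    // an input range of [0.0f, 0.0f] becomes [0.0f, 0.01f], so the quantized
    // buffer still spans a non-degenerate float range.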
    float min_range = std::min(0.0f, input_min_range);
    const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
                                                  fabsf(input_max_range))) *
                          ensure_minimum_range_;
    float max_range =
        std::max(0.0f, std::max(input_max_range, min_range + epsilon));

    if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
      if (meta::IsSupportedAndEnabled() && std::is_same<T, quint8>()) {
        TTypes<const float>::Vec input_array = input.flat<float>();

        meta::Quantize(ctx, input_array.data(), input_array.size(), min_range,
                       max_range, output->flat<quint8>().data());
      } else {
        FloatTensorToQuantizedInPlaceUsingEigen<T>(
            ctx->template eigen_device<Device>(), input, min_range, max_range,
            output);
      }
      output_min_tensor->flat<float>()(0) = min_range;
      output_max_tensor->flat<float>()(0) = max_range;
    } else {
      QuantizeSlice(ctx->eigen_device<Device>(), ctx, input.flat<float>(),
                    input_min_range, input_max_range,
                    output->template flat<T>(),
                    &output_min_tensor->flat<float>()(0),
                    &output_max_tensor->flat<float>()(0));
    }
  }

  template <typename ConstVec, typename Vec>
  void QuantizeSlice(const Device& d, OpKernelContext* ctx,
                     const ConstVec& input, float input_min_range,
                     float input_max_range, Vec output, float* output_min_range,
                     float* output_max_range) {
    OP_REQUIRES(ctx, !(input_max_range < input_min_range),
                errors::InvalidArgument(
                    "input_max_range must be larger than or equal to "
                    "input_min_range."));
    float min_range = std::min(0.0f, input_min_range);
    const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
                                                  fabsf(input_max_range))) *
                          ensure_minimum_range_;
    float max_range =
        std::max(0.0f, std::max(input_max_range, min_range + epsilon));

    if (mode_ == QUANTIZE_MODE_MIN_COMBINED) {
      const float scale_factor =
          (static_cast<double>(std::numeric_limits<T>::max()) -
           static_cast<double>(std::numeric_limits<T>::min())) /
          (max_range - min_range);

      // Quantize:
      // Make input in range of [min_range, max_range], then
      // subtract min_range to be in range of [0, max_range - min_range]
      // Divide by (max_range - min_range) to get to [0, 1.0]
      // Multiply by the range of T, and then shift the result down by half the
      // range of T if T is signed.
      // Note that the number is rounded before the cast. Rounding follows the
      // semantic of std::round, which implements "round-half-away-from-zero",
      // e.g., -5.5 gets rounded to -6, -5.4 goes to -5, 5.4 goes to 5,
      // and 5.5 goes to 6.
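      // Illustrative example (unsigned fast path, T = quint8, min_range = 0.0f,
      // max_range = 1.0f): scale_factor = 255, so an input of 0.5f becomes
      // static_cast<quint8>(0.5f * 255 + 0.5f) = 128.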
      bool is_signed = std::is_signed<T>::value;
      if (is_signed) {
        // The slow path.
        // TODO(xbing,yonghui): Speedup this path as well.
        output.device(d) =
            ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) *
                 scale_factor -
             half_range_)
                .round()
                .template cast<T>();
      } else {
        // The fast path that avoids unaryExpr
        // According to the micro-benchmark, adding device here doesn't help.
        output.device(d) =
            ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) *
                 scale_factor +
             0.5f)
                .template cast<T>();
      }
    } else if (mode_ == QUANTIZE_MODE_SCALED) {
      const int min_output_value =
          std::numeric_limits<T>::min() + (narrow_range_ ? 1 : 0);
      const int max_output_value = std::numeric_limits<T>::max();
      const float scale_factor_from_min_side =
          (min_output_value * min_range > 0)
              ? min_output_value / min_range
              : std::numeric_limits<float>::max();
      const float scale_factor_from_max_side =
          (max_output_value * max_range > 0)
              ? max_output_value / max_range
              : std::numeric_limits<float>::max();
      const float scale_factor =
          std::min(scale_factor_from_min_side, scale_factor_from_max_side);
      min_range = min_output_value / scale_factor;
      max_range = max_output_value / scale_factor;
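      // Illustrative example (assuming T = qint8, narrow_range_ = false, and an
      // input range of [-3.0f, 6.0f]): the candidate scales are -128 / -3.0
      // (about 42.7) and 127 / 6.0 (about 21.2); the smaller one wins, so
      // scale_factor is about 21.2 and the reported range widens to roughly
      // [-6.05f, 6.0f].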
      if (round_mode_ == ROUND_HALF_TO_EVEN) {
        output.device(d) =
            (input.cwiseMin(max_range).cwiseMax(min_range) * scale_factor)
                .unaryExpr(
                    Eigen::internal::scalar_round_half_to_even_op<float>())
                .template cast<T>();
      } else if (round_mode_ == ROUND_HALF_AWAY_FROM_ZERO) {
        output.device(d) =
            (input.cwiseMin(max_range).cwiseMax(min_range) * scale_factor)
                .round()
                .template cast<T>();
      }
    }

    *output_min_range = min_range;
    *output_max_range = max_range;
  }

 private:
  float half_range_;
  float ensure_minimum_range_;
  int mode_;
  int round_mode_;
  int axis_;
  bool narrow_range_;
};

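// The registrations below expose these kernels as the QuantizeV2 op on CPU;
// in the Python API this op is typically surfaced as tf.quantization.quantize.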
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint8>("T"),
    QuantizeV2Op<CPUDevice, quint8>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint8>("T"),
    QuantizeV2Op<CPUDevice, qint8>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint16>("T"),
    QuantizeV2Op<CPUDevice, quint16>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint16>("T"),
    QuantizeV2Op<CPUDevice, qint16>);
REGISTER_KERNEL_BUILDER(
    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint32>("T"),
    QuantizeV2Op<CPUDevice, qint32>);
}  // namespace tensorflow