quantize_and_dequantize_op.h source code [tensorflow/tensorflow/core/kernels/quantize_and_dequantize_op.h]

/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16	#ifndef TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
17	#define TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
18
19	#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
20	#include "tensorflow/core/framework/op_kernel.h"
21	#include "tensorflow/core/framework/tensor.h"
22	#include "tensorflow/core/framework/tensor_types.h"
23	#include "tensorflow/core/kernels/cwise_ops.h"
24	#include "tensorflow/core/platform/types.h"
25
26	namespace tensorflow {
27
// Selects how a scaled value is rounded to an integer before it is scaled
// back by the inverse scale.
enum QuantizerRoundMode {
  // Round half up: if the fraction of y is exactly 0.5, then
  // round(y) = y + 0.5.
  // E.g., -5.5 becomes -5, -5.4 becomes -5, 5.4 becomes 5, and 5.5 becomes 6.
  ROUND_HALF_UP = 0,
  // Round half to even: if the fraction of y is exactly 0.5, then round(y) is
  // the nearest even integer to y.
  // E.g., 23.5 and 24.5 both become 24, while -23.5 and -24.5 both
  // become -24.
  ROUND_HALF_TO_EVEN = 1
};
40
41	namespace functor {
42
43	// TODO(pauldonnelly): 'signed_input' should really be called 'signed_output'.
44
45	template <typename Device, typename T>
46	struct QuantizeAndDequantizeOneScaleFunctor {
47	void operator()(const Device& d, typename TTypes<T>::ConstVec input,
48	bool signed_input, int num_bits, bool range_given,
49	Tensor* input_min_tensor, Tensor* input_max_tensor,
50	QuantizerRoundMode round_mode, bool narrow_range,
51	typename TTypes<T>::Vec output);
52	};
53
54	template <typename Device, typename T>
55	struct QuantizeAndDequantizePerChannelFunctor {
56	void operator()(const Device& d, typename TTypes<T, `3`>::ConstTensor input,
57	bool signed_input, int num_bits, bool range_given,
58	Tensor* input_min_tensor, Tensor* input_max_tensor,
59	QuantizerRoundMode round_mode, bool narrow_range,
60	typename TTypes<T, `3`>::Tensor output);
61	};
62
63	template <typename Device, typename T>
64	struct QuantizeAndDequantizeOneScaleGradientFunctor {
65	void operator()(const Device& d, typename TTypes<T>::ConstFlat gradient,
66	typename TTypes<T>::ConstFlat input,
67	typename TTypes<T>::ConstScalar input_min,
68	typename TTypes<T>::ConstScalar input_max,
69	typename TTypes<T>::Flat input_backprop,
70	typename TTypes<T>::Scalar input_min_backprop,
71	typename TTypes<T>::Scalar input_max_backprop);
72	};
73
74	template <typename Device, typename T>
75	struct QuantizeAndDequantizePerChannelGradientFunctor {
76	void operator()(const Device& d, typename TTypes<T, `3`>::ConstTensor gradient,
77	typename TTypes<T, `3`>::ConstTensor input,
78	const Tensor* input_min_tensor,
79	const Tensor* input_max_tensor,
80	typename TTypes<T, `3`>::Tensor input_backprop,
81	typename TTypes<T>::Flat input_min_backprop,
82	typename TTypes<T>::Flat input_max_backprop);
83	};
84
85	// The implementation below runs on both CPU and GPU.
86	template <typename Device, typename T, typename Func,
87	typename Vec = typename TTypes<T>::Vec,
88	typename ConstVec = typename TTypes<T>::ConstVec>
89	void ClampScaleAndRound(const Device& d, ConstVec input, T min_range,
90	T max_range, T scale, T inverse_scale, Func round_func,
91	Vec output) {
92	output.device(d) = (input.cwiseMin(max_range).cwiseMax(min_range) * scale)
93	.unaryExpr(round_func) *
94	inverse_scale;
95	}
96
97	// The implementation below runs on both CPU and GPU.
98	template <typename Device, typename T, typename Vec = typename TTypes<T>::Vec,
99	typename ConstVec = typename TTypes<T>::ConstVec>
100	void ClampScaleAndRound(const Device& d, ConstVec input, T min_range,
101	T max_range, T scale, T inverse_scale,
102	QuantizerRoundMode round_mode, Vec output) {
103	switch (round_mode) {
104	case ROUND_HALF_TO_EVEN:
105	ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
106	Eigen::internal::scalar_round_half_to_even_op<T>(),
107	output);
108	break;
109	case ROUND_HALF_UP:
110	ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
111	Eigen::internal::scalar_round_up_op<T>(), output);
112	break;
113	}
114	}
115
116	// The implementation below runs on both CPU and GPU.
117	template <typename Device, typename T, typename Func,
118	typename Vec = typename TTypes<T>::Vec,
119	typename ConstVec = typename TTypes<T>::ConstVec>
120	void ScaleAndRound(const Device& d, ConstVec input, T scale, T inverse_scale,
121	Func round_func, Vec output) {
122	output.device(d) = (input * scale).unaryExpr(round_func) * inverse_scale;
123	}
124
125	// The implementation below runs on both CPU and GPU.
126	template <typename Device, typename T, typename Vec = typename TTypes<T>::Vec,
127	typename ConstVec = typename TTypes<T>::ConstVec>
128	void ScaleAndRound(const Device& d, ConstVec input, T scale, T inverse_scale,
129	QuantizerRoundMode round_mode, Vec output) {
130	switch (round_mode) {
131	case ROUND_HALF_TO_EVEN:
132	ScaleAndRound(d, input, scale, inverse_scale,
133	Eigen::internal::scalar_round_half_to_even_op<T>(), output);
134	break;
135	case ROUND_HALF_UP:
136	ScaleAndRound(d, input, scale, inverse_scale,
137	Eigen::internal::scalar_round_up_op<T>(), output);
138	break;
139	}
140	}
141
142	template <typename T>
143	void ComputeQuantizationRange(bool signed_input, int num_bits,
144	QuantizerRoundMode round_mode, bool narrow_range,
145	T* min_range, T* max_range, T* scale,
146	T* inverse_scale) {
147	// Calculate the range for the simulated integer quantization:
148	// e.g. [-127,127] for signed = true, narrow_range = true, num_bits = 8,
149	// or [-128,127] for signed = true, narrow_range = false, num_bits = 8,
150	// or [0, 255] for signed = false, num_bits = 8.
151	const int64_t min_quantized =
152	signed_input ? narrow_range ? -(`1ULL` << (num_bits - `1`)) + `1`
153	: -(`1ULL` << (num_bits - `1`))
154	: `0`;
155	const int64_t max_quantized =
156	signed_input ? (`1ULL` << (num_bits - `1`)) - `1` : (`1ULL` << num_bits) - `1`;
157	// Determine the maximum scaling factor that would scale
158	// [min_range, max_range] to not exceed [min_quantized, max_quantized],
159	// while keeping 0 unchanged.
160	const T scale_from_min_side = (min_quantized * *min_range > `0`)
161	? min_quantized / *min_range
162	: std::numeric_limits<T>::max();
163	const T scale_from_max_side = (max_quantized * *max_range > `0`)
164	? max_quantized / *max_range
165	: std::numeric_limits<T>::max();
166
167	// Note: Avoids changing the side of the range that determines scale.
168	if (scale_from_min_side < scale_from_max_side) {
169	*scale = scale_from_min_side;
170	inverse_scale = min_range / min_quantized;
171	max_range = max_quantized *inverse_scale;
172	} else {
173	*scale = scale_from_max_side;
174	inverse_scale = max_range / max_quantized;
175	min_range = min_quantized *inverse_scale;
176	}
177	}
178
179	// The implementation below runs on both CPU and GPU.
180	template <typename Device, typename T>
181	struct QuantizeAndDequantizeOneScaleImpl {
182	static void Compute(const Device& d, typename TTypes<T>::ConstVec input,
183	bool signed_input, int num_bits, bool range_given,
184	Tensor* input_min_tensor, Tensor* input_max_tensor,
185	QuantizerRoundMode round_mode, bool narrow_range,
186	typename TTypes<T>::Vec output) {
187	T min_range;
188	T max_range;
189	auto input_min = input_min_tensor->scalar<T>();
190	auto input_max = input_max_tensor->scalar<T>();
191	if (!range_given) {
192	input_min.device(d) = input.minimum();
193	input_max.device(d) = input.maximum();
194	d.memcpyDeviceToHost(&min_range, input_min.data(), sizeof(T));
195	d.memcpyDeviceToHost(&max_range, input_max.data(), sizeof(T));
196	} else {
197	// Copy the range values from their respective tensors on the host.
198	min_range = input_min_tensor->scalar<T>()();
199	max_range = input_max_tensor->scalar<T>()();
200	}
201
202	T scale, inverse_scale;
203	ComputeQuantizationRange(signed_input, num_bits, round_mode, narrow_range,
204	&min_range, &max_range, &scale, &inverse_scale);
205
206	if (range_given) {
207	// Note: The clamping here is to avoid overflow in the quantized type.
208	// The semantics of the op does not guarantee to clamp to the specified
209	// min_range and max_range - because we may have changed either min_range
210	// or max_range.
211	ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
212	round_mode, output);
213	} else {
214	ScaleAndRound(d, input, scale, inverse_scale, round_mode, output);
215	}
216	}
217	};
218
219	// The implementation below runs on both CPU and GPU.
220
221	template <typename Device, typename T>
222	struct QuantizeAndDequantizePerChannelImpl {
223	static void Compute(const Device& d, typename TTypes<T, `3`>::ConstTensor input,
224	bool signed_input, int num_bits, bool range_given,
225	Tensor* input_min_tensor, Tensor* input_max_tensor,
226	QuantizerRoundMode round_mode, bool narrow_range,
227	typename TTypes<T, `3`>::Tensor output) {
228	using Index = typename tensorflow::TTypes<T>::ConstTensor::Index;
229	int num_channels = input.dimension(`1`);
230	auto input_min = input_min_tensor->vec<T>();
231	auto input_max = input_max_tensor->vec<T>();
232	std::vector<T> min_range(num_channels);
233	std::vector<T> max_range(num_channels);
234
235	if (!range_given) {
236	Eigen::IndexList<Eigen::type2index<`0`>, Eigen::type2index<`2`> > reduce_dims;
237	input_min.device(d) = input.minimum(reduce_dims);
238	input_max.device(d) = input.maximum(reduce_dims);
239	d.memcpyDeviceToHost(min_range.data(), input_min.data(),
240	num_channels * sizeof(T));
241	d.memcpyDeviceToHost(max_range.data(), input_max.data(),
242	num_channels * sizeof(T));
243	} else {
244	// Copy the range values from their respective tensors on the host.
245	std::memcpy(min_range.data(), input_min_tensor->vec<T>().data(),
246	num_channels * sizeof(T));
247	std::memcpy(max_range.data(), input_max_tensor->vec<T>().data(),
248	num_channels * sizeof(T));
249	}
250
251	for (Index i = `0`; i < num_channels; ++i) {
252	const auto input_chip = input.template chip<`1`>(i);
253	auto output_chip = output.template chip<`1`>(i);
254
255	T scale, inverse_scale;
256	ComputeQuantizationRange(signed_input, num_bits, round_mode, narrow_range,
257	&min_range[i], &max_range[i], &scale,
258	&inverse_scale);
259	if (range_given) {
260	ClampScaleAndRound(d, input_chip, min_range[i], max_range[i], scale,
261	inverse_scale, round_mode, output_chip);
262	} else {
263	ScaleAndRound(d, input_chip, scale, inverse_scale, round_mode,
264	output_chip);
265	}
266	}
267	}
268	};
269
270	template <typename Device, typename T>
271	struct QuantizeAndDequantizeOneScaleGradientImpl {
272	static void Compute(const Device& d, typename TTypes<T>::ConstFlat gradient,
273	typename TTypes<T>::ConstFlat input,
274	typename TTypes<T>::ConstScalar input_min,
275	typename TTypes<T>::ConstScalar input_max,
276	typename TTypes<T>::Flat input_backprop,
277	typename TTypes<T>::Scalar input_min_backprop,
278	typename TTypes<T>::Scalar input_max_backprop) {
279	const T min_val = input_min();
280	const T max_val = input_max();
281	const auto in_range =
282	(input >= min_val && input <= max_val)
283	.select(input.constant(`1.0f`), input.constant(`0.0f`));
284	input_backprop.device(d) = gradient * in_range;
285	input_min_backprop.device(d) = input_min_backprop.constant(`0.0f`);
286	input_max_backprop.device(d) = input_max_backprop.constant(`0.0f`);
287	}
288	};
289
290	template <typename Device, typename T>
291	struct QuantizeAndDequantizePerChannelGradientImpl {
292	static void Compute(const Device& d,
293	typename TTypes<T, `3`>::ConstTensor gradient,
294	typename TTypes<T, `3`>::ConstTensor input,
295	const Tensor* input_min_tensor,
296	const Tensor* input_max_tensor,
297	typename TTypes<T, `3`>::Tensor input_backprop,
298	typename TTypes<T>::Flat input_min_backprop,
299	typename TTypes<T>::Flat input_max_backprop) {
300	using Index = typename tensorflow::TTypes<T>::ConstTensor::Index;
301	auto input_min = input_min_tensor->vec<T>();
302	auto input_max = input_max_tensor->vec<T>();
303	int num_channels = input.dimension(`1`);
304	for (Index i = `0`; i < num_channels; ++i) {
305	const auto gradient_chip = gradient.template chip<`1`>(i);
306	const auto input_chip = input.template chip<`1`>(i);
307	const T min_val = input_min(i);
308	const T max_val = input_max(i);
309	const auto in_range =
310	(input_chip >= min_val && input_chip <= max_val)
311	.select(input_chip.constant(`1.0f`), input_chip.constant(`0.0f`));
312	input_backprop.template chip<`1`>(i).device(d) = gradient_chip * in_range;
313	}
314	input_min_backprop.device(d) = input_min_backprop.constant(`0.0f`);
315	input_max_backprop.device(d) = input_max_backprop.constant(`0.0f`);
316	}
317	};
318
319	} // end of namespace functor
320	} // end of namespace tensorflow
321
322	#endif // TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
323

Browse the source code of tensorflow/tensorflow/core/kernels/quantize_and_dequantize_op.h