fake_quant_ops_functor.h source code [tensorflow/tensorflow/core/kernels/fake_quant_ops_functor.h]

/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16	#ifndef TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_
17	#define TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_
18
19	#include <tuple>
20
21	#define EIGEN_STACK_ALLOCATION_LIMIT 0
22	#define EIGEN_USE_THREADS
23	#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
24	#include "tensorflow/core/framework/tensor_types.h"
25	#include "tensorflow/core/platform/types.h"
26
27	EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float StdRound(float input) {
28	// On Android, std::round() isn't present, just round().
29	#if defined(__ANDROID__)
30	return round(input);
31	#else
32	return std::round(input);
33	#endif
34	}
35
36	namespace tensorflow {
37
38	// Gymnastics with nudged zero point is to ensure that real zero maps to
39	// an integer, which is required for e.g. zero-padding in convolutional layers.
40	// Outputs nudged_min, nudged_max, nudged_scale.
41	EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(
42	const float min, const float max, const int quant_min, const int quant_max,
43	float* nudged_min, float* nudged_max, float* scale, float* inv_scale) {
44	const float quant_min_float = static_cast<float>(quant_min);
45	const float quant_max_float = static_cast<float>(quant_max);
46	*scale = (max - min) / (quant_max_float - quant_min_float);
47	// Re-calculate the inverse to avoid loss of precision which would result
48	// from simply taking the reciprocal of scale*
49	*inv_scale = (quant_max_float - quant_min_float) / (max - min);
50	const float zero_point_from_min = quant_min_float - min / *scale;
51	const uint16 nudged_zero_point = [zero_point_from_min, quant_min,
52	quant_min_float, quant_max,
53	quant_max_float] {
54	if (zero_point_from_min < quant_min_float) {
55	return static_cast<uint16>(quant_min);
56	}
57	if (zero_point_from_min > quant_max_float) {
58	return static_cast<uint16>(quant_max);
59	}
60	return static_cast<uint16>(StdRound(zero_point_from_min));
61	}();
62	nudged_min = (quant_min_float - nudged_zero_point) (*scale);
63	nudged_max = (quant_max_float - nudged_zero_point) (*scale);
64	}
65
66	template <typename T>
67	using ConstScalar = typename tensorflow::TTypes<T>::ConstScalar;
68	template <typename T>
69	using Scalar = typename tensorflow::TTypes<T>::Scalar;
70	template <typename T>
71	using ConstVec = typename tensorflow::TTypes<T>::ConstVec;
72	template <typename T>
73	using Vec = typename tensorflow::TTypes<T>::Vec;
74	template <typename T>
75	using ConstFlat = typename tensorflow::TTypes<T>::ConstFlat;
76	template <typename T>
77	using Flat = typename tensorflow::TTypes<T>::Flat;
78
79	// Functor called by FakeQuantWithMinMaxArgsOp to do the work. Compiles both
80	// for CPU and GPU.
81	template <typename Device>
82	struct FakeQuantWithMinMaxArgsFunctor {
83	void operator()(const Device& d, ConstFlat<float> inputs, const float min,
84	const float max, const int quant_min, const int quant_max,
85	Flat<float> outputs) {
86	eigen_assert(min <= `0.0f` && "min should be <= 0.0");
87	eigen_assert(max >= `0.0f` && "max should be >= 0.0");
88	eigen_assert(min < max && "min should be < max");
89
90	float nudged_min, nudged_max, nudged_scale, inv_nudged_scale;
91	Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
92	&nudged_scale, &inv_nudged_scale);
93
94	const float quant_zero = floor(-nudged_min * inv_nudged_scale + `0.5f`);
95
96	auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
97	auto clamped_shifted = clamped - nudged_min;
98	outputs.device(d) =
99	(clamped_shifted * inv_nudged_scale - quant_zero + `0.5f`).floor() *
100	nudged_scale;
101	}
102	};
103
104	// Functor called by FakeQuantWithMinMaxArgsGradientOp to do the work. Compiles
105	// both for CPU and GPU.
106	template <typename Device>
107	struct FakeQuantWithMinMaxArgsGradientFunctor {
108	void operator()(const Device& d, ConstFlat<float> gradients,
109	ConstFlat<float> inputs, const float min, const float max,
110	const int quant_min, const int quant_max,
111	Flat<float> backprops) {
112	eigen_assert(min <= `0.0f` && "min should be <= 0.0");
113	eigen_assert(max >= `0.0f` && "max should be >= 0.0");
114	eigen_assert(min < max && "min should be < max");
115
116	float nudged_min, nudged_max, nudged_scale, inv_nudged_scale;
117	Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
118	&nudged_scale, &inv_nudged_scale);
119
120	auto between_nudged_min_max =
121	(inputs >= nudged_min && inputs <= nudged_max)
122	.select(inputs.constant(`1.0f`), inputs.constant(`0.0f`));
123	backprops.device(d) = gradients * between_nudged_min_max;
124	}
125	};
126
127	// Functor called by FakeQuantWithMinMaxVarsOp to do the work. Compiles both
128	// for CPU and GPU.
129	template <typename Device>
130	struct FakeQuantWithMinMaxVarsFunctor {
131	void operator()(const Device& d, ConstFlat<float> inputs,
132	ConstScalar<float> min, ConstScalar<float> max,
133	const int quant_min, const int quant_max,
134	Flat<float> outputs) {
135	const float min_val = min ();
136	const float max_val = max ();
137	// If min and max are both zero, we should just return zero.
138	if (min_val == `0.0f` && max_val == `0.0f`) {
139	outputs.device(d) = outputs.constant(`0.0f`);
140	return;
141	}
142	float nudged_min, nudged_max, nudged_scale, inv_nudged_scale;
143	Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
144	&nudged_scale, &inv_nudged_scale);
145
146	const float quant_zero = floor(-nudged_min * inv_nudged_scale + `0.5f`);
147	const auto nudged_scale_repl = inputs.constant(nudged_scale);
148	// const auto inv_nudged_scale_repl = inputs.constant(inv_nudged_scale);
149
150	const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
151	const auto clamped_shifted = clamped - nudged_min;
152	outputs.device(d) =
153	(clamped_shifted / nudged_scale_repl - quant_zero + `0.5f`).floor() *
154	nudged_scale_repl;
155	}
156	};
157
158	// Functor called by FakeQuantWithMinMaxVarsGradientOp to do the work. Compiles
159	// both for CPU and GPU.
160	template <typename Device>
161	struct FakeQuantWithMinMaxVarsGradientFunctor {
162	void operator()(const Device& d, ConstFlat<float> gradients,
163	ConstFlat<float> inputs, ConstScalar<float> min,
164	ConstScalar<float> max, const int quant_min,
165	const int quant_max, Flat<float> backprops_wrt_input,
166	Scalar<float> backprop_wrt_min,
167	Scalar<float> backprop_wrt_max) {
168	const float min_val = min ();
169	const float max_val = max ();
170	// If min and max are both zero, we propagate everything to inputs.
171	if (min_val == `0.0f` && max_val == `0.0f`) {
172	backprops_wrt_input.device(d) = gradients;
173	backprop_wrt_min.device(d) = backprop_wrt_min.constant(`0.0f`);
174	backprop_wrt_max.device(d) = backprop_wrt_max.constant(`0.0f`);
175	return;
176	}
177	float nudged_min, nudged_max, nudged_scale, inv_nudged_scale;
178	Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
179	&nudged_scale, &inv_nudged_scale);
180
181	const auto between_min_max =
182	(inputs >= nudged_min && inputs <= nudged_max)
183	.select(inputs.constant(`1.0f`), inputs.constant(`0.0f`));
184	backprops_wrt_input.device(d) = gradients * between_min_max;
185
186	const auto below_min =
187	(inputs < nudged_min)
188	.select(inputs.constant(`1.0f`), inputs.constant(`0.0f`));
189	backprop_wrt_min.device(d) = (gradients * below_min).sum();
190
191	const auto above_max =
192	(inputs > nudged_max)
193	.select(inputs.constant(`1.0f`), inputs.constant(`0.0f`));
194	backprop_wrt_max.device(d) = (gradients * above_max).sum();
195	}
196	};
197
198	using Index = typename tensorflow::TTypes<float>::ConstTensor::Index;
199
200	// Functor called by FakeQuantWithMinMaxVarsPerChannelOp to do the work.
201	// Compiles both for CPU and GPU.
202	//
203	// Already verified: inputs, outputs are of shape [b, d], min, max are of shape
204	// [d].
205	template <typename Device>
206	struct FakeQuantWithMinMaxVarsPerChannelFunctor {
207	void operator()(const Device& d, TTypes<float>::ConstMatrix inputs,
208	ConstVec<float> min, ConstVec<float> max, const int quant_min,
209	const int quant_max, TTypes<float>::Matrix outputs) {
210	for (Index i = `0`; i < min.size(); ++i) {
211	const float min_val = min (i);
212	const float max_val = max (i);
213	// If min and max are both zero, we should just return zero.
214	if (min_val == `0.0f` && max_val == `0.0f`) {
215	auto chip = outputs.chip<`1`>(i);
216	chip.device(d) = chip.constant(`0.0f`);
217	continue;
218	}
219	float nudged_min, nudged_max, nudged_scale, inv_nudged_scale;
220	Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
221	&nudged_scale, &inv_nudged_scale);
222
223	const float quant_zero = floor(-nudged_min * inv_nudged_scale + `0.5f`);
224
225	const auto clamped =
226	inputs.chip<`1`>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
227	const auto clamped_shifted = clamped - nudged_min;
228
229	outputs.chip<`1`>(i).device(d) =
230	(clamped_shifted * inv_nudged_scale - quant_zero + `0.5f`).floor() *
231	nudged_scale;
232	}
233	}
234	};
235
236	// Functor called by FakeQuantWithMinMaxVarsPerChannelGradientOp to do the work.
237	// Compiles both for CPU and GPU.
238	//
239	// Already verified: gradients, inputs, backprops_wrt_input are of shape [b, d],
240	// min, max, backprop_wrt_min, backprop_wrt_max are of shape [d].
241	template <typename Device>
242	struct FakeQuantWithMinMaxVarsPerChannelGradientFunctor {
243	void operator()(const Device& d, TTypes<float>::ConstMatrix gradients,
244	TTypes<float>::ConstMatrix inputs, ConstVec<float> min,
245	ConstVec<float> max, const int quant_min, const int quant_max,
246	TTypes<float>::Matrix backprops_wrt_input,
247	Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
248	for (Index i = `0`; i < min.size(); ++i) {
249	const float min_val = min (i);
250	const float max_val = max (i);
251	const auto gradients_chip = gradients.chip<`1`>(i);
252	const auto inputs_chip = inputs.chip<`1`>(i);
253	// If min and max are both zero, we propagate everything to inputs.
254	if (min_val == `0.0f` && max_val == `0.0f`) {
255	backprops_wrt_input.chip<`1`>(i).device(d) = gradients_chip;
256	auto min_chip = backprop_wrt_min.chip<`0`>(i);
257	auto max_chip = backprop_wrt_max.chip<`0`>(i);
258	min_chip.device(d) = min_chip.constant(`0.0f`);
259	max_chip.device(d) = max_chip.constant(`0.0f`);
260	continue;
261	}
262	float nudged_min, nudged_max, nudged_scale, inv_nudged_scale;
263	Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
264	&nudged_scale, &inv_nudged_scale);
265
266	const auto between_min_max =
267	(inputs_chip >= nudged_min && inputs_chip <= nudged_max)
268	.select(inputs_chip.constant(`1.0f`), inputs_chip.constant(`0.0f`));
269	backprops_wrt_input.chip<`1`>(i).device(d) =
270	gradients_chip * between_min_max;
271
272	const auto below_min =
273	(inputs_chip < nudged_min)
274	.select(inputs_chip.constant(`1.0f`), inputs_chip.constant(`0.0f`));
275	Eigen::DSizes<Index, `1`> reduce(`0`);
276	backprop_wrt_min.chip<`0`>(i).device(d) =
277	(gradients_chip * below_min).sum(reduce);
278
279	const auto above_max =
280	(inputs_chip > nudged_max)
281	.select(inputs_chip.constant(`1.0f`), inputs_chip.constant(`0.0f`));
282	backprop_wrt_max.chip<`0`>(i).device(d) =
283	(gradients_chip * above_max).sum(reduce);
284	}
285	}
286	};
287
288	} // namespace tensorflow
289
290	#endif // TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_
291

Browse the source code of tensorflow/tensorflow/core/kernels/fake_quant_ops_functor.h