1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_ |
17 | #define TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_ |
18 | |
19 | #include <tuple> |
20 | |
21 | #define EIGEN_STACK_ALLOCATION_LIMIT 0 |
22 | #define EIGEN_USE_THREADS |
23 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
24 | #include "tensorflow/core/framework/tensor_types.h" |
25 | #include "tensorflow/core/platform/types.h" |
26 | |
// Rounds `input` to the nearest integral value and returns it as a float.
// Uses std::round() everywhere except on Android, where std::round() isn't
// present and only the unqualified round() is available.
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float StdRound(float input) {
#if !defined(__ANDROID__)
  return std::round(input);
#else
  return round(input);
#endif
}
35 | |
36 | namespace tensorflow { |
37 | |
38 | // Gymnastics with nudged zero point is to ensure that real zero maps to |
39 | // an integer, which is required for e.g. zero-padding in convolutional layers. |
40 | // Outputs nudged_min, nudged_max, nudged_scale. |
41 | EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge( |
42 | const float min, const float max, const int quant_min, const int quant_max, |
43 | float* nudged_min, float* nudged_max, float* scale, float* inv_scale) { |
44 | const float quant_min_float = static_cast<float>(quant_min); |
45 | const float quant_max_float = static_cast<float>(quant_max); |
46 | *scale = (max - min) / (quant_max_float - quant_min_float); |
47 | // Re-calculate the inverse to avoid loss of precision which would result |
48 | // from simply taking the reciprocal of *scale |
49 | *inv_scale = (quant_max_float - quant_min_float) / (max - min); |
50 | const float zero_point_from_min = quant_min_float - min / *scale; |
51 | const uint16 nudged_zero_point = [zero_point_from_min, quant_min, |
52 | quant_min_float, quant_max, |
53 | quant_max_float] { |
54 | if (zero_point_from_min < quant_min_float) { |
55 | return static_cast<uint16>(quant_min); |
56 | } |
57 | if (zero_point_from_min > quant_max_float) { |
58 | return static_cast<uint16>(quant_max); |
59 | } |
60 | return static_cast<uint16>(StdRound(zero_point_from_min)); |
61 | }(); |
62 | *nudged_min = (quant_min_float - nudged_zero_point) * (*scale); |
63 | *nudged_max = (quant_max_float - nudged_zero_point) * (*scale); |
64 | } |
65 | |
66 | template <typename T> |
67 | using ConstScalar = typename tensorflow::TTypes<T>::ConstScalar; |
68 | template <typename T> |
69 | using Scalar = typename tensorflow::TTypes<T>::Scalar; |
70 | template <typename T> |
71 | using ConstVec = typename tensorflow::TTypes<T>::ConstVec; |
72 | template <typename T> |
73 | using Vec = typename tensorflow::TTypes<T>::Vec; |
74 | template <typename T> |
75 | using ConstFlat = typename tensorflow::TTypes<T>::ConstFlat; |
76 | template <typename T> |
77 | using Flat = typename tensorflow::TTypes<T>::Flat; |
78 | |
79 | // Functor called by FakeQuantWithMinMaxArgsOp to do the work. Compiles both |
80 | // for CPU and GPU. |
81 | template <typename Device> |
82 | struct FakeQuantWithMinMaxArgsFunctor { |
83 | void operator()(const Device& d, ConstFlat<float> inputs, const float min, |
84 | const float max, const int quant_min, const int quant_max, |
85 | Flat<float> outputs) { |
86 | eigen_assert(min <= 0.0f && "min should be <= 0.0" ); |
87 | eigen_assert(max >= 0.0f && "max should be >= 0.0" ); |
88 | eigen_assert(min < max && "min should be < max" ); |
89 | |
90 | float nudged_min, nudged_max, nudged_scale, inv_nudged_scale; |
91 | Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max, |
92 | &nudged_scale, &inv_nudged_scale); |
93 | |
94 | const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f); |
95 | |
96 | auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); |
97 | auto clamped_shifted = clamped - nudged_min; |
98 | outputs.device(d) = |
99 | (clamped_shifted * inv_nudged_scale - quant_zero + 0.5f).floor() * |
100 | nudged_scale; |
101 | } |
102 | }; |
103 | |
104 | // Functor called by FakeQuantWithMinMaxArgsGradientOp to do the work. Compiles |
105 | // both for CPU and GPU. |
106 | template <typename Device> |
107 | struct FakeQuantWithMinMaxArgsGradientFunctor { |
108 | void operator()(const Device& d, ConstFlat<float> gradients, |
109 | ConstFlat<float> inputs, const float min, const float max, |
110 | const int quant_min, const int quant_max, |
111 | Flat<float> backprops) { |
112 | eigen_assert(min <= 0.0f && "min should be <= 0.0" ); |
113 | eigen_assert(max >= 0.0f && "max should be >= 0.0" ); |
114 | eigen_assert(min < max && "min should be < max" ); |
115 | |
116 | float nudged_min, nudged_max, nudged_scale, inv_nudged_scale; |
117 | Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max, |
118 | &nudged_scale, &inv_nudged_scale); |
119 | |
120 | auto between_nudged_min_max = |
121 | (inputs >= nudged_min && inputs <= nudged_max) |
122 | .select(inputs.constant(1.0f), inputs.constant(0.0f)); |
123 | backprops.device(d) = gradients * between_nudged_min_max; |
124 | } |
125 | }; |
126 | |
127 | // Functor called by FakeQuantWithMinMaxVarsOp to do the work. Compiles both |
128 | // for CPU and GPU. |
129 | template <typename Device> |
130 | struct FakeQuantWithMinMaxVarsFunctor { |
131 | void operator()(const Device& d, ConstFlat<float> inputs, |
132 | ConstScalar<float> min, ConstScalar<float> max, |
133 | const int quant_min, const int quant_max, |
134 | Flat<float> outputs) { |
135 | const float min_val = min(); |
136 | const float max_val = max(); |
137 | // If min and max are both zero, we should just return zero. |
138 | if (min_val == 0.0f && max_val == 0.0f) { |
139 | outputs.device(d) = outputs.constant(0.0f); |
140 | return; |
141 | } |
142 | float nudged_min, nudged_max, nudged_scale, inv_nudged_scale; |
143 | Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max, |
144 | &nudged_scale, &inv_nudged_scale); |
145 | |
146 | const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f); |
147 | const auto nudged_scale_repl = inputs.constant(nudged_scale); |
148 | // const auto inv_nudged_scale_repl = inputs.constant(inv_nudged_scale); |
149 | |
150 | const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); |
151 | const auto clamped_shifted = clamped - nudged_min; |
152 | outputs.device(d) = |
153 | (clamped_shifted / nudged_scale_repl - quant_zero + 0.5f).floor() * |
154 | nudged_scale_repl; |
155 | } |
156 | }; |
157 | |
158 | // Functor called by FakeQuantWithMinMaxVarsGradientOp to do the work. Compiles |
159 | // both for CPU and GPU. |
160 | template <typename Device> |
161 | struct FakeQuantWithMinMaxVarsGradientFunctor { |
162 | void operator()(const Device& d, ConstFlat<float> gradients, |
163 | ConstFlat<float> inputs, ConstScalar<float> min, |
164 | ConstScalar<float> max, const int quant_min, |
165 | const int quant_max, Flat<float> backprops_wrt_input, |
166 | Scalar<float> backprop_wrt_min, |
167 | Scalar<float> backprop_wrt_max) { |
168 | const float min_val = min(); |
169 | const float max_val = max(); |
170 | // If min and max are both zero, we propagate everything to inputs. |
171 | if (min_val == 0.0f && max_val == 0.0f) { |
172 | backprops_wrt_input.device(d) = gradients; |
173 | backprop_wrt_min.device(d) = backprop_wrt_min.constant(0.0f); |
174 | backprop_wrt_max.device(d) = backprop_wrt_max.constant(0.0f); |
175 | return; |
176 | } |
177 | float nudged_min, nudged_max, nudged_scale, inv_nudged_scale; |
178 | Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max, |
179 | &nudged_scale, &inv_nudged_scale); |
180 | |
181 | const auto between_min_max = |
182 | (inputs >= nudged_min && inputs <= nudged_max) |
183 | .select(inputs.constant(1.0f), inputs.constant(0.0f)); |
184 | backprops_wrt_input.device(d) = gradients * between_min_max; |
185 | |
186 | const auto below_min = |
187 | (inputs < nudged_min) |
188 | .select(inputs.constant(1.0f), inputs.constant(0.0f)); |
189 | backprop_wrt_min.device(d) = (gradients * below_min).sum(); |
190 | |
191 | const auto above_max = |
192 | (inputs > nudged_max) |
193 | .select(inputs.constant(1.0f), inputs.constant(0.0f)); |
194 | backprop_wrt_max.device(d) = (gradients * above_max).sum(); |
195 | } |
196 | }; |
197 | |
198 | using Index = typename tensorflow::TTypes<float>::ConstTensor::Index; |
199 | |
200 | // Functor called by FakeQuantWithMinMaxVarsPerChannelOp to do the work. |
201 | // Compiles both for CPU and GPU. |
202 | // |
203 | // Already verified: inputs, outputs are of shape [b, d], min, max are of shape |
204 | // [d]. |
205 | template <typename Device> |
206 | struct FakeQuantWithMinMaxVarsPerChannelFunctor { |
207 | void operator()(const Device& d, TTypes<float>::ConstMatrix inputs, |
208 | ConstVec<float> min, ConstVec<float> max, const int quant_min, |
209 | const int quant_max, TTypes<float>::Matrix outputs) { |
210 | for (Index i = 0; i < min.size(); ++i) { |
211 | const float min_val = min(i); |
212 | const float max_val = max(i); |
213 | // If min and max are both zero, we should just return zero. |
214 | if (min_val == 0.0f && max_val == 0.0f) { |
215 | auto chip = outputs.chip<1>(i); |
216 | chip.device(d) = chip.constant(0.0f); |
217 | continue; |
218 | } |
219 | float nudged_min, nudged_max, nudged_scale, inv_nudged_scale; |
220 | Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max, |
221 | &nudged_scale, &inv_nudged_scale); |
222 | |
223 | const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f); |
224 | |
225 | const auto clamped = |
226 | inputs.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min); |
227 | const auto clamped_shifted = clamped - nudged_min; |
228 | |
229 | outputs.chip<1>(i).device(d) = |
230 | (clamped_shifted * inv_nudged_scale - quant_zero + 0.5f).floor() * |
231 | nudged_scale; |
232 | } |
233 | } |
234 | }; |
235 | |
236 | // Functor called by FakeQuantWithMinMaxVarsPerChannelGradientOp to do the work. |
237 | // Compiles both for CPU and GPU. |
238 | // |
239 | // Already verified: gradients, inputs, backprops_wrt_input are of shape [b, d], |
240 | // min, max, backprop_wrt_min, backprop_wrt_max are of shape [d]. |
241 | template <typename Device> |
242 | struct FakeQuantWithMinMaxVarsPerChannelGradientFunctor { |
243 | void operator()(const Device& d, TTypes<float>::ConstMatrix gradients, |
244 | TTypes<float>::ConstMatrix inputs, ConstVec<float> min, |
245 | ConstVec<float> max, const int quant_min, const int quant_max, |
246 | TTypes<float>::Matrix backprops_wrt_input, |
247 | Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) { |
248 | for (Index i = 0; i < min.size(); ++i) { |
249 | const float min_val = min(i); |
250 | const float max_val = max(i); |
251 | const auto gradients_chip = gradients.chip<1>(i); |
252 | const auto inputs_chip = inputs.chip<1>(i); |
253 | // If min and max are both zero, we propagate everything to inputs. |
254 | if (min_val == 0.0f && max_val == 0.0f) { |
255 | backprops_wrt_input.chip<1>(i).device(d) = gradients_chip; |
256 | auto min_chip = backprop_wrt_min.chip<0>(i); |
257 | auto max_chip = backprop_wrt_max.chip<0>(i); |
258 | min_chip.device(d) = min_chip.constant(0.0f); |
259 | max_chip.device(d) = max_chip.constant(0.0f); |
260 | continue; |
261 | } |
262 | float nudged_min, nudged_max, nudged_scale, inv_nudged_scale; |
263 | Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max, |
264 | &nudged_scale, &inv_nudged_scale); |
265 | |
266 | const auto between_min_max = |
267 | (inputs_chip >= nudged_min && inputs_chip <= nudged_max) |
268 | .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); |
269 | backprops_wrt_input.chip<1>(i).device(d) = |
270 | gradients_chip * between_min_max; |
271 | |
272 | const auto below_min = |
273 | (inputs_chip < nudged_min) |
274 | .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); |
275 | Eigen::DSizes<Index, 1> reduce(0); |
276 | backprop_wrt_min.chip<0>(i).device(d) = |
277 | (gradients_chip * below_min).sum(reduce); |
278 | |
279 | const auto above_max = |
280 | (inputs_chip > nudged_max) |
281 | .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); |
282 | backprop_wrt_max.chip<0>(i).device(d) = |
283 | (gradients_chip * above_max).sum(reduce); |
284 | } |
285 | } |
286 | }; |
287 | |
288 | } // namespace tensorflow |
289 | |
290 | #endif // TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_ |
291 | |