1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
// Implements a quantized eight-bit version of the add operation.
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #if defined(__ARM_NEON__) || defined(__ARM_NEON) |
21 | #define USE_NEON |
22 | #define QUANTIZED_ADD_USE_NEON |
23 | #include <arm_neon.h> |
24 | #endif |
25 | |
26 | #include "tensorflow/core/framework/op_kernel.h" |
27 | #include "tensorflow/core/framework/tensor.h" |
28 | #include "tensorflow/core/framework/tensor_shape.h" |
29 | #include "tensorflow/core/kernels/meta_support.h" |
30 | #include "tensorflow/core/kernels/quantization_utils.h" |
31 | #include "tensorflow/core/lib/core/errors.h" |
32 | #include "tensorflow/core/util/bcast.h" |
33 | |
// There are implementations for three broadcast patterns for add:
//  - Scalar + Array
//  - Array + Array
//  - Array + Shorter Array (repeated to match the first)
//
// These cover the most common broadcast patterns, and NEON SIMD versions are
// provided to accelerate them on ARM platforms.
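//
// For example (shapes shown for illustration only):
//  - Scalar + Array:         [] + [4]     -> [4]
//  - Array + Array:          [4] + [4]    -> [4]
//  - Array + Shorter Array:  [2, 3] + [3] -> [2, 3] (the [3] operand is tiled)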
41 | |
42 | namespace tensorflow { |
43 | namespace { |
44 | |
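// Reference implementation of scalar + array addition: the scalar operand and
// every element of the full input are requantized into the shared output
// range, then summed. The NEON and non-NEON quint8 specializations below
// compute the same result using integer-only arithmetic.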
45 | template <class T, class Toutput> |
46 | void ScalarAddition(OpKernelContext* context, const T* full_input, |
47 | float full_input_min, float full_input_max, |
48 | int64_t num_elements, T scalar_input, |
49 | float scalar_input_min, float scalar_input_max, |
50 | float output_min, float output_max, Toutput* output) { |
51 | const Toutput scalar_in_output_range = RequantizeInNewRange<T, Toutput>( |
52 | scalar_input, scalar_input_min, scalar_input_max, output_min, output_max); |
  for (int64_t i = 0; i < num_elements; ++i) {
54 | const Toutput full_input_in_output_range = RequantizeInNewRange<T, Toutput>( |
55 | full_input[i], full_input_min, full_input_max, output_min, output_max); |
56 | output[i] = full_input_in_output_range + scalar_in_output_range; |
57 | } |
58 | } |
59 | |
60 | #ifdef QUANTIZED_ADD_USE_NEON |
61 | |
template <>
void ScalarAddition(OpKernelContext* context, const quint8* full_input,
                    float full_input_min, float full_input_max,
                    int64_t num_elements, quint8 scalar_input,
                    float scalar_input_min, float scalar_input_max,
                    float output_min, float output_max, qint32* output) {
  const int32_t scalar_in_output_range = RequantizeInNewRange<quint8, qint32>(
      scalar_input, scalar_input_min, scalar_input_max, output_min, output_max);
70 | |
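  // Requantizing from the input range to the output range is an affine
  // transform, so it can be folded into one multiply-add. Mapping the
  // quantized values 0 and 1 into the output range yields the intercept and
  // slope:
  //   output(q) = input_0_int64 + q * input_mult_int32
  // which is exactly what the vectorized loop and its scalar tail compute.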
71 | const float input_0_float = |
72 | QuantizedToFloat<quint8>(0, full_input_min, full_input_max); |
73 | const float input_1_float = |
74 | QuantizedToFloat<quint8>(1, full_input_min, full_input_max); |
  const int64_t input_0_int64 =
      FloatToQuantizedUnclamped<qint32>(input_0_float, output_min, output_max);
  const int64_t input_1_int64 =
      FloatToQuantizedUnclamped<qint32>(input_1_float, output_min, output_max);
  const int32_t input_mult_int32 = input_1_int64 - input_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());
85 | |
86 | const int64x2_t input_0_64x2 = vmovq_n_s64(input_0_int64); |
87 | const int32x2_t input_mult_32x2 = vmov_n_s32(input_mult_int32); |
88 | const int32x4_t scalar_in_output_range_32x4 = |
89 | vmovq_n_s32(scalar_in_output_range); |
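  // Main loop: requantize eight uint8 inputs per iteration into the output
  // range, add the pre-requantized scalar, and store eight int32 results.
  // Elements left over when num_elements is not a multiple of eight are
  // handled by the scalar tail loop that follows.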
  int64_t i = 0;
  for (; i < (num_elements - 7); i += 8) {
92 | const uint8* full_input_ptr = &(full_input->value) + i; |
93 | const std::array<int32x4_t, 2> output_value = |
94 | Requantize8x8To32Neon(full_input_ptr, input_0_64x2, input_mult_32x2); |
95 | const int32x4_t result_low_32x4 = |
96 | vaddq_s32(output_value[0], scalar_in_output_range_32x4); |
97 | const int32x4_t result_high_32x4 = |
98 | vaddq_s32(output_value[1], scalar_in_output_range_32x4); |
    int32_t* output_ptr = &(output->value) + i;
100 | vst1q_s32(output_ptr + 0, result_low_32x4); |
101 | vst1q_s32(output_ptr + 4, result_high_32x4); |
102 | } |
103 | for (; i < num_elements; ++i) { |
    const int64_t full_input_value = static_cast<int64_t>(full_input[i]);
    int64_t full_input_in_output_range_64 =
        input_0_int64 + (full_input_value * input_mult_int32);
    full_input_in_output_range_64 =
        std::max(full_input_in_output_range_64, lowest_quantized);
    full_input_in_output_range_64 =
        std::min(full_input_in_output_range_64, highest_quantized);
    const int32_t full_input_in_output_range =
        static_cast<int32_t>(full_input_in_output_range_64);
113 | output[i] = full_input_in_output_range + scalar_in_output_range; |
114 | } |
115 | } |
116 | |
117 | #else // QUANTIZED_ADD_USE_NEON |
118 | |
119 | template <> |
120 | void ScalarAddition(OpKernelContext* context, const quint8* full_input, |
121 | float full_input_min, float full_input_max, |
122 | int64_t num_elements, quint8 scalar_input, |
123 | float scalar_input_min, float scalar_input_max, |
124 | float output_min, float output_max, qint32* output) { |
125 | const int32_t scalar_in_output_range = RequantizeInNewRange<quint8, qint32>( |
126 | scalar_input, scalar_input_min, scalar_input_max, output_min, output_max); |
127 | |
128 | const float input_0_float = |
129 | QuantizedToFloat<quint8>(0, full_input_min, full_input_max); |
130 | const float input_1_float = |
131 | QuantizedToFloat<quint8>(1, full_input_min, full_input_max); |
132 | const int64_t input_0_int64 = |
133 | FloatToQuantizedUnclamped<qint32>(input_0_float, output_min, output_max); |
134 | const int64_t input_1_int64 = |
135 | FloatToQuantizedUnclamped<qint32>(input_1_float, output_min, output_max); |
136 | const int32_t input_mult_int32 = input_1_int64 - input_0_int64; |
137 | |
138 | const int64_t lowest_quantized = |
139 | static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest()); |
140 | const int64_t highest_quantized = |
141 | static_cast<int64_t>(Eigen::NumTraits<qint32>::highest()); |
142 | |
  for (int64_t i = 0; i < num_elements; ++i) {
144 | const int64_t full_input_value = static_cast<int64_t>(full_input[i]); |
145 | int64_t full_input_in_output_range_64 = |
146 | input_0_int64 + (full_input_value * input_mult_int32); |
147 | full_input_in_output_range_64 = |
148 | std::max(full_input_in_output_range_64, lowest_quantized); |
149 | full_input_in_output_range_64 = |
150 | std::min(full_input_in_output_range_64, highest_quantized); |
    const int32_t full_input_in_output_range =
        static_cast<int32_t>(full_input_in_output_range_64);
153 | output[i] = full_input_in_output_range + scalar_in_output_range; |
154 | } |
155 | } |
156 | |
157 | #endif // QUANTIZED_ADD_USE_NEON |
158 | |
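// Reference implementation of elementwise array + array addition: both
// operands are requantized into the shared output range before being summed.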
159 | template <class T, class Toutput> |
160 | void VectorAddition(OpKernelContext* context, const T* x_data, float min_x, |
161 | float max_x, const T* y_data, float min_y, float max_y, |
162 | int64_t num_elements, float output_min, float output_max, |
163 | Toutput* output) { |
  for (int64_t i = 0; i < num_elements; ++i) {
165 | const Toutput x_in_output_range = RequantizeInNewRange<T, Toutput>( |
166 | x_data[i], min_x, max_x, output_min, output_max); |
167 | const Toutput y_in_output_range = RequantizeInNewRange<T, Toutput>( |
168 | y_data[i], min_y, max_y, output_min, output_max); |
169 | output[i] = x_in_output_range + y_in_output_range; |
170 | } |
171 | } |
172 | |
173 | #ifdef QUANTIZED_ADD_USE_NEON |
174 | |
template <>
void VectorAddition(OpKernelContext* context, const quint8* x_data, float min_x,
                    float max_x, const quint8* y_data, float min_y, float max_y,
                    int64_t num_elements, float output_min, float output_max,
                    qint32* output) {
180 | const float x_0_float = QuantizedToFloat<quint8>(0, min_x, max_x); |
181 | const float x_1_float = QuantizedToFloat<quint8>(1, min_x, max_x); |
  const int64_t x_0_int64 =
      FloatToQuantizedUnclamped<qint32>(x_0_float, output_min, output_max);
  const int64_t x_1_int64 =
      FloatToQuantizedUnclamped<qint32>(x_1_float, output_min, output_max);
  const int32_t x_mult_int32 = x_1_int64 - x_0_int64;
187 | |
188 | const float y_0_float = QuantizedToFloat<quint8>(0, min_y, max_y); |
189 | const float y_1_float = QuantizedToFloat<quint8>(1, min_y, max_y); |
  const int64_t y_0_int64 =
      FloatToQuantizedUnclamped<qint32>(y_0_float, output_min, output_max);
  const int64_t y_1_int64 =
      FloatToQuantizedUnclamped<qint32>(y_1_float, output_min, output_max);
  const int32_t y_mult_int32 = y_1_int64 - y_0_int64;
195 | |
  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());
200 | |
201 | const int64x2_t x_0_64x2 = vmovq_n_s64(x_0_int64); |
202 | const int32x2_t x_mult_32x2 = vmov_n_s32(x_mult_int32); |
203 | |
204 | const int64x2_t y_0_64x2 = vmovq_n_s64(y_0_int64); |
205 | const int32x2_t y_mult_32x2 = vmov_n_s32(y_mult_int32); |
206 | |
  int64_t i = 0;
208 | for (; i < (num_elements - 7); i += 8) { |
209 | const uint8* x_ptr = &(x_data->value) + i; |
210 | const std::array<int32x4_t, 2> x_output_value = |
211 | Requantize8x8To32Neon(x_ptr, x_0_64x2, x_mult_32x2); |
212 | const uint8* y_ptr = &(y_data->value) + i; |
213 | const std::array<int32x4_t, 2> y_output_value = |
214 | Requantize8x8To32Neon(y_ptr, y_0_64x2, y_mult_32x2); |
215 | |
216 | const int32x4_t result_low_32x4 = |
217 | vaddq_s32(x_output_value[0], y_output_value[0]); |
218 | const int32x4_t result_high_32x4 = |
219 | vaddq_s32(x_output_value[1], y_output_value[1]); |
    int32_t* output_ptr = &(output->value) + i;
221 | vst1q_s32(output_ptr + 0, result_low_32x4); |
222 | vst1q_s32(output_ptr + 4, result_high_32x4); |
223 | } |
224 | |
225 | for (; i < num_elements; ++i) { |
    const int64_t x_value = static_cast<int64_t>(x_data[i]);
    int64_t x_in_output_range_64 = x_0_int64 + (x_value * x_mult_int32);
    x_in_output_range_64 = std::max(x_in_output_range_64, lowest_quantized);
    x_in_output_range_64 = std::min(x_in_output_range_64, highest_quantized);
    const int32_t x_in_output_range =
        static_cast<int32_t>(x_in_output_range_64);

    const int64_t y_value = static_cast<int64_t>(y_data[i]);
    int64_t y_in_output_range_64 = y_0_int64 + (y_value * y_mult_int32);
    y_in_output_range_64 = std::max(y_in_output_range_64, lowest_quantized);
    y_in_output_range_64 = std::min(y_in_output_range_64, highest_quantized);
    const int32_t y_in_output_range =
        static_cast<int32_t>(y_in_output_range_64);
237 | |
238 | output[i] = x_in_output_range + y_in_output_range; |
239 | } |
240 | } |
241 | |
242 | #else // QUANTIZED_ADD_USE_NEON |
243 | |
244 | template <> |
245 | void VectorAddition(OpKernelContext* context, const quint8* x_data, float min_x, |
246 | float max_x, const quint8* y_data, float min_y, float max_y, |
247 | int64_t num_elements, float output_min, float output_max, |
248 | qint32* output) { |
249 | const float x_0_float = QuantizedToFloat<quint8>(0, min_x, max_x); |
250 | const float x_1_float = QuantizedToFloat<quint8>(1, min_x, max_x); |
251 | const int64_t x_0_int64 = |
252 | FloatToQuantizedUnclamped<qint32>(x_0_float, output_min, output_max); |
253 | const int64_t x_1_int64 = |
254 | FloatToQuantizedUnclamped<qint32>(x_1_float, output_min, output_max); |
255 | const int32_t x_mult_int32 = x_1_int64 - x_0_int64; |
256 | |
257 | const float y_0_float = QuantizedToFloat<quint8>(0, min_y, max_y); |
258 | const float y_1_float = QuantizedToFloat<quint8>(1, min_y, max_y); |
259 | const int64_t y_0_int64 = |
260 | FloatToQuantizedUnclamped<qint32>(y_0_float, output_min, output_max); |
261 | const int64_t y_1_int64 = |
262 | FloatToQuantizedUnclamped<qint32>(y_1_float, output_min, output_max); |
263 | const int32_t y_mult_int32 = y_1_int64 - y_0_int64; |
264 | |
265 | const int64_t lowest_quantized = |
266 | static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest()); |
267 | const int64_t highest_quantized = |
268 | static_cast<int64_t>(Eigen::NumTraits<qint32>::highest()); |
269 | |
  for (int64_t i = 0; i < num_elements; ++i) {
271 | const int64_t x_value = static_cast<int64_t>(x_data[i]); |
272 | int64_t x_in_output_range_64 = x_0_int64 + (x_value * x_mult_int32); |
273 | x_in_output_range_64 = std::max(x_in_output_range_64, lowest_quantized); |
274 | x_in_output_range_64 = std::min(x_in_output_range_64, highest_quantized); |
    const int32_t x_in_output_range =
        static_cast<int32_t>(x_in_output_range_64);
276 | |
277 | const int64_t y_value = static_cast<int64_t>(y_data[i]); |
278 | int64_t y_in_output_range_64 = y_0_int64 + (y_value * y_mult_int32); |
279 | y_in_output_range_64 = std::max(y_in_output_range_64, lowest_quantized); |
280 | y_in_output_range_64 = std::min(y_in_output_range_64, highest_quantized); |
    const int32_t y_in_output_range =
        static_cast<int32_t>(y_in_output_range_64);
282 | |
283 | output[i] = x_in_output_range + y_in_output_range; |
284 | } |
285 | } |
286 | |
287 | #endif // QUANTIZED_ADD_USE_NEON |
288 | |
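// Handles array + shorter-array addition: the shorter operand (the "vector")
// is tiled across the longer one (the "tensor") via the index
// i % vector_num_elements. For example, adding a [3] bias vector to a [2, 3]
// tensor reuses the bias for each of the two rows.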
289 | template <class T, class Toutput> |
290 | void VectorTensorAddition(const T* vector_data, float min_vector, |
291 | float max_vector, int64_t vector_num_elements, |
292 | const T* tensor_data, float min_tensor, |
293 | float max_tensor, int64_t tensor_num_elements, |
294 | float output_min, float output_max, Toutput* output) { |
  for (int64_t i = 0; i < tensor_num_elements; ++i) {
296 | const int64_t vector_i = i % vector_num_elements; |
297 | const Toutput vector_in_output_range = RequantizeInNewRange<T, Toutput>( |
298 | vector_data[vector_i], min_vector, max_vector, output_min, output_max); |
299 | const Toutput tensor_in_output_range = RequantizeInNewRange<T, Toutput>( |
300 | tensor_data[i], min_tensor, max_tensor, output_min, output_max); |
301 | output[i] = vector_in_output_range + tensor_in_output_range; |
302 | } |
303 | } |
304 | |
305 | #ifdef QUANTIZED_ADD_USE_NEON |
306 | |
template <>
void VectorTensorAddition(const quint8* vector_data, float min_vector,
                          float max_vector, int64_t vector_num_elements,
                          const quint8* tensor_data, float min_tensor,
                          float max_tensor, int64_t tensor_num_elements,
                          float output_min, float output_max, qint32* output) {
313 | const float vector_0_float = |
314 | QuantizedToFloat<quint8>(0, min_vector, max_vector); |
315 | const float vector_1_float = |
316 | QuantizedToFloat<quint8>(1, min_vector, max_vector); |
  const int64_t vector_0_int64 =
      FloatToQuantizedUnclamped<qint32>(vector_0_float, output_min, output_max);
  const int64_t vector_1_int64 =
      FloatToQuantizedUnclamped<qint32>(vector_1_float, output_min, output_max);
  const int32_t vector_mult_int32 = vector_1_int64 - vector_0_int64;
322 | |
323 | const float tensor_0_float = |
324 | QuantizedToFloat<quint8>(0, min_tensor, max_tensor); |
325 | const float tensor_1_float = |
326 | QuantizedToFloat<quint8>(1, min_tensor, max_tensor); |
  const int64_t tensor_0_int64 =
      FloatToQuantizedUnclamped<qint32>(tensor_0_float, output_min, output_max);
  const int64_t tensor_1_int64 =
      FloatToQuantizedUnclamped<qint32>(tensor_1_float, output_min, output_max);
  const int32_t tensor_mult_int32 = tensor_1_int64 - tensor_0_int64;
332 | |
  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());
337 | |
338 | const int64x2_t vector_0_64x2 = vmovq_n_s64(vector_0_int64); |
339 | const int32x2_t vector_mult_32x2 = vmov_n_s32(vector_mult_int32); |
340 | |
341 | const int64x2_t tensor_0_64x2 = vmovq_n_s64(tensor_0_int64); |
342 | const int32x2_t tensor_mult_32x2 = vmov_n_s32(tensor_mult_int32); |
343 | |
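  // Walk the tensor one vector-length chunk at a time so the vector operand
  // can be replayed from its start for each chunk. Like the generic
  // implementation, this assumes the shorter operand tiles evenly into the
  // longer one.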
  for (int64_t base_i = 0; base_i < tensor_num_elements;
       base_i += vector_num_elements) {
    int64_t i = base_i;
    int64_t vector_i = 0;
348 | for (; vector_i < (vector_num_elements - 7); vector_i += 8, i += 8) { |
349 | const uint8* vector_ptr = &(vector_data->value) + vector_i; |
350 | const std::array<int32x4_t, 2> vector_output_value = |
351 | Requantize8x8To32Neon(vector_ptr, vector_0_64x2, vector_mult_32x2); |
352 | const uint8* tensor_ptr = &(tensor_data->value) + i; |
353 | const std::array<int32x4_t, 2> tensor_output_value = |
354 | Requantize8x8To32Neon(tensor_ptr, tensor_0_64x2, tensor_mult_32x2); |
355 | |
356 | const int32x4_t result_low_32x4 = |
357 | vaddq_s32(vector_output_value[0], tensor_output_value[0]); |
358 | const int32x4_t result_high_32x4 = |
359 | vaddq_s32(vector_output_value[1], tensor_output_value[1]); |
      int32_t* output_ptr = &(output->value) + i;
361 | vst1q_s32(output_ptr + 0, result_low_32x4); |
362 | vst1q_s32(output_ptr + 4, result_high_32x4); |
363 | } |
    for (; vector_i < vector_num_elements; ++vector_i, ++i) {
      const int64_t vector_value = static_cast<int64_t>(vector_data[vector_i]);
      int64_t vector_in_output_range_64 =
          vector_0_int64 + (vector_value * vector_mult_int32);
      vector_in_output_range_64 =
          std::max(vector_in_output_range_64, lowest_quantized);
      vector_in_output_range_64 =
          std::min(vector_in_output_range_64, highest_quantized);
      const int32_t vector_in_output_range =
          static_cast<int32_t>(vector_in_output_range_64);

      const int64_t tensor_value = static_cast<int64_t>(tensor_data[i]);
      int64_t tensor_in_output_range_64 =
          tensor_0_int64 + (tensor_value * tensor_mult_int32);
      tensor_in_output_range_64 =
          std::max(tensor_in_output_range_64, lowest_quantized);
      tensor_in_output_range_64 =
          std::min(tensor_in_output_range_64, highest_quantized);
      const int32_t tensor_in_output_range =
          static_cast<int32_t>(tensor_in_output_range_64);
384 | |
385 | output[i] = vector_in_output_range + tensor_in_output_range; |
386 | } |
387 | } |
388 | } |
389 | |
390 | #else // QUANTIZED_ADD_USE_NEON |
391 | |
392 | template <> |
393 | void VectorTensorAddition(const quint8* vector_data, float min_vector, |
394 | float max_vector, int64_t vector_num_elements, |
395 | const quint8* tensor_data, float min_tensor, |
396 | float max_tensor, int64_t tensor_num_elements, |
397 | float output_min, float output_max, qint32* output) { |
398 | const float vector_0_float = |
399 | QuantizedToFloat<quint8>(0, min_vector, max_vector); |
400 | const float vector_1_float = |
401 | QuantizedToFloat<quint8>(1, min_vector, max_vector); |
402 | const int64_t vector_0_int64 = |
403 | FloatToQuantizedUnclamped<qint32>(vector_0_float, output_min, output_max); |
404 | const int64_t vector_1_int64 = |
405 | FloatToQuantizedUnclamped<qint32>(vector_1_float, output_min, output_max); |
406 | const int32_t vector_mult_int32 = vector_1_int64 - vector_0_int64; |
407 | |
408 | const float tensor_0_float = |
409 | QuantizedToFloat<quint8>(0, min_tensor, max_tensor); |
410 | const float tensor_1_float = |
411 | QuantizedToFloat<quint8>(1, min_tensor, max_tensor); |
412 | const int64_t tensor_0_int64 = |
413 | FloatToQuantizedUnclamped<qint32>(tensor_0_float, output_min, output_max); |
414 | const int64_t tensor_1_int64 = |
415 | FloatToQuantizedUnclamped<qint32>(tensor_1_float, output_min, output_max); |
416 | const int32_t tensor_mult_int32 = tensor_1_int64 - tensor_0_int64; |
417 | |
418 | const int64_t lowest_quantized = |
419 | static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest()); |
420 | const int64_t highest_quantized = |
421 | static_cast<int64_t>(Eigen::NumTraits<qint32>::highest()); |
422 | |
  for (int64_t i = 0; i < tensor_num_elements; ++i) {
424 | const int64_t vector_i = i % vector_num_elements; |
425 | const int64_t vector_value = static_cast<int64_t>(vector_data[vector_i]); |
426 | int64_t vector_in_output_range_64 = |
427 | vector_0_int64 + (vector_value * vector_mult_int32); |
428 | vector_in_output_range_64 = |
429 | std::max(vector_in_output_range_64, lowest_quantized); |
430 | vector_in_output_range_64 = |
431 | std::min(vector_in_output_range_64, highest_quantized); |
    const int32_t vector_in_output_range =
        static_cast<int32_t>(vector_in_output_range_64);
434 | |
435 | const int64_t tensor_value = static_cast<int64_t>(tensor_data[i]); |
436 | int64_t tensor_in_output_range_64 = |
437 | tensor_0_int64 + (tensor_value * tensor_mult_int32); |
438 | tensor_in_output_range_64 = |
439 | std::max(tensor_in_output_range_64, lowest_quantized); |
440 | tensor_in_output_range_64 = |
441 | std::min(tensor_in_output_range_64, highest_quantized); |
    const int32_t tensor_in_output_range =
        static_cast<int32_t>(tensor_in_output_range_64);
444 | |
445 | output[i] = vector_in_output_range + tensor_in_output_range; |
446 | } |
447 | } |
448 | |
449 | #endif // QUANTIZED_ADD_USE_NEON |
450 | |
451 | } // namespace |
452 | |
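// OpKernel implementing QuantizedAdd: adds two quantized tensors, each with
// its own float min/max range, producing a qint32 result plus the float range
// of that result. Broadcasting is limited to the three patterns described at
// the top of this file.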
453 | template <class T, class Toutput> |
454 | class QuantizedAddOp : public OpKernel { |
455 | public: |
456 | explicit QuantizedAddOp(OpKernelConstruction* context) : OpKernel(context) {} |
457 | |
458 | void Compute(OpKernelContext* context) override { |
459 | const Tensor& x = context->input(0); |
460 | const Tensor& y = context->input(1); |
461 | const Tensor& min_x_tensor = context->input(2); |
462 | const Tensor& max_x_tensor = context->input(3); |
463 | const Tensor& min_y_tensor = context->input(4); |
464 | const Tensor& max_y_tensor = context->input(5); |
465 | |
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()),
                errors::InvalidArgument("`min_x` must be rank 0 but is rank ",
                                        min_x_tensor.dims()));
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()),
                errors::InvalidArgument("`max_x` must be rank 0 but is rank ",
                                        max_x_tensor.dims()));
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()),
                errors::InvalidArgument("`min_y` must be rank 0 but is rank ",
                                        min_y_tensor.dims()));
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()),
                errors::InvalidArgument("`max_y` must be rank 0 but is rank ",
                                        max_y_tensor.dims()));
478 | |
479 | const float min_x = min_x_tensor.scalar<float>()(); |
480 | const float max_x = max_x_tensor.scalar<float>()(); |
481 | const float min_y = min_y_tensor.scalar<float>()(); |
482 | const float max_y = max_y_tensor.scalar<float>()(); |
483 | |
484 | BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape())); |
485 | if (!bcast.IsValid()) { |
      context->SetStatus(errors::InvalidArgument(
          "Incompatible shapes: ", x.shape().DebugString(), " vs. ",
          y.shape().DebugString()));
489 | return; |
490 | } |
    Tensor* z = nullptr;
492 | OP_REQUIRES_OK(context, context->allocate_output( |
493 | 0, BCast::ToShape(bcast.output_shape()), &z)); |
494 | |
    // Make sure that we have valid quantization ranges for the input buffers.
    // If the difference between the min and max is zero or negative, no
    // meaningful intermediate operations can be performed on the values.
    OP_REQUIRES(context, (max_x > min_x),
                errors::InvalidArgument("max_x must be larger than min_x."));
    OP_REQUIRES(context, (max_y > min_y),
                errors::InvalidArgument("max_y must be larger than min_y."));
502 | const T* x_data = x.flat<T>().data(); |
503 | const T* y_data = y.flat<T>().data(); |
504 | Toutput* z_data = z->flat<Toutput>().data(); |
505 | |
506 | // We want the range of the output to be symmetrical around zero so that |
507 | // adding zero leaves the result unchanged, and to contain the largest of |
508 | // the two input values with some room to spare. |
509 | const float smallest_min = std::min(min_x, min_y); |
510 | const float largest_max = std::max(max_x, max_y); |
511 | const float biggest_range = |
512 | std::max(std::abs(smallest_min), std::abs(largest_max)); |
513 | const float output_range = (biggest_range * (1 << 14)); |
514 | const float min_z_value = -output_range; |
515 | const float max_z_value = output_range; |
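    // Worked example (illustrative values only): with x in [-1.0f, 3.0f] and
    // y in [-2.0f, 2.0f], biggest_range is 3.0f, so the output range becomes
    // [-49152.0f, 49152.0f] (3.0f * 16384). The 2^14 factor leaves headroom
    // so that sums of requantized values stay well inside the qint32 range.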
516 | |
517 | const int ndims = bcast.x_reshape().size(); |
518 | if (ndims <= 1) { |
519 | if (x.NumElements() == 1) { |
520 | ScalarAddition<T, Toutput>(context, y_data, min_y, max_y, |
521 | y.NumElements(), x_data[0], min_x, max_x, |
522 | min_z_value, max_z_value, z_data); |
523 | } else if (y.NumElements() == 1) { |
524 | ScalarAddition<T, Toutput>(context, x_data, min_x, max_x, |
525 | x.NumElements(), y_data[0], min_y, max_y, |
526 | min_z_value, max_z_value, z_data); |
527 | } else { |
528 | VectorAddition<T, Toutput>(context, x_data, min_x, max_x, y_data, min_y, |
529 | max_y, x.NumElements(), min_z_value, |
530 | max_z_value, z_data); |
531 | } |
532 | } else if (ndims == 2) { |
533 | const T* vector_data; |
534 | int64_t vector_num_elements; |
535 | float vector_min; |
536 | float vector_max; |
537 | const T* tensor_data; |
538 | int64_t tensor_num_elements; |
539 | float tensor_min; |
540 | float tensor_max; |
541 | if (x.NumElements() < y.NumElements()) { |
542 | vector_data = x_data; |
543 | vector_num_elements = x.NumElements(); |
544 | vector_min = min_x; |
545 | vector_max = max_x; |
546 | tensor_data = y_data; |
547 | tensor_num_elements = y.NumElements(); |
548 | tensor_min = min_y; |
549 | tensor_max = max_y; |
550 | } else { |
551 | vector_data = y_data; |
552 | vector_num_elements = y.NumElements(); |
553 | vector_min = min_y; |
554 | vector_max = max_y; |
555 | tensor_data = x_data; |
556 | tensor_num_elements = x.NumElements(); |
557 | tensor_min = min_x; |
558 | tensor_max = max_x; |
559 | } |
      OP_REQUIRES(context, vector_num_elements > 0,
                  errors::InvalidArgument("Must have some elements to add"));
562 | VectorTensorAddition<T, Toutput>( |
563 | vector_data, vector_min, vector_max, vector_num_elements, tensor_data, |
564 | tensor_min, tensor_max, tensor_num_elements, min_z_value, max_z_value, |
565 | z_data); |
566 | } else { |
567 | LOG(INFO) << "ndims=" << ndims; |
568 | LOG(INFO) << "bcast.x_reshape()=" |
569 | << TensorShape(bcast.x_reshape()).DebugString(); |
570 | LOG(INFO) << "bcast.y_reshape()=" |
571 | << TensorShape(bcast.y_reshape()).DebugString(); |
572 | LOG(INFO) << "bcast.x_bcast()=" |
573 | << TensorShape(bcast.x_bcast()).DebugString(); |
574 | LOG(INFO) << "bcast.y_bcast()=" |
575 | << TensorShape(bcast.y_bcast()).DebugString(); |
576 | |
      context->SetStatus(errors::Unimplemented(
          "Broadcast between ", context->input(0).shape().DebugString(),
          " and ", context->input(1).shape().DebugString(),
          " is not supported yet."));
581 | return; |
582 | } |
583 | |
584 | Tensor* z_min = nullptr; |
585 | OP_REQUIRES_OK(context, context->allocate_output(1, {}, &z_min)); |
586 | z_min->flat<float>()(0) = min_z_value; |
587 | |
588 | Tensor* z_max = nullptr; |
589 | OP_REQUIRES_OK(context, context->allocate_output(2, {}, &z_max)); |
590 | z_max->flat<float>()(0) = max_z_value; |
591 | } |
592 | }; |
593 | |
594 | REGISTER_KERNEL_BUILDER(Name("QuantizedAdd" ) |
595 | .Device(DEVICE_CPU) |
596 | .TypeConstraint<quint8>("T1" ) |
597 | .TypeConstraint<quint8>("T2" ) |
598 | .TypeConstraint<qint32>("Toutput" ), |
599 | QuantizedAddOp<quint8, qint32>); |
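
// A minimal sketch of driving this kernel from a unit test, assuming the
// OpsTestBase fixture used by other kernel tests (the node name below is
// illustrative, not part of this file):
//
//   TF_ASSERT_OK(NodeDefBuilder("quantized_add_op", "QuantizedAdd")
//                    .Input(FakeInput(DT_QUINT8))   // x
//                    .Input(FakeInput(DT_QUINT8))   // y
//                    .Input(FakeInput(DT_FLOAT))    // min_x
//                    .Input(FakeInput(DT_FLOAT))    // max_x
//                    .Input(FakeInput(DT_FLOAT))    // min_y
//                    .Input(FakeInput(DT_FLOAT))    // max_y
//                    .Finalize(node_def()));
//   TF_ASSERT_OK(InitOp());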
600 | |
601 | } // namespace tensorflow |
602 | |