/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Implements a quantized eight-bit version of the add operation.

#define EIGEN_USE_THREADS

#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define USE_NEON
#define QUANTIZED_ADD_USE_NEON
#include <arm_neon.h>
#endif

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/meta_support.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/util/bcast.h"

// There are implementations for three broadcast patterns for add:
//  - Scalar + Array
//  - Array + Array
//  - Array + Shorter Array (repeated to match the first)
//
// These handle many common broadcast patterns, and NEON SIMD versions
// accelerate them on ARM platforms.
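//
// Concrete shape examples for the three patterns:
//  - Scalar + Array:        [] + [4]              -> [4]
//  - Array + Array:         [32, 100] + [32, 100] -> [32, 100]
//  - Array + Shorter Array: [32, 100] + [100]     -> [32, 100]
//    (the shorter array is tiled along the leading dimension)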

namespace tensorflow {
namespace {

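// Reference implementation: requantizes the scalar and every element of
// `full_input` into the shared output range, then sums them elementwise.
// The specializations below replace this for quint8 -> qint32.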
template <class T, class Toutput>
void ScalarAddition(OpKernelContext* context, const T* full_input,
                    float full_input_min, float full_input_max,
                    int64_t num_elements, T scalar_input,
                    float scalar_input_min, float scalar_input_max,
                    float output_min, float output_max, Toutput* output) {
  const Toutput scalar_in_output_range = RequantizeInNewRange<T, Toutput>(
      scalar_input, scalar_input_min, scalar_input_max, output_min, output_max);
  for (int64_t i = 0; i < num_elements; ++i) {
    const Toutput full_input_in_output_range = RequantizeInNewRange<T, Toutput>(
        full_input[i], full_input_min, full_input_max, output_min, output_max);
    output[i] = full_input_in_output_range + scalar_in_output_range;
  }
}

#ifdef QUANTIZED_ADD_USE_NEON

template <>
void ScalarAddition(OpKernelContext* context, const quint8* full_input,
                    float full_input_min, float full_input_max,
                    int64_t num_elements, quint8 scalar_input,
                    float scalar_input_min, float scalar_input_max,
                    float output_min, float output_max, qint32* output) {
  const int32_t scalar_in_output_range = RequantizeInNewRange<quint8, qint32>(
      scalar_input, scalar_input_min, scalar_input_max, output_min, output_max);

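  // Requantization between two linear ranges is an affine transform, so it
  // can be precomputed from two sample points: the output-range value that
  // quantized 0 maps to, and the step between adjacent quint8 values. Each
  // input element then maps to
  //   input_0_int64 + input_value * input_mult_int32.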
  const float input_0_float =
      QuantizedToFloat<quint8>(0, full_input_min, full_input_max);
  const float input_1_float =
      QuantizedToFloat<quint8>(1, full_input_min, full_input_max);
  const int64_t input_0_int64 =
      FloatToQuantizedUnclamped<qint32>(input_0_float, output_min, output_max);
  const int64_t input_1_int64 =
      FloatToQuantizedUnclamped<qint32>(input_1_float, output_min, output_max);
  const int32_t input_mult_int32 = input_1_int64 - input_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());

  const int64x2_t input_0_64x2 = vmovq_n_s64(input_0_int64);
  const int32x2_t input_mult_32x2 = vmov_n_s32(input_mult_int32);
  const int32x4_t scalar_in_output_range_32x4 =
      vmovq_n_s32(scalar_in_output_range);
  int64_t i = 0;
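  // Process eight elements per iteration with NEON; the scalar loop below
  // handles any tail that does not fill a full vector.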
  for (; i < (num_elements - 7); i += 8) {
    const uint8_t* full_input_ptr = &(full_input->value) + i;
    const std::array<int32x4_t, 2> output_value =
        Requantize8x8To32Neon(full_input_ptr, input_0_64x2, input_mult_32x2);
    const int32x4_t result_low_32x4 =
        vaddq_s32(output_value[0], scalar_in_output_range_32x4);
    const int32x4_t result_high_32x4 =
        vaddq_s32(output_value[1], scalar_in_output_range_32x4);
    int32_t* output_ptr = &(output->value) + i;
    vst1q_s32(output_ptr + 0, result_low_32x4);
    vst1q_s32(output_ptr + 4, result_high_32x4);
  }
  for (; i < num_elements; ++i) {
    const int64_t full_input_value = static_cast<int64_t>(full_input[i]);
    int64_t full_input_in_output_range_64 =
        input_0_int64 + (full_input_value * input_mult_int32);
    full_input_in_output_range_64 =
        std::max(full_input_in_output_range_64, lowest_quantized);
    full_input_in_output_range_64 =
        std::min(full_input_in_output_range_64, highest_quantized);
    const int32_t full_input_in_output_range =
        static_cast<int32_t>(full_input_in_output_range_64);
    output[i] = full_input_in_output_range + scalar_in_output_range;
  }
}

#else  // QUANTIZED_ADD_USE_NEON

template <>
void ScalarAddition(OpKernelContext* context, const quint8* full_input,
                    float full_input_min, float full_input_max,
                    int64_t num_elements, quint8 scalar_input,
                    float scalar_input_min, float scalar_input_max,
                    float output_min, float output_max, qint32* output) {
  const int32_t scalar_in_output_range = RequantizeInNewRange<quint8, qint32>(
      scalar_input, scalar_input_min, scalar_input_max, output_min, output_max);

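  // Same affine requantization as the NEON branch, expressed as a plain
  // scalar loop with clamping to the qint32 limits.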
  const float input_0_float =
      QuantizedToFloat<quint8>(0, full_input_min, full_input_max);
  const float input_1_float =
      QuantizedToFloat<quint8>(1, full_input_min, full_input_max);
  const int64_t input_0_int64 =
      FloatToQuantizedUnclamped<qint32>(input_0_float, output_min, output_max);
  const int64_t input_1_int64 =
      FloatToQuantizedUnclamped<qint32>(input_1_float, output_min, output_max);
  const int32_t input_mult_int32 = input_1_int64 - input_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());

  for (int64_t i = 0; i < num_elements; ++i) {
    const int64_t full_input_value = static_cast<int64_t>(full_input[i]);
    int64_t full_input_in_output_range_64 =
        input_0_int64 + (full_input_value * input_mult_int32);
    full_input_in_output_range_64 =
        std::max(full_input_in_output_range_64, lowest_quantized);
    full_input_in_output_range_64 =
        std::min(full_input_in_output_range_64, highest_quantized);
    const int32_t full_input_in_output_range =
        static_cast<int32_t>(full_input_in_output_range_64);
    output[i] = full_input_in_output_range + scalar_in_output_range;
  }
}

#endif  // QUANTIZED_ADD_USE_NEON

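// Reference implementation for adding two equal-length arrays: both inputs
// are requantized into the shared output range before being summed.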
template <class T, class Toutput>
void VectorAddition(OpKernelContext* context, const T* x_data, float min_x,
                    float max_x, const T* y_data, float min_y, float max_y,
                    int64_t num_elements, float output_min, float output_max,
                    Toutput* output) {
  for (int64_t i = 0; i < num_elements; ++i) {
    const Toutput x_in_output_range = RequantizeInNewRange<T, Toutput>(
        x_data[i], min_x, max_x, output_min, output_max);
    const Toutput y_in_output_range = RequantizeInNewRange<T, Toutput>(
        y_data[i], min_y, max_y, output_min, output_max);
    output[i] = x_in_output_range + y_in_output_range;
  }
}

#ifdef QUANTIZED_ADD_USE_NEON

template <>
void VectorAddition(OpKernelContext* context, const quint8* x_data, float min_x,
                    float max_x, const quint8* y_data, float min_y, float max_y,
                    int64_t num_elements, float output_min, float output_max,
                    qint32* output) {
  const float x_0_float = QuantizedToFloat<quint8>(0, min_x, max_x);
  const float x_1_float = QuantizedToFloat<quint8>(1, min_x, max_x);
  const int64_t x_0_int64 =
      FloatToQuantizedUnclamped<qint32>(x_0_float, output_min, output_max);
  const int64_t x_1_int64 =
      FloatToQuantizedUnclamped<qint32>(x_1_float, output_min, output_max);
  const int32_t x_mult_int32 = x_1_int64 - x_0_int64;

  const float y_0_float = QuantizedToFloat<quint8>(0, min_y, max_y);
  const float y_1_float = QuantizedToFloat<quint8>(1, min_y, max_y);
  const int64_t y_0_int64 =
      FloatToQuantizedUnclamped<qint32>(y_0_float, output_min, output_max);
  const int64_t y_1_int64 =
      FloatToQuantizedUnclamped<qint32>(y_1_float, output_min, output_max);
  const int32_t y_mult_int32 = y_1_int64 - y_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());

  const int64x2_t x_0_64x2 = vmovq_n_s64(x_0_int64);
  const int32x2_t x_mult_32x2 = vmov_n_s32(x_mult_int32);

  const int64x2_t y_0_64x2 = vmovq_n_s64(y_0_int64);
  const int32x2_t y_mult_32x2 = vmov_n_s32(y_mult_int32);

  int64_t i = 0;
  for (; i < (num_elements - 7); i += 8) {
    const uint8_t* x_ptr = &(x_data->value) + i;
    const std::array<int32x4_t, 2> x_output_value =
        Requantize8x8To32Neon(x_ptr, x_0_64x2, x_mult_32x2);
    const uint8_t* y_ptr = &(y_data->value) + i;
    const std::array<int32x4_t, 2> y_output_value =
        Requantize8x8To32Neon(y_ptr, y_0_64x2, y_mult_32x2);

    const int32x4_t result_low_32x4 =
        vaddq_s32(x_output_value[0], y_output_value[0]);
    const int32x4_t result_high_32x4 =
        vaddq_s32(x_output_value[1], y_output_value[1]);
    int32_t* output_ptr = &(output->value) + i;
    vst1q_s32(output_ptr + 0, result_low_32x4);
    vst1q_s32(output_ptr + 4, result_high_32x4);
  }

  for (; i < num_elements; ++i) {
    const int64_t x_value = static_cast<int64_t>(x_data[i]);
    int64_t x_in_output_range_64 = x_0_int64 + (x_value * x_mult_int32);
    x_in_output_range_64 = std::max(x_in_output_range_64, lowest_quantized);
    x_in_output_range_64 = std::min(x_in_output_range_64, highest_quantized);
    const int32_t x_in_output_range =
        static_cast<int32_t>(x_in_output_range_64);

    const int64_t y_value = static_cast<int64_t>(y_data[i]);
    int64_t y_in_output_range_64 = y_0_int64 + (y_value * y_mult_int32);
    y_in_output_range_64 = std::max(y_in_output_range_64, lowest_quantized);
    y_in_output_range_64 = std::min(y_in_output_range_64, highest_quantized);
    const int32_t y_in_output_range =
        static_cast<int32_t>(y_in_output_range_64);

    output[i] = x_in_output_range + y_in_output_range;
  }
}

#else  // QUANTIZED_ADD_USE_NEON

template <>
void VectorAddition(OpKernelContext* context, const quint8* x_data, float min_x,
                    float max_x, const quint8* y_data, float min_y, float max_y,
                    int64_t num_elements, float output_min, float output_max,
                    qint32* output) {
  const float x_0_float = QuantizedToFloat<quint8>(0, min_x, max_x);
  const float x_1_float = QuantizedToFloat<quint8>(1, min_x, max_x);
  const int64_t x_0_int64 =
      FloatToQuantizedUnclamped<qint32>(x_0_float, output_min, output_max);
  const int64_t x_1_int64 =
      FloatToQuantizedUnclamped<qint32>(x_1_float, output_min, output_max);
  const int32_t x_mult_int32 = x_1_int64 - x_0_int64;

  const float y_0_float = QuantizedToFloat<quint8>(0, min_y, max_y);
  const float y_1_float = QuantizedToFloat<quint8>(1, min_y, max_y);
  const int64_t y_0_int64 =
      FloatToQuantizedUnclamped<qint32>(y_0_float, output_min, output_max);
  const int64_t y_1_int64 =
      FloatToQuantizedUnclamped<qint32>(y_1_float, output_min, output_max);
  const int32_t y_mult_int32 = y_1_int64 - y_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());

  for (int64_t i = 0; i < num_elements; ++i) {
    const int64_t x_value = static_cast<int64_t>(x_data[i]);
    int64_t x_in_output_range_64 = x_0_int64 + (x_value * x_mult_int32);
    x_in_output_range_64 = std::max(x_in_output_range_64, lowest_quantized);
    x_in_output_range_64 = std::min(x_in_output_range_64, highest_quantized);
    const int32_t x_in_output_range =
        static_cast<int32_t>(x_in_output_range_64);

    const int64_t y_value = static_cast<int64_t>(y_data[i]);
    int64_t y_in_output_range_64 = y_0_int64 + (y_value * y_mult_int32);
    y_in_output_range_64 = std::max(y_in_output_range_64, lowest_quantized);
    y_in_output_range_64 = std::min(y_in_output_range_64, highest_quantized);
    const int32_t y_in_output_range =
        static_cast<int32_t>(y_in_output_range_64);

    output[i] = x_in_output_range + y_in_output_range;
  }
}

#endif  // QUANTIZED_ADD_USE_NEON

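// Reference implementation for adding a short vector to a longer tensor:
// the vector index wraps with a modulo so it repeats along the tensor.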
template <class T, class Toutput>
void VectorTensorAddition(const T* vector_data, float min_vector,
                          float max_vector, int64_t vector_num_elements,
                          const T* tensor_data, float min_tensor,
                          float max_tensor, int64_t tensor_num_elements,
                          float output_min, float output_max, Toutput* output) {
  for (int64_t i = 0; i < tensor_num_elements; ++i) {
    const int64_t vector_i = i % vector_num_elements;
    const Toutput vector_in_output_range = RequantizeInNewRange<T, Toutput>(
        vector_data[vector_i], min_vector, max_vector, output_min, output_max);
    const Toutput tensor_in_output_range = RequantizeInNewRange<T, Toutput>(
        tensor_data[i], min_tensor, max_tensor, output_min, output_max);
    output[i] = vector_in_output_range + tensor_in_output_range;
  }
}

#ifdef QUANTIZED_ADD_USE_NEON

template <>
void VectorTensorAddition(const quint8* vector_data, float min_vector,
                          float max_vector, int64_t vector_num_elements,
                          const quint8* tensor_data, float min_tensor,
                          float max_tensor, int64_t tensor_num_elements,
                          float output_min, float output_max, qint32* output) {
  const float vector_0_float =
      QuantizedToFloat<quint8>(0, min_vector, max_vector);
  const float vector_1_float =
      QuantizedToFloat<quint8>(1, min_vector, max_vector);
  const int64_t vector_0_int64 =
      FloatToQuantizedUnclamped<qint32>(vector_0_float, output_min, output_max);
  const int64_t vector_1_int64 =
      FloatToQuantizedUnclamped<qint32>(vector_1_float, output_min, output_max);
  const int32_t vector_mult_int32 = vector_1_int64 - vector_0_int64;

  const float tensor_0_float =
      QuantizedToFloat<quint8>(0, min_tensor, max_tensor);
  const float tensor_1_float =
      QuantizedToFloat<quint8>(1, min_tensor, max_tensor);
  const int64_t tensor_0_int64 =
      FloatToQuantizedUnclamped<qint32>(tensor_0_float, output_min, output_max);
  const int64_t tensor_1_int64 =
      FloatToQuantizedUnclamped<qint32>(tensor_1_float, output_min, output_max);
  const int32_t tensor_mult_int32 = tensor_1_int64 - tensor_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());

  const int64x2_t vector_0_64x2 = vmovq_n_s64(vector_0_int64);
  const int32x2_t vector_mult_32x2 = vmov_n_s32(vector_mult_int32);

  const int64x2_t tensor_0_64x2 = vmovq_n_s64(tensor_0_int64);
  const int32x2_t tensor_mult_32x2 = vmov_n_s32(tensor_mult_int32);

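  // Walk the tensor one vector-length stride at a time; within each stride,
  // add one repetition of the vector, eight elements per NEON iteration,
  // with a scalar loop for the remainder.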
  for (int64_t base_i = 0; base_i < tensor_num_elements;
       base_i += vector_num_elements) {
    int64_t i = base_i;
    int64_t vector_i = 0;
    for (; vector_i < (vector_num_elements - 7); vector_i += 8, i += 8) {
      const uint8_t* vector_ptr = &(vector_data->value) + vector_i;
      const std::array<int32x4_t, 2> vector_output_value =
          Requantize8x8To32Neon(vector_ptr, vector_0_64x2, vector_mult_32x2);
      const uint8_t* tensor_ptr = &(tensor_data->value) + i;
      const std::array<int32x4_t, 2> tensor_output_value =
          Requantize8x8To32Neon(tensor_ptr, tensor_0_64x2, tensor_mult_32x2);

      const int32x4_t result_low_32x4 =
          vaddq_s32(vector_output_value[0], tensor_output_value[0]);
      const int32x4_t result_high_32x4 =
          vaddq_s32(vector_output_value[1], tensor_output_value[1]);
      int32_t* output_ptr = &(output->value) + i;
      vst1q_s32(output_ptr + 0, result_low_32x4);
      vst1q_s32(output_ptr + 4, result_high_32x4);
    }
    for (; vector_i < vector_num_elements; ++vector_i, ++i) {
      const int64_t vector_value = static_cast<int64_t>(vector_data[vector_i]);
      int64_t vector_in_output_range_64 =
          vector_0_int64 + (vector_value * vector_mult_int32);
      vector_in_output_range_64 =
          std::max(vector_in_output_range_64, lowest_quantized);
      vector_in_output_range_64 =
          std::min(vector_in_output_range_64, highest_quantized);
      const int32_t vector_in_output_range =
          static_cast<int32_t>(vector_in_output_range_64);

      const int64_t tensor_value = static_cast<int64_t>(tensor_data[i]);
      int64_t tensor_in_output_range_64 =
          tensor_0_int64 + (tensor_value * tensor_mult_int32);
      tensor_in_output_range_64 =
          std::max(tensor_in_output_range_64, lowest_quantized);
      tensor_in_output_range_64 =
          std::min(tensor_in_output_range_64, highest_quantized);
      const int32_t tensor_in_output_range =
          static_cast<int32_t>(tensor_in_output_range_64);

      output[i] = vector_in_output_range + tensor_in_output_range;
    }
  }
}

#else  // QUANTIZED_ADD_USE_NEON

template <>
void VectorTensorAddition(const quint8* vector_data, float min_vector,
                          float max_vector, int64_t vector_num_elements,
                          const quint8* tensor_data, float min_tensor,
                          float max_tensor, int64_t tensor_num_elements,
                          float output_min, float output_max, qint32* output) {
  const float vector_0_float =
      QuantizedToFloat<quint8>(0, min_vector, max_vector);
  const float vector_1_float =
      QuantizedToFloat<quint8>(1, min_vector, max_vector);
  const int64_t vector_0_int64 =
      FloatToQuantizedUnclamped<qint32>(vector_0_float, output_min, output_max);
  const int64_t vector_1_int64 =
      FloatToQuantizedUnclamped<qint32>(vector_1_float, output_min, output_max);
  const int32_t vector_mult_int32 = vector_1_int64 - vector_0_int64;

  const float tensor_0_float =
      QuantizedToFloat<quint8>(0, min_tensor, max_tensor);
  const float tensor_1_float =
      QuantizedToFloat<quint8>(1, min_tensor, max_tensor);
  const int64_t tensor_0_int64 =
      FloatToQuantizedUnclamped<qint32>(tensor_0_float, output_min, output_max);
  const int64_t tensor_1_int64 =
      FloatToQuantizedUnclamped<qint32>(tensor_1_float, output_min, output_max);
  const int32_t tensor_mult_int32 = tensor_1_int64 - tensor_0_int64;

  const int64_t lowest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::lowest());
  const int64_t highest_quantized =
      static_cast<int64_t>(Eigen::NumTraits<qint32>::highest());

  for (int64_t i = 0; i < tensor_num_elements; ++i) {
    const int64_t vector_i = i % vector_num_elements;
    const int64_t vector_value = static_cast<int64_t>(vector_data[vector_i]);
    int64_t vector_in_output_range_64 =
        vector_0_int64 + (vector_value * vector_mult_int32);
    vector_in_output_range_64 =
        std::max(vector_in_output_range_64, lowest_quantized);
    vector_in_output_range_64 =
        std::min(vector_in_output_range_64, highest_quantized);
    const int32_t vector_in_output_range =
        static_cast<int32_t>(vector_in_output_range_64);

    const int64_t tensor_value = static_cast<int64_t>(tensor_data[i]);
    int64_t tensor_in_output_range_64 =
        tensor_0_int64 + (tensor_value * tensor_mult_int32);
    tensor_in_output_range_64 =
        std::max(tensor_in_output_range_64, lowest_quantized);
    tensor_in_output_range_64 =
        std::min(tensor_in_output_range_64, highest_quantized);
    const int32_t tensor_in_output_range =
        static_cast<int32_t>(tensor_in_output_range_64);

    output[i] = vector_in_output_range + tensor_in_output_range;
  }
}

#endif  // QUANTIZED_ADD_USE_NEON


}  // namespace

template <class T, class Toutput>
class QuantizedAddOp : public OpKernel {
 public:
  explicit QuantizedAddOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& x = context->input(0);
    const Tensor& y = context->input(1);
    const Tensor& min_x_tensor = context->input(2);
    const Tensor& max_x_tensor = context->input(3);
    const Tensor& min_y_tensor = context->input(4);
    const Tensor& max_y_tensor = context->input(5);

    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()),
                errors::InvalidArgument("`min_x` must be rank 0 but is rank ",
                                        min_x_tensor.dims()));
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()),
                errors::InvalidArgument("`max_x` must be rank 0 but is rank ",
                                        max_x_tensor.dims()));
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()),
                errors::InvalidArgument("`min_y` must be rank 0 but is rank ",
                                        min_y_tensor.dims()));
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()),
                errors::InvalidArgument("`max_y` must be rank 0 but is rank ",
                                        max_y_tensor.dims()));

    const float min_x = min_x_tensor.scalar<float>()();
    const float max_x = max_x_tensor.scalar<float>()();
    const float min_y = min_y_tensor.scalar<float>()();
    const float max_y = max_y_tensor.scalar<float>()();

    BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape()));
    if (!bcast.IsValid()) {
      context->SetStatus(errors::InvalidArgument(
          "Incompatible shapes: ", x.shape().DebugString(), " vs. ",
          y.shape().DebugString()));
      return;
    }
    Tensor* z;
    OP_REQUIRES_OK(context, context->allocate_output(
                                0, BCast::ToShape(bcast.output_shape()), &z));

    // Make sure that we have valid quantization ranges for the input buffers.
    // If the difference between the min and max is negative or zero, it makes
    // it hard to do meaningful intermediate operations on the values.
    OP_REQUIRES(context, (max_x > min_x),
                errors::InvalidArgument("max_x must be larger than min_x."));
    OP_REQUIRES(context, (max_y > min_y),
                errors::InvalidArgument("max_y must be larger than min_y."));
    const T* x_data = x.flat<T>().data();
    const T* y_data = y.flat<T>().data();
    Toutput* z_data = z->flat<Toutput>().data();

    // We want the range of the output to be symmetrical around zero so that
    // adding zero leaves the result unchanged, and to contain the largest of
    // the two input values with some room to spare.
    const float smallest_min = std::min(min_x, min_y);
    const float largest_max = std::max(max_x, max_y);
    const float biggest_range =
        std::max(std::abs(smallest_min), std::abs(largest_max));
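    // Scaling the largest input magnitude by 2^14 gives the qint32 output a
    // wide symmetric range, leaving ample headroom above either 8-bit input.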
    const float output_range = (biggest_range * (1 << 14));
    const float min_z_value = -output_range;
    const float max_z_value = output_range;

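    // BCast collapses adjacent dimensions, so ndims is the rank of the
    // reshaped inputs: <= 1 covers identical shapes and scalar broadcasts,
    // while 2 covers a vector repeated along a larger tensor.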
    const int ndims = bcast.x_reshape().size();
    if (ndims <= 1) {
      if (x.NumElements() == 1) {
        ScalarAddition<T, Toutput>(context, y_data, min_y, max_y,
                                   y.NumElements(), x_data[0], min_x, max_x,
                                   min_z_value, max_z_value, z_data);
      } else if (y.NumElements() == 1) {
        ScalarAddition<T, Toutput>(context, x_data, min_x, max_x,
                                   x.NumElements(), y_data[0], min_y, max_y,
                                   min_z_value, max_z_value, z_data);
      } else {
        VectorAddition<T, Toutput>(context, x_data, min_x, max_x, y_data, min_y,
                                   max_y, x.NumElements(), min_z_value,
                                   max_z_value, z_data);
      }
    } else if (ndims == 2) {
      const T* vector_data;
      int64_t vector_num_elements;
      float vector_min;
      float vector_max;
      const T* tensor_data;
      int64_t tensor_num_elements;
      float tensor_min;
      float tensor_max;
      if (x.NumElements() < y.NumElements()) {
        vector_data = x_data;
        vector_num_elements = x.NumElements();
        vector_min = min_x;
        vector_max = max_x;
        tensor_data = y_data;
        tensor_num_elements = y.NumElements();
        tensor_min = min_y;
        tensor_max = max_y;
      } else {
        vector_data = y_data;
        vector_num_elements = y.NumElements();
        vector_min = min_y;
        vector_max = max_y;
        tensor_data = x_data;
        tensor_num_elements = x.NumElements();
        tensor_min = min_x;
        tensor_max = max_x;
      }
      OP_REQUIRES(context, vector_num_elements > 0,
                  errors::InvalidArgument("Must have some elements to add"));
      VectorTensorAddition<T, Toutput>(
          vector_data, vector_min, vector_max, vector_num_elements, tensor_data,
          tensor_min, tensor_max, tensor_num_elements, min_z_value, max_z_value,
          z_data);
    } else {
      LOG(INFO) << "ndims=" << ndims;
      LOG(INFO) << "bcast.x_reshape()="
                << TensorShape(bcast.x_reshape()).DebugString();
      LOG(INFO) << "bcast.y_reshape()="
                << TensorShape(bcast.y_reshape()).DebugString();
      LOG(INFO) << "bcast.x_bcast()="
                << TensorShape(bcast.x_bcast()).DebugString();
      LOG(INFO) << "bcast.y_bcast()="
                << TensorShape(bcast.y_bcast()).DebugString();

      context->SetStatus(errors::Unimplemented(
          "Broadcast between ", context->input(0).shape().DebugString(),
          " and ", context->input(1).shape().DebugString(),
          " is not supported yet."));
      return;
    }

    Tensor* z_min = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(1, {}, &z_min));
    z_min->flat<float>()(0) = min_z_value;

    Tensor* z_max = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(2, {}, &z_max));
    z_max->flat<float>()(0) = max_z_value;
  }
};

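// Registers the 8-bit CPU kernel: quint8 inputs producing a qint32 sum. From
// Python this op is reachable as tf.raw_ops.QuantizedAdd(x, y, min_x, max_x,
// min_y, max_y, Toutput=tf.qint32), which also returns the output's range.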
REGISTER_KERNEL_BUILDER(Name("QuantizedAdd")
                            .Device(DEVICE_CPU)
                            .TypeConstraint<quint8>("T1")
                            .TypeConstraint<quint8>("T2")
                            .TypeConstraint<qint32>("Toutput"),
                        QuantizedAddOp<quint8, qint32>);

}  // namespace tensorflow
602