/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/add.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace add {
// This file has three implementations of Add.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;

struct OpData {
  // These fields are used in both the general 8-bit -> 8-bit quantized path
  // and the special 16-bit -> 16-bit quantized path.
  int input1_shift;
  int input2_shift;
  int32 output_activation_min;
  int32 output_activation_max;

  // These fields are used only in the general 8-bit -> 8-bit quantized path.
  int32 input1_multiplier;
  int32 input2_multiplier;
  int32 output_multiplier;
  int output_shift;
  int left_shift;
  int32 input1_offset;
  int32 input2_offset;
  int32 output_offset;

  // Indicates whether the scale parameter is a power of two.
  // Used only in the 16-bit -> 16-bit quantized path.
  bool pot_scale_int16;
};
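
// For orientation, a sketch of the per-element arithmetic that the general
// quantized path parameterizes with the fields above (mirroring
// reference_ops::Add; pseudocode, not additional executable code):
//   shifted1   = (input1_val + input1_offset) << left_shift
//   shifted2   = (input2_val + input2_offset) << left_shift
//   scaled1    = MultiplyByQuantizedMultiplierSmallerThanOneExp(
//                    shifted1, input1_multiplier, input1_shift)
//   scaled2    = MultiplyByQuantizedMultiplierSmallerThanOneExp(
//                    shifted2, input2_multiplier, input2_shift)
//   raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
//                    scaled1 + scaled2, output_multiplier, output_shift) +
//                output_offset
//   output_val = Clamp(raw_output, output_activation_min,
//                      output_activation_max)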

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  auto* data = new OpData;
  return data;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input1;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor1, &input1));
  const TfLiteTensor* input2;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor2, &input2));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
  output->type = input2->type;

  const bool requires_broadcast = !HaveSameShapes(input1, input2);

  TfLiteIntArray* output_size = nullptr;
  if (requires_broadcast) {
    TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
                                   context, input1, input2, &output_size));
  } else {
    output_size = TfLiteIntArrayCopy(input1->dims);
  }

  // 8-bit -> 8-bit general quantized path with general rescalings,
  // as well as int16 -> int16 with general rescalings.

  // There are two implementations of the ADD operator for the 16-bit
  // input/output case, depending on whether the scale parameter is a power
  // of two. Currently only the implementation for the general case is used
  // by default, but the POT implementation is kept for older model versions.
  bool general_scale_int16 = false;

  bool input1_scale_is_pot = false;
  bool input2_scale_is_pot = false;
  bool output_scale_is_pot = false;

  int input1_scale_log2_rounded{0};
  int input2_scale_log2_rounded{0};
  int output_scale_log2_rounded{0};

  if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
      output->type == kTfLiteInt16) {
    // In the case of int16, quantization is symmetric and
    // the zero point must be zero.
    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

    general_scale_int16 = !params || !params->pot_scale_int16;

    if (!general_scale_int16) {
      // Do preparation for the case where the scale parameter is a power of
      // two.

      input1_scale_is_pot =
          CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);

      input2_scale_is_pot =
          CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);

      output_scale_is_pot =
          CheckedLog2(output->params.scale, &output_scale_log2_rounded);

      general_scale_int16 =
          !input1_scale_is_pot || !input2_scale_is_pot || !output_scale_is_pot;
    }
  }

  data->pot_scale_int16 = !general_scale_int16;

  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
      general_scale_int16) {
    // 8-bit -> 8-bit general quantized path with general rescalings,
    // as well as 16-bit -> 16-bit with general rescalings.
    data->input1_offset = -input1->params.zero_point;
    data->input2_offset = -input2->params.zero_point;
    data->output_offset = output->params.zero_point;

    // The shift is set to 15 for the 16-bit case and 20 for the 8-bit case.
    // For 16-bit, the worst case is 65535 << 15 = 2^31 - 2^15, which is
    // still less than 1 << 31, so the addition fits in a 32-bit accumulator.
    data->left_shift = general_scale_int16 ? 15 : 20;
    const double twice_max_input_scale =
        2 * std::max(input1->params.scale, input2->params.scale);
    const double real_input1_multiplier =
        input1->params.scale / twice_max_input_scale;
    const double real_input2_multiplier =
        input2->params.scale / twice_max_input_scale;
    const double real_output_multiplier =
        twice_max_input_scale /
        ((1 << data->left_shift) * output->params.scale);
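
    // Worked example with hypothetical scales: input1 scale = 0.5 and
    // input2 scale = 0.25 give twice_max_input_scale = 1.0, so
    // real_input1_multiplier = 0.5 and real_input2_multiplier = 0.25. With
    // an 8-bit output scale of 0.1 and left_shift = 20,
    // real_output_multiplier = 1.0 / ((1 << 20) * 0.1) ~= 9.54e-6.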

    QuantizeMultiplierSmallerThanOneExp(
        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);

    QuantizeMultiplierSmallerThanOneExp(
        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);

    QuantizeMultiplierSmallerThanOneExp(
        real_output_multiplier, &data->output_multiplier, &data->output_shift);

    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
  } else if (output->type == kTfLiteInt16) {
    // 16-bit -> 16-bit special quantized path, supporting only a rather
    // narrow case of quantization parameters: zero_points must all be 0
    // ("symmetric quantization") and scales must be power-of-two (which
    // we abbreviate as "POT" below). The intended use case for this path
    // is in LSTM cells, where, due to the constraints of implementing
    // some of the math in these LSTM cells in fixed-point arithmetic,
    // we need such symmetric, power-of-two quantization
    // (fixed-point formats are inherently symmetric and power-of-two).
    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

    TF_LITE_ENSURE(context, input1_scale_is_pot);
    TF_LITE_ENSURE(context, input2_scale_is_pot);
    TF_LITE_ENSURE(context, output_scale_is_pot);

    data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded;
    data->input2_shift = input2_scale_log2_rounded - output_scale_log2_rounded;

    // Shifting of at most one input is supported: the graph quantization
    // should ensure that the other input's scale already matches the
    // output's scale.
    TF_LITE_ENSURE(context, data->input1_shift == 0 || data->input2_shift == 0);
    TF_LITE_ENSURE(context, data->input1_shift <= 0);
    TF_LITE_ENSURE(context, data->input2_shift <= 0);
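
    // Worked example with hypothetical POT scales: input1 scale = 2^-12 and
    // output scale = 2^-10 give input1_shift = -12 - (-10) = -2, i.e. input1
    // values are right-shifted by 2 to match the output scale, while
    // input2_shift must then be 0 (its scale already matches the output's).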

    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
  }

  return context->ResizeTensor(context, output, output_size);
}

template <KernelType kernel_type>
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
             const OpData* data, const TfLiteTensor* input1,
             const TfLiteTensor* input2, TfLiteTensor* output) {
  tflite::ArithmeticParams op_params;
  const bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
      GetTensorShape(input1), GetTensorShape(input2), &op_params);
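  // ProcessBroadcastShapes returns true when the shapes differ (so a
  // broadcasting kernel is needed) and records the broadcast category and
  // extended shapes in op_params.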
#define TF_LITE_ADD(type, opname, data_type)                              \
  data_type output_activation_min, output_activation_max;                \
  CalculateActivationRange(params->activation, &output_activation_min,   \
                           &output_activation_max);                      \
  SetActivationParams(output_activation_min, output_activation_max,      \
                      &op_params);                                       \
  type::opname(op_params, GetTensorShape(input1),                        \
               GetTensorData<data_type>(input1), GetTensorShape(input2), \
               GetTensorData<data_type>(input2), GetTensorShape(output), \
               GetTensorData<data_type>(output))
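  // For example, TF_LITE_ADD(reference_ops, Add, int32_t) expands to a
  // computation of the int32_t activation range followed by a call to
  // reference_ops::Add on int32_t tensor data.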
  if (output->type == kTfLiteInt32) {
    if (kernel_type == kReference) {
      if (need_broadcast) {
        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int32_t);
      } else {
        TF_LITE_ADD(reference_ops, Add, int32_t);
      }
    } else {
      if (need_broadcast) {
        TF_LITE_ADD(optimized_ops, BroadcastAdd4DSlow, int32_t);
      } else {
        TF_LITE_ADD(optimized_ops, Add, int32_t);
      }
    }
  } else if (output->type == kTfLiteInt64) {
    if (kernel_type == kReference) {
      if (need_broadcast) {
        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int64_t);
      } else {
        TF_LITE_ADD(reference_ops, Add, int64_t);
      }
    } else {
      if (need_broadcast) {
        TF_LITE_ADD(optimized_ops, BroadcastAdd4DSlow, int64_t);
      } else {
        TF_LITE_ADD(optimized_ops, Add, int64_t);
      }
    }
  } else if (output->type == kTfLiteFloat32) {
    if (kernel_type == kReference) {
      if (need_broadcast) {
        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, float);
      } else {
        TF_LITE_ADD(reference_ops, Add, float);
      }
    } else {
      if (need_broadcast) {
        TF_LITE_ADD(optimized_ops, BroadcastAddDispatch, float);
      } else {
        TF_LITE_ADD(optimized_ops, Add, float);
      }
    }
  }
#undef TF_LITE_ADD
}

template <KernelType kernel_type>
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteAddParams* params, const OpData* data,
                              const TfLiteTensor* input1,
                              const TfLiteTensor* input2,
                              TfLiteTensor* output) {
  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
      !data->pot_scale_int16) {
    tflite::ArithmeticParams op_params;
    op_params.left_shift = data->left_shift;
    op_params.input1_offset = data->input1_offset;
    op_params.input1_multiplier = data->input1_multiplier;
    op_params.input1_shift = data->input1_shift;
    op_params.input2_offset = data->input2_offset;
    op_params.input2_multiplier = data->input2_multiplier;
    op_params.input2_shift = data->input2_shift;
    op_params.output_offset = data->output_offset;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype)                             \
  type::opname(op_params, GetTensorShape(input1),                    \
               GetTensorData<dtype>(input1), GetTensorShape(input2), \
               GetTensorData<dtype>(input2), GetTensorShape(output), \
               GetTensorData<dtype>(output));
    if (output->type == kTfLiteInt8) {
      if (kernel_type == kReference) {
        if (need_broadcast) {
          TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
        } else {
          TF_LITE_ADD(reference_integer_ops, Add, int8_t);
        }
      } else {
        if (need_broadcast) {
          TF_LITE_ADD(optimized_integer_ops, BroadcastAddDispatch, int8_t);
        } else {
          TF_LITE_ADD(optimized_integer_ops, Add, int8_t);
        }
      }
    } else if (output->type == kTfLiteInt16) {
      if (need_broadcast) {
        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t);
      } else {
        if (kernel_type == kReference) {
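          // The trailing 'false' argument selects the general-rescaling
          // (non-POT) variant of the int16 reference Add.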
          reference_ops::Add(
              op_params, GetTensorShape(input1), GetTensorData<int16_t>(input1),
              GetTensorShape(input2), GetTensorData<int16_t>(input2),
              GetTensorShape(output), GetTensorData<int16_t>(output), false);
        } else {
          TF_LITE_ADD(optimized_integer_ops, Add, int16_t);
        }
      }
    } else {
      if (kernel_type == kReference) {
        if (need_broadcast) {
          TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
        } else {
          TF_LITE_ADD(reference_ops, Add, uint8_t);
        }
      } else {
        if (need_broadcast) {
          TF_LITE_ADD(optimized_ops, BroadcastAddDispatch, uint8_t);
        } else {
          TF_LITE_ADD(optimized_ops, Add, uint8_t);
        }
      }
    }
#undef TF_LITE_ADD
  } else if (output->type == kTfLiteInt16) {
    tflite::ArithmeticParams op_params;
    op_params.input1_shift = data->input1_shift;
    op_params.input2_shift = data->input2_shift;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
#define TF_LITE_ADD(type, opname)                                      \
  type::opname(op_params, GetTensorShape(input1),                      \
               GetTensorData<int16_t>(input1), GetTensorShape(input2), \
               GetTensorData<int16_t>(input2), GetTensorShape(output), \
               GetTensorData<int16_t>(output))
    // Both kernel types dispatch to the same int16 POT-scale Add entry
    // point; the precomputed input shifts in op_params perform the
    // rescaling. Broadcasting is not handled on this path.
    if (kernel_type == kReference) {
      TF_LITE_ADD(reference_ops, Add);
    } else {
      TF_LITE_ADD(optimized_ops, Add);
    }
#undef TF_LITE_ADD
  }

  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  const TfLiteTensor* input1;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor1, &input1));
  const TfLiteTensor* input2;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor2, &input2));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32 ||
      output->type == kTfLiteInt64) {
    EvalAdd<kernel_type>(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
             output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_OK(context,
                      EvalAddQuantized<kernel_type>(context, node, params,
                                                    data, input1, input2,
                                                    output));
  } else {
    TF_LITE_UNSUPPORTED_TYPE(context, output->type, "Add");
  }

  return kTfLiteOk;
}

}  // namespace add

TfLiteRegistration* Register_ADD_REF() {
  static TfLiteRegistration r = {add::Init, add::Free, add::Prepare,
                                 add::Eval<add::kReference>};
  return &r;
}

TfLiteRegistration* Register_ADD_GENERIC_OPT() {
  static TfLiteRegistration r = {add::Init, add::Free, add::Prepare,
                                 add::Eval<add::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_ADD_NEON_OPT() {
  static TfLiteRegistration r = {add::Init, add::Free, add::Prepare,
                                 add::Eval<add::kNeonOptimized>};
  return &r;
}

TfLiteRegistration* Register_ADD() {
#ifdef USE_NEON
  return Register_ADD_NEON_OPT();
#else
  return Register_ADD_GENERIC_OPT();
#endif
}
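
// A minimal usage sketch (assuming the standard MutableOpResolver API): an
// op resolver wires this kernel in with something like
//   resolver.AddBuiltin(BuiltinOperator_ADD, Register_ADD());
// where the supported version range depends on the TFLite release.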

}  // namespace builtin
}  // namespace ops
}  // namespace tflite