1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include <stddef.h> |
16 | #include <stdint.h> |
17 | |
18 | #include "tensorflow/lite/c/builtin_op_data.h" |
19 | #include "tensorflow/lite/c/common.h" |
20 | #include "tensorflow/lite/kernels/internal/compatibility.h" |
21 | #include "tensorflow/lite/kernels/internal/optimized/cpu_check.h" |
22 | #include "tensorflow/lite/kernels/internal/optimized/neon_check.h" |
23 | #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" |
24 | #include "tensorflow/lite/kernels/internal/quantization_util.h" |
25 | #include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" |
26 | #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" |
27 | #include "tensorflow/lite/kernels/internal/tensor.h" |
28 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" |
29 | #include "tensorflow/lite/kernels/internal/types.h" |
30 | #include "tensorflow/lite/kernels/kernel_util.h" |
31 | |
32 | namespace tflite { |
33 | namespace ops { |
34 | namespace builtin { |
35 | namespace div { |
36 | |
// This file has three implementations of Div: a portable reference kernel,
// a generic optimized kernel that does not require NEON, and a NEON kernel.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};
43 | |
// Indices into the node's input/output tensor arrays.
constexpr int kInputTensor1 = 0;  // numerator
constexpr int kInputTensor2 = 1;  // denominator
constexpr int kOutputTensor = 0;
47 | |
// Per-node state: filled in by Prepare() and consumed by the Eval paths.
struct OpData {
  // True when input1 and input2 have different shapes, so the kernel must
  // use the broadcasting variant of Div.
  bool requires_broadcast;

  // Parameters used in the quantized paths where the output is 8bit.
  // (Consistency fix: use standard int32_t, matching output_multiplier,
  // instead of the non-standard int32 alias.)
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Parameters used in all quantized paths: fixed-point rescaling factor
  // mapping the input-scale ratio into the output's quantization.
  int32_t output_multiplier;
  int output_shift;
};
59 | |
60 | void* Init(TfLiteContext* context, const char* buffer, size_t length) { |
61 | auto* data = new OpData; |
62 | data->requires_broadcast = false; |
63 | return data; |
64 | } |
65 | |
66 | void Free(TfLiteContext* context, void* buffer) { |
67 | delete reinterpret_cast<OpData*>(buffer); |
68 | } |
69 | |
70 | TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { |
71 | auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data); |
72 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
73 | |
74 | TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); |
75 | TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); |
76 | |
77 | const TfLiteTensor* input1; |
78 | TF_LITE_ENSURE_OK(context, |
79 | GetInputSafe(context, node, kInputTensor1, &input1)); |
80 | const TfLiteTensor* input2; |
81 | TF_LITE_ENSURE_OK(context, |
82 | GetInputSafe(context, node, kInputTensor2, &input2)); |
83 | TfLiteTensor* output; |
84 | TF_LITE_ENSURE_OK(context, |
85 | GetOutputSafe(context, node, kOutputTensor, &output)); |
86 | |
87 | TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); |
88 | output->type = input2->type; |
89 | |
90 | data->requires_broadcast = !HaveSameShapes(input1, input2); |
91 | |
92 | TfLiteIntArray* output_size = nullptr; |
93 | if (data->requires_broadcast) { |
94 | TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( |
95 | context, input1, input2, &output_size)); |
96 | } else { |
97 | output_size = TfLiteIntArrayCopy(input1->dims); |
98 | } |
99 | |
100 | if (output->type == kTfLiteUInt8) { |
101 | TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( |
102 | context, params->activation, output, &data->output_activation_min, |
103 | &data->output_activation_max)); |
104 | const double real_multiplier = |
105 | input1->params.scale / (input2->params.scale * output->params.scale); |
106 | QuantizeMultiplier(real_multiplier, &data->output_multiplier, |
107 | &data->output_shift); |
108 | } |
109 | |
110 | return context->ResizeTensor(context, output, output_size); |
111 | } |
112 | |
// Element-wise division for float32/int32 outputs, with the fused activation
// from `params` applied as a clamp. Dispatches between the reference and
// optimized implementations based on kernel_type, and between the plain and
// broadcasting variants based on data->requires_broadcast (set in Prepare()).
// NOTE(review): any other output type falls through with no work; Eval()
// only routes float32 and int32 here.
template <KernelType kernel_type>
void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
             const OpData* data, const TfLiteTensor* input1,
             const TfLiteTensor* input2, TfLiteTensor* output) {
// Expands to: compute the activation clamp range in data_type, stash it in
// ArithmeticParams, then invoke type::opname over the input/output tensors.
#define TF_LITE_DIV(type, opname, data_type)                             \
  tflite::ArithmeticParams op_params;                                    \
  data_type output_activation_min, output_activation_max;                \
  CalculateActivationRange(params->activation, &output_activation_min,   \
                           &output_activation_max);                      \
  SetActivationParams(output_activation_min, output_activation_max,      \
                      &op_params);                                       \
  type::opname(op_params, GetTensorShape(input1),                        \
               GetTensorData<data_type>(input1), GetTensorShape(input2), \
               GetTensorData<data_type>(input2), GetTensorShape(output), \
               GetTensorData<data_type>(output))
  if (output->type == kTfLiteInt32) {
    if (kernel_type == kReference) {
      if (data->requires_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDivSlow, int32_t);
      } else {
        TF_LITE_DIV(reference_ops, Div, int32_t);
      }
    } else {
      if (data->requires_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, int32_t);
      } else {
        TF_LITE_DIV(optimized_ops, Div, int32_t);
      }
    }
  } else if (output->type == kTfLiteFloat32) {
    if (kernel_type == kReference) {
      if (data->requires_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
      } else {
        TF_LITE_DIV(reference_ops, Div, float);
      }
    } else {
      if (data->requires_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, float);
      } else {
        TF_LITE_DIV(optimized_ops, Div, float);
      }
    }
  }
#undef TF_LITE_DIV
}
159 | |
// Quantized (uint8) division. Uses the output_multiplier/output_shift and
// activation bounds precomputed in Prepare() to rescale the integer quotient
// into the output's quantization. Errors out for any type combination other
// than uint8/uint8 -> uint8. (`params` is unused here: the activation range
// was already folded into OpData during Prepare().)
template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDivParams* params, const OpData* data,
                           const TfLiteTensor* input1,
                           const TfLiteTensor* input2, TfLiteTensor* output) {
  if (input1->type == kTfLiteUInt8 && input2->type == kTfLiteUInt8 &&
      output->type == kTfLiteUInt8) {
    tflite::ArithmeticParams op_params;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    // Input offsets are negated so the kernel can add them to remove the
    // zero point; the output zero point is added back unnegated.
    op_params.input1_offset = -input1->params.zero_point;
    op_params.input2_offset = -input2->params.zero_point;
    op_params.output_offset = output->params.zero_point;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;
    // Also fills op_params' broadcast shape fields when shapes differ.
    bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);
// Expands to: invoke type::opname over the input/output tensors with the
// op_params assembled above.
#define TF_LITE_DIV(type, opname, dtype)                         \
  type::opname(op_params, GetTensorShape(input1),                \
               GetTensorData<dtype>(input1), GetTensorShape(input2), \
               GetTensorData<dtype>(input2), GetTensorShape(output), \
               GetTensorData<dtype>(output))
    if (kernel_type == kReference) {
      if (need_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDivSlow, uint8_t);
      } else {
        TF_LITE_DIV(reference_ops, Div, uint8_t);
      }
    } else {
      if (need_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, uint8_t);
      } else {
        TF_LITE_DIV(optimized_ops, Div, uint8_t);
      }
    }
#undef TF_LITE_DIV
  } else {
    TF_LITE_KERNEL_LOG(
        context, "Unsupported combination of input and output types in Div.");
    return kTfLiteError;
  }
  return kTfLiteOk;
}
203 | |
204 | template <typename T> |
205 | TfLiteStatus CheckNonZero(TfLiteContext* context, const TfLiteTensor* tensor) { |
206 | const auto* data = GetTensorData<T>(tensor); |
207 | const size_t number_elements = tensor->bytes / sizeof(T); |
208 | for (size_t i = 0; i < number_elements; i++) { |
209 | TF_LITE_ENSURE(context, data[i] != 0); |
210 | } |
211 | return kTfLiteOk; |
212 | } |
213 | |
214 | template <KernelType kernel_type> |
215 | TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { |
216 | auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data); |
217 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
218 | |
219 | const TfLiteTensor* input1; |
220 | TF_LITE_ENSURE_OK(context, |
221 | GetInputSafe(context, node, kInputTensor1, &input1)); |
222 | const TfLiteTensor* input2; |
223 | TF_LITE_ENSURE_OK(context, |
224 | GetInputSafe(context, node, kInputTensor2, &input2)); |
225 | TfLiteTensor* output; |
226 | TF_LITE_ENSURE_OK(context, |
227 | GetOutputSafe(context, node, kOutputTensor, &output)); |
228 | |
229 | |
230 | if (output->type == kTfLiteFloat32) { |
231 | // Div by zero seems ok in this case, we don't do a check at this point. |
232 | // However, unlike in TF where infinities are returned, here we return an |
233 | // activation min/max value if any or std::numeric_limits<float>::min/max. |
234 | EvalDiv<kernel_type>(context, node, params, data, input1, input2, output); |
235 | } else if (output->type == kTfLiteInt32) { |
236 | CheckNonZero<int32_t>(context, input2); |
237 | EvalDiv<kernel_type>(context, node, params, data, input1, input2, output); |
238 | } else if (output->type == kTfLiteUInt8) { |
239 | CheckNonZero<uint8_t>(context, input2); |
240 | TF_LITE_ENSURE_OK( |
241 | context, EvalQuantized<kernel_type>(context, node, params, data, input1, |
242 | input2, output)); |
243 | } else { |
244 | TF_LITE_KERNEL_LOG( |
245 | context, |
246 | "Div only supports FLOAT32, INT32 and quantized UINT8 now, got %d." , |
247 | output->type); |
248 | return kTfLiteError; |
249 | } |
250 | |
251 | return kTfLiteOk; |
252 | } |
253 | |
254 | } // namespace div |
255 | |
256 | TfLiteRegistration* Register_DIV_REF() { |
257 | static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, |
258 | div::Eval<div::kReference>}; |
259 | return &r; |
260 | } |
261 | |
262 | TfLiteRegistration* Register_DIV_GENERIC_OPT() { |
263 | static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, |
264 | div::Eval<div::kGenericOptimized>}; |
265 | return &r; |
266 | } |
267 | |
268 | TfLiteRegistration* Register_DIV_NEON_OPT() { |
269 | static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, |
270 | div::Eval<div::kNeonOptimized>}; |
271 | return &r; |
272 | } |
273 | |
274 | TfLiteRegistration* Register_DIV() { |
275 | #ifdef USE_NEON |
276 | return Register_DIV_NEON_OPT(); |
277 | #else |
278 | return Register_DIV_GENERIC_OPT(); |
279 | #endif |
280 | } |
281 | |
282 | } // namespace builtin |
283 | } // namespace ops |
284 | } // namespace tflite |
285 | |