/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <stdint.h>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace div {

// This file has three implementations of Div.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;

struct OpData {
  bool requires_broadcast;

  // Parameters used in the quantized paths where the output is 8bit
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Parameters used in all quantized paths
  int32_t output_multiplier;
  int output_shift;
};

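// Init/Free manage the lifetime of the per-node OpData; the serialized
// `buffer` argument is unused by this op.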
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  auto* data = new OpData;
  data->requires_broadcast = false;
  return data;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

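// Validates the node, resolves broadcasting, and resizes the output tensor.
// For quantized (uint8) output it also precomputes the activation range and
// the fixed-point rescaling parameters used at Eval time.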
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input1;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor1, &input1));
  const TfLiteTensor* input2;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor2, &input2));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
  output->type = input2->type;

  data->requires_broadcast = !HaveSameShapes(input1, input2);

  TfLiteIntArray* output_size = nullptr;
  if (data->requires_broadcast) {
    TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
                                   context, input1, input2, &output_size));
  } else {
    output_size = TfLiteIntArrayCopy(input1->dims);
  }

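  // For uint8, dividing real values s1*(q1-z1) / (s2*(q2-z2)) and rescaling by
  // the output scale folds into a single factor s1 / (s2 * s_out), stored as
  // the fixed-point pair (output_multiplier, output_shift) for EvalQuantized.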
  if (output->type == kTfLiteUInt8) {
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
    const double real_multiplier =
        input1->params.scale / (input2->params.scale * output->params.scale);
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);
  }

  return context->ResizeTensor(context, output, output_size);
}

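// Evaluates float and int32 division. The TF_LITE_DIV macro expands to a call
// into either reference_ops or optimized_ops, picking the broadcast variant
// when the input shapes differ.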
template <KernelType kernel_type>
void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
             const OpData* data, const TfLiteTensor* input1,
             const TfLiteTensor* input2, TfLiteTensor* output) {
#define TF_LITE_DIV(type, opname, data_type)                             \
  tflite::ArithmeticParams op_params;                                    \
  data_type output_activation_min, output_activation_max;                \
  CalculateActivationRange(params->activation, &output_activation_min,   \
                           &output_activation_max);                      \
  SetActivationParams(output_activation_min, output_activation_max,      \
                      &op_params);                                       \
  type::opname(op_params, GetTensorShape(input1),                        \
               GetTensorData<data_type>(input1), GetTensorShape(input2), \
               GetTensorData<data_type>(input2), GetTensorShape(output), \
               GetTensorData<data_type>(output))
  if (output->type == kTfLiteInt32) {
    if (kernel_type == kReference) {
      if (data->requires_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDivSlow, int32_t);
      } else {
        TF_LITE_DIV(reference_ops, Div, int32_t);
      }
    } else {
      if (data->requires_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, int32_t);
      } else {
        TF_LITE_DIV(optimized_ops, Div, int32_t);
      }
    }
  } else if (output->type == kTfLiteFloat32) {
    if (kernel_type == kReference) {
      if (data->requires_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
      } else {
        TF_LITE_DIV(reference_ops, Div, float);
      }
    } else {
      if (data->requires_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, float);
      } else {
        TF_LITE_DIV(optimized_ops, Div, float);
      }
    }
  }
#undef TF_LITE_DIV
}

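// Evaluates uint8 division using the zero-point offsets and the fixed-point
// multiplier/shift precomputed in Prepare; any other combination of input and
// output types is rejected with an error.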
template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDivParams* params, const OpData* data,
                           const TfLiteTensor* input1,
                           const TfLiteTensor* input2, TfLiteTensor* output) {
  if (input1->type == kTfLiteUInt8 && input2->type == kTfLiteUInt8 &&
      output->type == kTfLiteUInt8) {
    tflite::ArithmeticParams op_params;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    op_params.input1_offset = -input1->params.zero_point;
    op_params.input2_offset = -input2->params.zero_point;
    op_params.output_offset = output->params.zero_point;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;
    bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_DIV(type, opname, dtype)                             \
  type::opname(op_params, GetTensorShape(input1),                    \
               GetTensorData<dtype>(input1), GetTensorShape(input2), \
               GetTensorData<dtype>(input2), GetTensorShape(output), \
               GetTensorData<dtype>(output))
    if (kernel_type == kReference) {
      if (need_broadcast) {
        TF_LITE_DIV(reference_ops, BroadcastDivSlow, uint8_t);
      } else {
        TF_LITE_DIV(reference_ops, Div, uint8_t);
      }
    } else {
      if (need_broadcast) {
        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, uint8_t);
      } else {
        TF_LITE_DIV(optimized_ops, Div, uint8_t);
      }
    }
#undef TF_LITE_DIV
  } else {
    TF_LITE_KERNEL_LOG(
        context, "Unsupported combination of input and output types in Div.");
    return kTfLiteError;
  }
  return kTfLiteOk;
}

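// Returns an error if any element of `tensor` is zero, rejecting integer
// division by zero before evaluation.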
template <typename T>
TfLiteStatus CheckNonZero(TfLiteContext* context, const TfLiteTensor* tensor) {
  const auto* data = GetTensorData<T>(tensor);
  const size_t number_elements = tensor->bytes / sizeof(T);
  for (size_t i = 0; i < number_elements; i++) {
    TF_LITE_ENSURE(context, data[i] != 0);
  }
  return kTfLiteOk;
}

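// Dispatches to the float/int32 or quantized uint8 path based on the output
// type, checking the divisor for zeros on the integer paths.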
template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  const TfLiteTensor* input1;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor1, &input1));
  const TfLiteTensor* input2;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor2, &input2));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  if (output->type == kTfLiteFloat32) {
    // Division by zero is allowed here, so no check is done at this point.
    // However, unlike TF, which returns infinities, we return the activation
    // min/max value if one is set, or std::numeric_limits<float>::min/max.
    EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteInt32) {
    TF_LITE_ENSURE_OK(context, CheckNonZero<int32_t>(context, input2));
    EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteUInt8) {
    TF_LITE_ENSURE_OK(context, CheckNonZero<uint8_t>(context, input2));
    TF_LITE_ENSURE_OK(
        context, EvalQuantized<kernel_type>(context, node, params, data,
                                            input1, input2, output));
  } else {
    TF_LITE_KERNEL_LOG(
        context,
        "Div only supports FLOAT32, INT32 and quantized UINT8 now, got %d.",
        output->type);
    return kTfLiteError;
  }

  return kTfLiteOk;
}

}  // namespace div

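// Registrations for the three kernel variants. Register_DIV selects the NEON
// kernel when USE_NEON is defined and the generic optimized kernel otherwise.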
TfLiteRegistration* Register_DIV_REF() {
  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
                                 div::Eval<div::kReference>};
  return &r;
}

TfLiteRegistration* Register_DIV_GENERIC_OPT() {
  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
                                 div::Eval<div::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_DIV_NEON_OPT() {
  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
                                 div::Eval<div::kNeonOptimized>};
  return &r;
}

TfLiteRegistration* Register_DIV() {
#ifdef USE_NEON
  return Register_DIV_NEON_OPT();
#else
  return Register_DIV_GENERIC_OPT();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite