/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv.h"

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_multithread.h"
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace depthwise_conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

// This file has three implementations of DepthwiseConv.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

const int kTensorNotAllocated = -1;

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
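  // For example, a real multiplier of 0.375 corresponds to the Q1.31 value
  // 0x60000000 (i.e. 0.75) with a left shift of -1; see QuantizeMultiplier()
  // in quantization_util.h.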
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Per channel output multiplier and shift.
  std::vector<int32_t> per_channel_output_multiplier;
  std::vector<int> per_channel_output_shift;

  // Hybrid per channel temporary tensors.
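  // The '*_id' fields hold each scratch tensor's index in the TfLiteContext
  // tensor array (allocated via context->AddTensors in Prepare), while the
  // '*_index' fields record the tensor's slot in node->temporaries.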
  int input_quantized_id = kTensorNotAllocated;
  int scaling_factors_id = kTensorNotAllocated;
  int input_offset_id = kTensorNotAllocated;
  int32_t input_quantized_index;
  int32_t scaling_factors_index;
  int32_t input_offset_index;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  bool has_bias = NumInputs(node) == 3;

  TF_LITE_ENSURE(context, has_bias || NumInputs(node) == 2);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* filter;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kFilterTensor, &filter));
  const TfLiteTensor* bias = nullptr;

  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);
  TF_LITE_ENSURE(context, params->dilation_height_factor > 0);
  TF_LITE_ENSURE(context, params->dilation_width_factor > 0);

  const TfLiteType data_type = input->type;

  const TfLiteType filter_type = filter->type;
  const bool is_hybrid =
      data_type == kTfLiteFloat32 && filter_type == kTfLiteInt8;
  TF_LITE_ENSURE(context,
                 data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8 ||
                     data_type == kTfLiteInt8 || data_type == kTfLiteInt16);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, data_type);
  if (!is_hybrid) {
    TF_LITE_ENSURE(context,
                   filter->type == data_type || data_type == kTfLiteInt16);
  }

  if (data_type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }

  // Filter in DepthwiseConv is expected to be [1, H, W, O].
  TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 0), 1);

  if (has_bias) {
    TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kBiasTensor, &bias));
    if (data_type == kTfLiteUInt8 || data_type == kTfLiteInt8) {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt32);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else if (data_type == kTfLiteInt16) {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, data_type);
    }
    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
    TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
                      SizeOfDimension(bias, 0));
  }

  int channels_out = SizeOfDimension(filter, 3);
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);
  int batches = SizeOfDimension(input, 0);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  int out_width, out_height;

  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training or
  // calibration.
  if (data_type != kTfLiteFloat32) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);
    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, (affine_quantization->scale->size == 1 ||
                             affine_quantization->scale->size == channels_out));

    data->per_channel_output_multiplier.resize(channels_out);
    data->per_channel_output_shift.resize(channels_out);
    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), channels_out));
  }

  if (is_hybrid) {
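    // Hybrid (float input, int8 filter) kernels quantize the input on the
    // fly, so allocate three scratch tensors: an int8 copy of the input plus
    // per-batch scaling factors and per-batch input zero points.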
    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE_EQ(
        context, affine_quantization->scale->size,
        filter->dims->data[affine_quantization->quantized_dimension]);

    int temporaries_count = 0;
    data->input_quantized_index = temporaries_count;
    if (data->input_quantized_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->input_quantized_id));
    }
    ++temporaries_count;
    data->scaling_factors_index = temporaries_count;
    if (data->scaling_factors_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->scaling_factors_id));
    }
    ++temporaries_count;
    data->input_offset_index = temporaries_count;
    if (data->input_offset_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->input_offset_id));
    }
    ++temporaries_count;

    TfLiteIntArrayFree(node->temporaries);
    node->temporaries = TfLiteIntArrayCreate(temporaries_count);

    node->temporaries->data[data->input_quantized_index] =
        data->input_quantized_id;
    TfLiteTensor* input_quantized;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->input_quantized_index,
                                  &input_quantized));
    input_quantized->type = kTfLiteInt8;
    input_quantized->allocation_type = kTfLiteArenaRw;
    if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
      TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized,
                                                       input_quantized_size));
    }
    node->temporaries->data[data->scaling_factors_index] =
        data->scaling_factors_id;
    TfLiteTensor* scaling_factors;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->scaling_factors_index,
                                  &scaling_factors));
    scaling_factors->type = kTfLiteFloat32;
    scaling_factors->allocation_type = kTfLiteArenaRw;
    const int batch_size = SizeOfDimension(input, 0);
    int scaling_dims[1] = {batch_size};
    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
      scaling_factors_size->data[0] = batch_size;
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                       scaling_factors_size));
    }
    node->temporaries->data[data->input_offset_index] = data->input_offset_id;
    TfLiteTensor* input_offsets;
    TF_LITE_ENSURE_OK(context,
                      GetTemporarySafe(context, node, data->input_offset_index,
                                       &input_offsets));
    input_offsets->type = kTfLiteInt32;
    input_offsets->allocation_type = kTfLiteArenaRw;
    if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, scaling_dims)) {
      TfLiteIntArray* input_offsets_size = TfLiteIntArrayCreate(1);
      input_offsets_size->data[0] = batch_size;
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_offsets,
                                                       input_offsets_size));
    }
  }

  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
  outputSize->data[0] = batches;
  outputSize->data[1] = out_height;
  outputSize->data[2] = out_width;
  outputSize->data[3] = channels_out;
  return context->ResizeTensor(context, output, outputSize);
}

TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context,
                                    const TfLiteTensor* input,
                                    const TfLiteTensor* filter,
                                    int16* depth_multiplier) {
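  // The filter is laid out as [1, H, W, input_channels * depth_multiplier], so
  // the depth multiplier is the ratio of filter channels to input channels
  // (guarding against a zero or non-integral ratio).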
  int num_filter_channels = SizeOfDimension(filter, 3);
  int num_input_channels = SizeOfDimension(input, 3);
  TF_LITE_ENSURE(context, num_input_channels != 0);
  TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0);
  *depth_multiplier = num_filter_channels / num_input_channels;
  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteDepthwiseConvParams* params, OpData* data,
                       const TfLiteTensor* input, const TfLiteTensor* filter,
                       const TfLiteTensor* bias, TfLiteTensor* output) {
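  // Pure float path: dispatches to the reference kernel or to the optimized
  // (potentially multithreaded) kernel depending on the template parameter.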
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));
  if (kernel_type == kReference) {
    reference_ops::DepthwiseConv(
        op_params, GetTensorShape(input), GetTensorData<float>(input),
        GetTensorShape(filter), GetTensorData<float>(filter),
        GetTensorShape(bias), GetTensorData<float>(bias),
        GetTensorShape(output), GetTensorData<float>(output));
  } else {
    optimized_ops::DepthwiseConv<float, float>(
        op_params, GetTensorShape(input), GetTensorData<float>(input),
        GetTensorShape(filter), GetTensorData<float>(filter),
        GetTensorShape(bias), GetTensorData<float>(bias),
        GetTensorShape(output), GetTensorData<float>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDepthwiseConvParams* params, OpData* data,
                           const TfLiteTensor* input,
                           const TfLiteTensor* filter, const TfLiteTensor* bias,
                           TfLiteTensor* output) {
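  // Per-tensor quantized uint8 path. The input and filter zero points are
  // negated because the kernels add these offsets back to the raw quantized
  // values; output_shift is negated below because this uint8 path uses the
  // opposite sign convention from the left shift stored in OpData.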
  auto input_offset = -input->params.zero_point;
  auto filter_offset = -filter->params.zero_point;
  auto output_offset = output->params.zero_point;

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));
  if (kernel_type == kReference) {
    reference_ops::DepthwiseConv(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
        GetTensorShape(bias), GetTensorData<int32_t>(bias),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
    optimized_ops::DepthwiseConv<uint8, int32>(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
        GetTensorShape(bias), GetTensorData<int32_t>(bias),
        GetTensorShape(output), GetTensorData<uint8_t>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                                     TfLiteDepthwiseConvParams* params,
                                     OpData* data, const TfLiteTensor* input,
                                     const TfLiteTensor* filter,
                                     const TfLiteTensor* bias,
                                     TfLiteTensor* output) {
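  // Per-channel quantized int8 path. Per-channel int8 filters are symmetric
  // (zero point 0), so weights_offset stays 0 and the per-channel multipliers
  // and shifts computed in Prepare() are used instead of a single multiplier.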
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = -input->params.zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));

  if (kernel_type == kReference) {
    reference_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int8>(input), GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int8>(output));
  } else {
    optimized_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int8>(input), GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int8>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

TfLiteStatus EvalQuantizedPerChannel16x8(
    const TfLiteDepthwiseConvParams* params, const OpData* data,
    const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output) {
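  // 16-bit activation / 8-bit weight path. Only a reference kernel is
  // available; activations use a zero point of 0 (enforced in Prepare) and
  // the bias, when present, is int64.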
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.weights_offset = 0;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier.data(),
      data->per_channel_output_shift.data(), GetTensorShape(input),
      GetTensorData<int16>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<std::int64_t>(bias), GetTensorShape(output),
      GetTensorData<int16>(output));

  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
                                  TfLiteDepthwiseConvParams* params,
                                  OpData* data, const TfLiteTensor* input,
                                  const TfLiteTensor* filter,
                                  const TfLiteTensor* bias,
                                  TfLiteTensor* output) {
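  // Hybrid path: the float input is quantized to int8 one batch at a time
  // (asymmetric, with a per-batch scale and zero point), the integer kernel
  // accumulates in the quantized domain, and the result is rescaled back to
  // float using the per-batch input scales and per-channel filter scales.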
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  const int batch_size = SizeOfDimension(input, 0);
  TF_LITE_ENSURE(context, batch_size != 0);
  const int input_size = NumElements(input) / batch_size;
  TfLiteTensor* input_quantized;
  TF_LITE_ENSURE_OK(
      context, GetTemporarySafe(context, node, data->input_quantized_index,
                                &input_quantized));
  int8_t* quantized_input_ptr_batch = input_quantized->data.int8;
  TfLiteTensor* scaling_factors_tensor;
  TF_LITE_ENSURE_OK(
      context, GetTemporarySafe(context, node, data->scaling_factors_index,
                                &scaling_factors_tensor));
  float* scaling_factors_ptr = GetTensorData<float>(scaling_factors_tensor);
  TfLiteTensor* input_offset_tensor;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->input_offset_index,
                                     &input_offset_tensor));
  int32_t* input_offset_ptr = GetTensorData<int32_t>(input_offset_tensor);

  for (int b = 0; b < batch_size; ++b) {
    const int offset = b * input_size;
    tensor_utils::AsymmetricQuantizeFloats(
        GetTensorData<float>(input) + offset, input_size,
        quantized_input_ptr_batch + offset, &scaling_factors_ptr[b],
        &input_offset_ptr[b]);
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;

  op_params.weights_offset = 0;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  if (kernel_type == kReference) {
    reference_integer_ops::DepthwiseConvHybridPerChannel(
        op_params, scaling_factors_ptr, GetTensorShape(input),
        quantized_input_ptr_batch, GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<float>(bias), GetTensorShape(output),
        GetTensorData<float>(output), affine_quantization->scale->data,
        input_offset_ptr);
  } else {
    optimized_integer_ops::DepthwiseConvHybridPerChannel(
        op_params, scaling_factors_ptr, GetTensorShape(input),
        quantized_input_ptr_batch, GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<float>(bias), GetTensorShape(output),
        GetTensorData<float>(output), affine_quantization->scale->data,
        input_offset_ptr, CpuBackendContext::GetFromContext(context));
  }

  return kTfLiteOk;
}

template <KernelType kernel_type, TfLiteType input_type>
TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node) {
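  // Dispatches on the input type, which is known at compile time here. A float
  // input paired with an int8 filter selects the hybrid path; int16 inputs use
  // an int8 filter; otherwise the filter type matches the input type.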
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* filter;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kFilterTensor, &filter));
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
  TFLITE_DCHECK_EQ(input_type, input->type);

  switch (input_type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      if (filter->type == kTfLiteFloat32) {
        return EvalFloat<kernel_type>(context, node, params, data, input,
                                      filter, bias, output);
      } else if (filter->type == kTfLiteInt8) {
        return EvalHybridPerChannel<kernel_type>(context, node, params, data,
                                                 input, filter, bias, output);
      } else {
        TF_LITE_KERNEL_LOG(
            context, "Type %s with filter type %s not currently supported.",
            TfLiteTypeGetName(input->type), TfLiteTypeGetName(filter->type));
        return kTfLiteError;
      }
      break;
    case kTfLiteUInt8:
      return EvalQuantized<kernel_type>(context, node, params, data, input,
                                        filter, bias, output);
      break;
    case kTfLiteInt8:
      return EvalQuantizedPerChannel<kernel_type>(context, node, params, data,
                                                  input, filter, bias, output);
      break;
    case kTfLiteInt16:
      return EvalQuantizedPerChannel16x8(params, data, input, filter, bias,
                                         output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %d not currently supported.",
                         input->type);
      return kTfLiteError;
  }
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      return EvalImpl<kernel_type, kTfLiteFloat32>(context, node);
    case kTfLiteUInt8:
      return EvalImpl<kernel_type, kTfLiteUInt8>(context, node);
    case kTfLiteInt8:
      return EvalImpl<kernel_type, kTfLiteInt8>(context, node);
    case kTfLiteInt16:
      return EvalImpl<kernel_type, kTfLiteInt16>(context, node);
    default:
      TF_LITE_KERNEL_LOG(context, "Type %d not currently supported.",
                         input->type);
      return kTfLiteError;
  }
}

}  // namespace depthwise_conv

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kReference>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT_UINT8() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::EvalImpl<depthwise_conv::kNeonOptimized, kTfLiteUInt8>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
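  // Default registration: use the NEON-optimized kernel when NEON is
  // available, otherwise fall back to the generic optimized kernel.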
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
#else
  return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
#endif
}

// Warning: Clients using this variant are responsible for ensuring that their
// models only need the UINT8 type. TFLite's op registration mechanism doesn't
// yet allow for more nuanced registration.
TfLiteRegistration* Register_DEPTHWISE_CONV_2D_UINT8() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT_UINT8();
#else
  return Register_DEPTHWISE_CONV_2D();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite