/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv.h"

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_multithread.h"
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace depthwise_conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

// This file has three implementations of DepthwiseConv.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

const int kTensorNotAllocated = -1;

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Per channel output multiplier and shift.
  std::vector<int32_t> per_channel_output_multiplier;
  std::vector<int> per_channel_output_shift;

  // Hybrid per channel temporary tensors.
  int input_quantized_id = kTensorNotAllocated;
  int scaling_factors_id = kTensorNotAllocated;
  int input_offset_id = kTensorNotAllocated;
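  // Positions of the temporaries above within node->temporaries; assigned in
  // Prepare() when the hybrid scratch tensors are set up.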
  int32_t input_quantized_index;
  int32_t scaling_factors_index;
  int32_t input_offset_index;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  bool has_bias = NumInputs(node) == 3;

  TF_LITE_ENSURE(context, has_bias || NumInputs(node) == 2);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* filter;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kFilterTensor, &filter));
  const TfLiteTensor* bias = nullptr;

  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);
  TF_LITE_ENSURE(context, params->dilation_height_factor > 0);
  TF_LITE_ENSURE(context, params->dilation_width_factor > 0);

  const TfLiteType data_type = input->type;

  const TfLiteType filter_type = filter->type;
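  // A float32 input combined with an int8 filter selects the hybrid path: the
  // input is quantized per batch on the fly and the integer accumulators are
  // rescaled back to float.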
  const bool is_hybrid =
      data_type == kTfLiteFloat32 && filter_type == kTfLiteInt8;
  TF_LITE_ENSURE(context,
                 data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8 ||
                     data_type == kTfLiteInt8 || data_type == kTfLiteInt16);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, data_type);
  if (!is_hybrid) {
    TF_LITE_ENSURE(context,
                   filter->type == data_type || data_type == kTfLiteInt16);
  }

  if (data_type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }

  // Filter in DepthwiseConv is expected to be [1, H, W, O].
  TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 0), 1);

  if (has_bias) {
    TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kBiasTensor, &bias));
    if (data_type == kTfLiteUInt8 || data_type == kTfLiteInt8) {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt32);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else if (data_type == kTfLiteInt16) {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, data_type);
    }
    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
    TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
                      SizeOfDimension(bias, 0));
  }

  int channels_out = SizeOfDimension(filter, 3);
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);
  int batches = SizeOfDimension(input, 0);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  int out_width, out_height;

  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training or
  // calibration.
  if (data_type != kTfLiteFloat32) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);
    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, (affine_quantization->scale->size == 1 ||
                             affine_quantization->scale->size == channels_out));

    data->per_channel_output_multiplier.resize(channels_out);
    data->per_channel_output_shift.resize(channels_out);
    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), channels_out));
  }

  if (is_hybrid) {
    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE_EQ(
        context, affine_quantization->scale->size,
        filter->dims->data[affine_quantization->quantized_dimension]);

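    // The hybrid path needs three scratch tensors: an int8 copy of the input,
    // per-batch scaling factors, and per-batch input zero points. Each is
    // allocated at most once and reused on later calls to Prepare().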
    int temporaries_count = 0;
    data->input_quantized_index = temporaries_count;
    if (data->input_quantized_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->input_quantized_id));
    }
    ++temporaries_count;
    data->scaling_factors_index = temporaries_count;
    if (data->scaling_factors_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->scaling_factors_id));
    }
    ++temporaries_count;
    data->input_offset_index = temporaries_count;
    if (data->input_offset_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->input_offset_id));
    }
    ++temporaries_count;

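    // Rebuild node->temporaries so it references exactly the scratch tensors
    // added above.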
    TfLiteIntArrayFree(node->temporaries);
    node->temporaries = TfLiteIntArrayCreate(temporaries_count);

    node->temporaries->data[data->input_quantized_index] =
        data->input_quantized_id;
    TfLiteTensor* input_quantized;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->input_quantized_index,
                                  &input_quantized));
    input_quantized->type = kTfLiteInt8;
    input_quantized->allocation_type = kTfLiteArenaRw;
    if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
      TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized,
                                                       input_quantized_size));
    }
    node->temporaries->data[data->scaling_factors_index] =
        data->scaling_factors_id;
    TfLiteTensor* scaling_factors;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->scaling_factors_index,
                                  &scaling_factors));
    scaling_factors->type = kTfLiteFloat32;
    scaling_factors->allocation_type = kTfLiteArenaRw;
    const int batch_size = SizeOfDimension(input, 0);
    int scaling_dims[1] = {batch_size};
    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
      scaling_factors_size->data[0] = batch_size;
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                       scaling_factors_size));
    }
    node->temporaries->data[data->input_offset_index] = data->input_offset_id;
    TfLiteTensor* input_offsets;
    TF_LITE_ENSURE_OK(context,
                      GetTemporarySafe(context, node, data->input_offset_index,
                                       &input_offsets));
    input_offsets->type = kTfLiteInt32;
    input_offsets->allocation_type = kTfLiteArenaRw;
    if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, scaling_dims)) {
      TfLiteIntArray* input_offsets_size = TfLiteIntArrayCreate(1);
      input_offsets_size->data[0] = batch_size;
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_offsets,
                                                       input_offsets_size));
    }
  }

  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
  outputSize->data[0] = batches;
  outputSize->data[1] = out_height;
  outputSize->data[2] = out_width;
  outputSize->data[3] = channels_out;
  return context->ResizeTensor(context, output, outputSize);
}

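// Computes the depth multiplier, i.e. the number of filter output channels per
// input channel, and checks that the filter depth is an exact multiple of the
// input depth.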
TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context,
                                    const TfLiteTensor* input,
                                    const TfLiteTensor* filter,
                                    int16* depth_multiplier) {
  int num_filter_channels = SizeOfDimension(filter, 3);
  int num_input_channels = SizeOfDimension(input, 3);
  TF_LITE_ENSURE(context, num_input_channels != 0);
  TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0);
  *depth_multiplier = num_filter_channels / num_input_channels;
  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteDepthwiseConvParams* params, OpData* data,
                       const TfLiteTensor* input, const TfLiteTensor* filter,
                       const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));
  if (kernel_type == kReference) {
    reference_ops::DepthwiseConv(
        op_params, GetTensorShape(input), GetTensorData<float>(input),
        GetTensorShape(filter), GetTensorData<float>(filter),
        GetTensorShape(bias), GetTensorData<float>(bias),
        GetTensorShape(output), GetTensorData<float>(output));
  } else {
    optimized_ops::DepthwiseConv<float, float>(
        op_params, GetTensorShape(input), GetTensorData<float>(input),
        GetTensorShape(filter), GetTensorData<float>(filter),
        GetTensorShape(bias), GetTensorData<float>(bias),
        GetTensorShape(output), GetTensorData<float>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDepthwiseConvParams* params, OpData* data,
                           const TfLiteTensor* input,
                           const TfLiteTensor* filter, const TfLiteTensor* bias,
                           TfLiteTensor* output) {
  auto input_offset = -input->params.zero_point;
  auto filter_offset = -filter->params.zero_point;
  auto output_offset = output->params.zero_point;

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
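  // Prepare() stores the output shift as a left shift (see OpData); the uint8
  // kernels take it with the opposite sign convention, hence the negation.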
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));
  if (kernel_type == kReference) {
    reference_ops::DepthwiseConv(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
        GetTensorShape(bias), GetTensorData<int32_t>(bias),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
    optimized_ops::DepthwiseConv<uint8, int32>(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
        GetTensorShape(bias), GetTensorData<int32_t>(bias),
        GetTensorShape(output), GetTensorData<uint8_t>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                                     TfLiteDepthwiseConvParams* params,
                                     OpData* data, const TfLiteTensor* input,
                                     const TfLiteTensor* filter,
                                     const TfLiteTensor* bias,
                                     TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = -input->params.zero_point;
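  // Per-channel int8 filters are symmetrically quantized (zero point 0), so no
  // weights offset is applied.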
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));

  if (kernel_type == kReference) {
    reference_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int8>(input), GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int8>(output));
  } else {
    optimized_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int8>(input), GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int8>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

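// For 16x8 quantization (int16 activations with int8 weights) only a reference
// kernel is available, so there is no kernel_type template parameter. The
// bias, if present, is int64.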
TfLiteStatus EvalQuantizedPerChannel16x8(
    const TfLiteDepthwiseConvParams* params, const OpData* data,
    const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.weights_offset = 0;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier.data(),
      data->per_channel_output_shift.data(), GetTensorShape(input),
      GetTensorData<int16>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<std::int64_t>(bias), GetTensorShape(output),
      GetTensorData<int16>(output));

  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
                                  TfLiteDepthwiseConvParams* params,
                                  OpData* data, const TfLiteTensor* input,
                                  const TfLiteTensor* filter,
                                  const TfLiteTensor* bias,
                                  TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  const int batch_size = SizeOfDimension(input, 0);
  TF_LITE_ENSURE(context, batch_size != 0);
  const int input_size = NumElements(input) / batch_size;
  TfLiteTensor* input_quantized;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->input_quantized_index,
                                     &input_quantized));
  int8_t* quantized_input_ptr_batch = input_quantized->data.int8;
  TfLiteTensor* scaling_factors_tensor;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->scaling_factors_index,
                                     &scaling_factors_tensor));
  float* scaling_factors_ptr = GetTensorData<float>(scaling_factors_tensor);
  TfLiteTensor* input_offset_tensor;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->input_offset_index,
                                     &input_offset_tensor));
  int32_t* input_offset_ptr = GetTensorData<int32_t>(input_offset_tensor);

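  // Asymmetrically quantize each batch of the float input to int8, recording a
  // per-batch scaling factor and zero point that the hybrid kernel uses to
  // rescale the integer accumulators back to float.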
  for (int b = 0; b < batch_size; ++b) {
    const int offset = b * input_size;
    tensor_utils::AsymmetricQuantizeFloats(
        GetTensorData<float>(input) + offset, input_size,
        quantized_input_ptr_batch + offset, &scaling_factors_ptr[b],
        &input_offset_ptr[b]);
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;

  op_params.weights_offset = 0;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  if (kernel_type == kReference) {
    reference_integer_ops::DepthwiseConvHybridPerChannel(
        op_params, scaling_factors_ptr, GetTensorShape(input),
        quantized_input_ptr_batch, GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<float>(bias), GetTensorShape(output),
        GetTensorData<float>(output), affine_quantization->scale->data,
        input_offset_ptr);
  } else {
    optimized_integer_ops::DepthwiseConvHybridPerChannel(
        op_params, scaling_factors_ptr, GetTensorShape(input),
        quantized_input_ptr_batch, GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<float>(bias), GetTensorShape(output),
        GetTensorData<float>(output), affine_quantization->scale->data,
        input_offset_ptr, CpuBackendContext::GetFromContext(context));
  }

  return kTfLiteOk;
}

template <KernelType kernel_type, TfLiteType input_type>
TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* filter;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kFilterTensor, &filter));
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
  TFLITE_DCHECK_EQ(input_type, input->type);

  switch (input_type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      if (filter->type == kTfLiteFloat32) {
        return EvalFloat<kernel_type>(context, node, params, data, input,
                                      filter, bias, output);
      } else if (filter->type == kTfLiteInt8) {
        return EvalHybridPerChannel<kernel_type>(context, node, params, data,
                                                 input, filter, bias, output);
      } else {
        TF_LITE_KERNEL_LOG(
            context, "Type %s with filter type %s not currently supported.",
            TfLiteTypeGetName(input->type), TfLiteTypeGetName(filter->type));
        return kTfLiteError;
      }
      break;
    case kTfLiteUInt8:
      return EvalQuantized<kernel_type>(context, node, params, data, input,
                                        filter, bias, output);
      break;
    case kTfLiteInt8:
      return EvalQuantizedPerChannel<kernel_type>(context, node, params, data,
                                                  input, filter, bias, output);
      break;
    case kTfLiteInt16:
      return EvalQuantizedPerChannel16x8(params, data, input, filter, bias,
                                         output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %d not currently supported.",
                         input->type);
      return kTfLiteError;
  }
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      return EvalImpl<kernel_type, kTfLiteFloat32>(context, node);
    case kTfLiteUInt8:
      return EvalImpl<kernel_type, kTfLiteUInt8>(context, node);
    case kTfLiteInt8:
      return EvalImpl<kernel_type, kTfLiteInt8>(context, node);
    case kTfLiteInt16:
      return EvalImpl<kernel_type, kTfLiteInt16>(context, node);
    default:
      TF_LITE_KERNEL_LOG(context, "Type %d not currently supported.",
                         input->type);
      return kTfLiteError;
  }
}

}  // namespace depthwise_conv

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kReference>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT_UINT8() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::EvalImpl<depthwise_conv::kNeonOptimized, kTfLiteUInt8>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
#else
  return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
#endif
}

// Warning: Clients using this variant are responsible for ensuring that their
// models only need the UINT8 type. TFLite's op registration mechanism doesn't
// yet allow for more nuanced registration.
TfLiteRegistration* Register_DEPTHWISE_CONV_2D_UINT8() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT_UINT8();
#else
  return Register_DEPTHWISE_CONV_2D();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite