1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include <stddef.h> |
17 | #include <stdint.h> |
18 | |
19 | #include <vector> |
20 | |
21 | #include "tensorflow/lite/c/builtin_op_data.h" |
22 | #include "tensorflow/lite/c/common.h" |
23 | #include "tensorflow/lite/kernels/cpu_backend_context.h" |
24 | #include "tensorflow/lite/kernels/internal/compatibility.h" |
25 | // NOLINTNEXTLINE - This header file shouldn't go to the top. |
26 | #include "tensorflow/lite/kernels/internal/optimized/integer_ops/transpose_conv.h" |
27 | #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" |
28 | // NOLINTNEXTLINE - This header file shouldn't go to the top. |
29 | #include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h" |
30 | #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" |
31 | #include "tensorflow/lite/kernels/internal/tensor.h" |
32 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" |
33 | #include "tensorflow/lite/kernels/internal/types.h" |
34 | #include "tensorflow/lite/kernels/kernel_util.h" |
35 | #include "tensorflow/lite/kernels/padding.h" |
36 | |
37 | namespace tflite { |
38 | namespace ops { |
39 | namespace builtin { |
40 | namespace transpose_conv { |
41 | |
// This file has 2 implementations of TransposeConv.
43 | enum KernelType { |
44 | kReference, |
45 | kGenericOptimized, // Neon-free |
46 | }; |
47 | |
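// Tensor indices for the TRANSPOSE_CONV op. Input 0 is the 1-D int32 output
// shape, input 1 the weights (stored in OHWI order by the converter), input 2
// the activation input, and the optional input 3 the bias.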
48 | constexpr int kOutputShapeTensor = 0; |
49 | constexpr int kWeightsTensor = 1; |
50 | constexpr int kDataInputTensor = 2; |
51 | constexpr int kBiasTensor = 3; |
52 | constexpr int kOutputTensor = 0; |
53 | |
54 | const int kTensorNotAllocated = -1; |
55 | |
56 | struct OpData { |
57 | // IDs are the arbitrary identifiers used by TF Lite to identify and access |
58 | // memory buffers. |
59 | int col2im_id = kTensorNotAllocated; |
60 | int transposed_weights_id = kTensorNotAllocated; |
61 | int scratch_tensor_id = kTensorNotAllocated; |
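  // The *_id fields above are tensor indices registered with the TfLiteContext,
  // while the *_index fields below are positions within node->temporaries.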
62 | |
  // col2im is the temporary tensor allocated and used in the optimized path for
  // storing the col2im data: the GEMM result of input_matrix x filter_matrix.
65 | int32_t col2im_index; |
66 | |
  // TfLiteConverter will transpose weights from HWOI to OHWI order.
  // In the optimized path, we transpose them back to HWOI; this temporary
  // tensor is allocated for storing the transposed weights.
70 | int32_t transposed_weights_index; |
71 | |
  // The scratch tensor is used in the quantized path for storing accumulation
  // results.
74 | int32_t scratch_tensor_index; |
75 | |
76 | TfLitePaddingValues padding; |
77 | // The scaling factor from input to output (aka the 'real multiplier') can |
78 | // be represented as a fixed point multiplier plus a left shift. |
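  // A minimal worked example of this representation (assuming the usual TFLite
  // QuantizeMultiplier convention, real ~= (output_multiplier / 2^31) *
  // 2^output_shift): a real multiplier of 0.75 would be stored as
  // output_multiplier = round(0.75 * 2^31) with output_shift = 0.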
79 | int32_t output_multiplier; |
80 | int output_shift; |
81 | |
82 | // Per channel output multiplier and shift. |
83 | std::vector<int32_t> per_channel_output_multiplier; |
84 | std::vector<int32_t> per_channel_output_shift; |
85 | |
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
88 | int32_t output_activation_min; |
89 | int32_t output_activation_max; |
90 | |
91 | bool has_col2im = false; |
92 | bool weights_are_transposed = false; |
93 | }; |
94 | |
95 | void* Init(TfLiteContext* context, const char* buffer, size_t length) { |
96 | return new OpData; |
97 | } |
98 | |
99 | void Free(TfLiteContext* context, void* buffer) { |
100 | delete reinterpret_cast<OpData*>(buffer); |
101 | } |
102 | |
103 | TfLiteStatus ResizeTensor(TfLiteContext* context, |
104 | const TfLiteTensor* shape_tensor, |
105 | TfLiteTensor* tensor_to_resize) { |
  // Currently only int32 is supported for the output shape.
  if (shape_tensor->type != kTfLiteInt32) {
    TF_LITE_KERNEL_LOG(context, "Output shape is %s, not int32.",
109 | TfLiteTypeGetName(shape_tensor->type)); |
110 | return kTfLiteError; |
111 | } |
112 | |
113 | TfLiteIntArray* shape = TfLiteIntArrayCreate(NumElements(shape_tensor)); |
114 | for (int i = 0; i < shape->size; ++i) { |
115 | shape->data[i] = GetTensorData<int32_t>(shape_tensor)[i]; |
116 | } |
117 | |
118 | return context->ResizeTensor(context, tensor_to_resize, shape); |
119 | } |
120 | |
121 | // Allocate temporary tensors if necessary. |
122 | template <KernelType kernel_type> |
123 | static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context, |
124 | TfLiteType input_type, |
125 | TfLiteType weights_type, |
126 | TfLiteNode* node) { |
127 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
128 | int temporaries_count = 0; |
129 | |
130 | // Allocate col2im tensor. Currently it's only used for optimized kernels. |
131 | if (kernel_type == kGenericOptimized) { |
132 | if (data->col2im_id == kTensorNotAllocated) { |
133 | context->AddTensors(context, 1, &data->col2im_id); |
134 | } |
135 | data->col2im_index = temporaries_count; |
136 | data->has_col2im = true; |
137 | ++temporaries_count; |
138 | } |
139 | |
  // Allocate the transposed_weights tensor. Currently it's only used for
  // optimized kernels.
142 | if (kernel_type == kGenericOptimized) { |
143 | if (data->transposed_weights_id == kTensorNotAllocated) { |
144 | context->AddTensors(context, 1, &data->transposed_weights_id); |
145 | } |
146 | data->transposed_weights_index = temporaries_count; |
147 | data->weights_are_transposed = true; |
148 | ++temporaries_count; |
149 | } |
150 | |
  // Allocate the scratch buffer tensor.
152 | if (input_type == kTfLiteUInt8 || input_type == kTfLiteInt8 || |
153 | input_type == kTfLiteInt16) { |
154 | if (data->scratch_tensor_id == kTensorNotAllocated) { |
155 | context->AddTensors(context, 1, &data->scratch_tensor_id); |
156 | } |
157 | data->scratch_tensor_index = temporaries_count; |
158 | ++temporaries_count; |
159 | } |
160 | |
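  // (Re)create the temporaries array; the actual tensor ids are written into
  // it later, in Prepare().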
161 | TfLiteIntArrayFree(node->temporaries); |
162 | node->temporaries = TfLiteIntArrayCreate(temporaries_count); |
163 | |
164 | return kTfLiteOk; |
165 | } |
166 | |
167 | TfLiteStatus ResizeCol2ImTensor(TfLiteContext* context, |
168 | const TfLiteTensor* output_shape, |
169 | const TfLiteTensor* weights, |
170 | const TfLiteTensor* input, |
171 | TfLiteTensor* col2im) { |
172 | if (output_shape->type != kTfLiteInt32) { |
    TF_LITE_KERNEL_LOG(context, "col2im shape is %s, not int32.",
174 | TfLiteTypeGetName(output_shape->type)); |
175 | return kTfLiteError; |
176 | } |
177 | TF_LITE_ENSURE_EQ(context, NumElements(output_shape), 4); |
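  // The col2im buffer is a 2-D matrix of shape
  // [input_height * input_width, output_channels * filter_height * filter_width].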
178 | TfLiteIntArray* col2im_shape_array = TfLiteIntArrayCreate(2); |
179 | const RuntimeShape& input_shape = GetTensorShape(input); |
180 | const RuntimeShape& weights_shape = GetTensorShape(weights); |
181 | col2im_shape_array->data[0] = input_shape.Dims(1) * input_shape.Dims(2); |
182 | col2im_shape_array->data[1] = |
183 | weights_shape.Dims(0) * weights_shape.Dims(1) * weights_shape.Dims(2); |
184 | |
185 | col2im->type = input->type == kTfLiteFloat32 ? kTfLiteFloat32 : kTfLiteInt32; |
186 | col2im->allocation_type = kTfLiteDynamic; |
187 | return context->ResizeTensor(context, col2im, col2im_shape_array); |
188 | } |
189 | |
190 | TfLiteStatus ResizeAndTransposeWeights(TfLiteContext* context, |
191 | const TfLiteTensor* weights, |
192 | TfLiteTensor* transposed_weights) { |
193 | TfLiteIntArray* transposed_weights_shape_array = TfLiteIntArrayCreate(4); |
194 | const RuntimeShape& input_shape = GetTensorShape(weights); |
195 | transposed_weights_shape_array->data[0] = input_shape.Dims(1); |
196 | transposed_weights_shape_array->data[1] = input_shape.Dims(2); |
197 | transposed_weights_shape_array->data[2] = input_shape.Dims(0); |
198 | transposed_weights_shape_array->data[3] = input_shape.Dims(3); |
199 | |
200 | transposed_weights->type = weights->type; |
201 | transposed_weights->allocation_type = kTfLiteDynamic; |
202 | TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, transposed_weights, |
203 | transposed_weights_shape_array)); |
204 | |
205 | // Transpose the weights from OHWI order to HWOI order. |
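  // TransposeParams::perm[i] selects the input dimension that becomes output
  // dimension i, so {1, 2, 0, 3} maps OHWI to HWOI.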
206 | TransposeParams transpose_params; |
207 | transpose_params.perm_count = 4; |
208 | transpose_params.perm[0] = 1; |
209 | transpose_params.perm[1] = 2; |
210 | transpose_params.perm[2] = 0; |
211 | transpose_params.perm[3] = 3; |
212 | |
213 | if (weights->type == kTfLiteFloat32) { |
214 | optimized_ops::Transpose(transpose_params, input_shape, |
215 | GetTensorData<float>(weights), |
216 | GetTensorShape(transposed_weights), |
217 | GetTensorData<float>(transposed_weights)); |
218 | } else if (weights->type == kTfLiteUInt8) { |
219 | optimized_ops::Transpose(transpose_params, input_shape, |
220 | GetTensorData<uint8>(weights), |
221 | GetTensorShape(transposed_weights), |
222 | GetTensorData<uint8>(transposed_weights)); |
223 | } else if (weights->type == kTfLiteInt8) { |
    // The int16 transpose_conv path also uses int8 weights.
225 | optimized_ops::Transpose(transpose_params, input_shape, |
226 | GetTensorData<int8>(weights), |
227 | GetTensorShape(transposed_weights), |
228 | GetTensorData<int8>(transposed_weights)); |
229 | } else { |
230 | TF_LITE_KERNEL_LOG( |
231 | context, |
232 | "Only float32, uint8, int8, int16 is supported currently, got %s." , |
233 | TfLiteTypeGetName(weights->type)); |
234 | return kTfLiteError; |
235 | } |
236 | |
237 | return kTfLiteOk; |
238 | } |
239 | |
240 | template <KernelType kernel_type> |
241 | TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { |
242 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
243 | |
244 | bool has_bias = NumInputs(node) == 4; |
245 | |
246 | // Sanity checks on op |
247 | TF_LITE_ENSURE(context, has_bias || NumInputs(node) == 3); |
248 | TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); |
249 | |
250 | // Retrieve tensors |
251 | const TfLiteTensor* output_shape; |
252 | TF_LITE_ENSURE_OK( |
253 | context, GetInputSafe(context, node, kOutputShapeTensor, &output_shape)); |
254 | const TfLiteTensor* weights; |
255 | TF_LITE_ENSURE_OK(context, |
256 | GetInputSafe(context, node, kWeightsTensor, &weights)); |
257 | const TfLiteTensor* input; |
258 | TF_LITE_ENSURE_OK(context, |
259 | GetInputSafe(context, node, kDataInputTensor, &input)); |
260 | const TfLiteTensor* bias = nullptr; |
261 | |
262 | TfLiteTensor* output; |
263 | TF_LITE_ENSURE_OK(context, |
264 | GetOutputSafe(context, node, kOutputTensor, &output)); |
265 | |
266 | // Tensor sanity checks |
267 | TF_LITE_ENSURE_EQ(context, NumDimensions(output_shape), 1); |
268 | TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); |
269 | TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 4); |
270 | TF_LITE_ENSURE(context, |
271 | input->type == kTfLiteFloat32 || input->type == kTfLiteUInt8 || |
272 | input->type == kTfLiteInt8 || input->type == kTfLiteInt16); |
273 | |
274 | if (has_bias) { |
275 | bias = GetOptionalInputTensor(context, node, kBiasTensor); |
276 | if (bias) { |
277 | if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) { |
278 | TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt32); |
279 | if (input->type == kTfLiteInt8) { |
280 | TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0); |
281 | } |
282 | } else if (input->type == kTfLiteInt16) { |
283 | TF_LITE_ENSURE(context, (bias->type == kTfLiteInt64) || |
284 | (bias->type == kTfLiteInt32)); |
285 | TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0); |
286 | } else { |
287 | TF_LITE_ENSURE_TYPES_EQ(context, bias->type, input->type); |
288 | } |
289 | TF_LITE_ENSURE_EQ(context, NumElements(bias), |
290 | SizeOfDimension(weights, 0)); |
291 | } |
292 | } |
293 | |
294 | if (input->type == kTfLiteInt16) { |
295 | TF_LITE_ENSURE_EQ(context, weights->type, kTfLiteInt8); |
296 | TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); |
297 | TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); |
298 | } else { |
299 | TF_LITE_ENSURE_TYPES_EQ(context, weights->type, input->type); |
300 | } |
301 | TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); |
302 | // Ensure that weights and inputs have the same channel dimension. |
303 | // Note: TOCO will reorder weights in the following format: OHWI. |
304 | TF_LITE_ENSURE_EQ(context, SizeOfDimension(input, 3), |
305 | SizeOfDimension(weights, 3)); |
306 | |
  // Allocate the col2im, transposed_weights and scratch tensors.
308 | TF_LITE_ENSURE_STATUS(AllocateTemporaryTensorsIfRequired<kernel_type>( |
309 | context, input->type, weights->type, node)); |
310 | |
311 | OpData* user_data = reinterpret_cast<OpData*>(node->user_data); |
312 | TfLiteTensor* col2im = nullptr; |
313 | if (data->has_col2im) { |
314 | node->temporaries->data[data->col2im_index] = data->col2im_id; |
315 | TF_LITE_ENSURE_OK( |
316 | context, |
317 | GetTemporarySafe(context, node, user_data->col2im_index, &col2im)); |
318 | } |
319 | |
320 | if (!IsConstantTensor(output_shape)) { |
321 | // Defer resizing until Eval(). |
322 | SetTensorToDynamic(output); |
323 | if (data->has_col2im) { |
324 | SetTensorToDynamic(col2im); |
325 | } |
326 | } else { |
327 | TF_LITE_ENSURE_STATUS(ResizeTensor(context, output_shape, output)); |
328 | if (data->has_col2im) { |
329 | TF_LITE_ENSURE_STATUS( |
330 | ResizeCol2ImTensor(context, output_shape, weights, input, col2im)); |
331 | } |
332 | } |
333 | |
334 | if (data->weights_are_transposed) { |
335 | node->temporaries->data[data->transposed_weights_index] = |
336 | data->transposed_weights_id; |
337 | TfLiteTensor* transposed_weights; |
338 | TF_LITE_ENSURE_OK( |
339 | context, |
340 | GetTemporarySafe(context, node, user_data->transposed_weights_index, |
341 | &transposed_weights)); |
342 | if (!IsConstantTensor(weights)) { |
343 | SetTensorToDynamic(transposed_weights); |
344 | } else { |
345 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
346 | } |
347 | } |
348 | |
349 | if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 || |
350 | input->type == kTfLiteInt16) { |
351 | node->temporaries->data[data->scratch_tensor_index] = |
352 | data->scratch_tensor_id; |
353 | TfLiteTensor* scratch_buffer; |
354 | TF_LITE_ENSURE_OK( |
355 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
356 | &scratch_buffer)); |
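    // The scratch buffer holds raw accumulator values; 64-bit accumulation is
    // only needed for the 16x8 path when the bias is int64.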
357 | if (input->type == kTfLiteInt16 && bias && bias->type == kTfLiteInt64) { |
358 | scratch_buffer->type = kTfLiteInt64; |
359 | } else { |
360 | scratch_buffer->type = kTfLiteInt32; |
361 | } |
362 | |
363 | scratch_buffer->allocation_type = kTfLiteDynamic; |
364 | if (!IsConstantTensor(output_shape)) { |
365 | SetTensorToDynamic(scratch_buffer); |
366 | } else { |
367 | TF_LITE_ENSURE_STATUS( |
368 | ResizeTensor(context, output_shape, scratch_buffer)); |
369 | } |
370 | |
371 | TF_LITE_ENSURE_EQ(context, weights->quantization.type, |
372 | kTfLiteAffineQuantization); |
373 | const auto* affine_quantization = |
374 | reinterpret_cast<TfLiteAffineQuantization*>( |
375 | weights->quantization.params); |
376 | const int channels_out = weights->dims->data[0]; |
377 | TF_LITE_ENSURE(context, affine_quantization); |
378 | TF_LITE_ENSURE(context, affine_quantization->scale); |
379 | TF_LITE_ENSURE(context, (affine_quantization->scale->size == 1 || |
380 | affine_quantization->scale->size == channels_out)); |
381 | |
382 | data->per_channel_output_multiplier.resize(channels_out); |
383 | data->per_channel_output_shift.resize(channels_out); |
384 | TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( |
385 | context, input, weights, bias, output, kTfLiteActNone, |
386 | &data->output_multiplier, &data->output_shift, |
387 | &data->output_activation_min, &data->output_activation_max, |
388 | data->per_channel_output_multiplier.data(), |
389 | data->per_channel_output_shift.data(), channels_out)); |
390 | } |
391 | |
392 | return kTfLiteOk; |
393 | } |
394 | |
395 | template <KernelType kernel_type> |
396 | void EvalFloat(TfLiteContext* context, const TfLiteTransposeConvParams* params, |
397 | const OpData* data, const TfLiteTensor* input, |
398 | const TfLiteTensor* weights, const TfLiteTensor* bias, |
399 | const TfLiteTensor* transposed_weights, TfLiteTensor* col2im, |
400 | TfLiteTensor* output) { |
401 | tflite::ConvParams op_params; |
402 | op_params.padding_type = PaddingType::kSame; |
403 | op_params.padding_values.width = data->padding.width; |
404 | op_params.padding_values.height = data->padding.height; |
405 | op_params.padding_values.width_offset = data->padding.width_offset; |
406 | op_params.padding_values.height_offset = data->padding.height_offset; |
407 | op_params.stride_width = params->stride_width; |
408 | op_params.stride_height = params->stride_height; |
409 | |
410 | switch (kernel_type) { |
411 | case kReference: { |
412 | reference_ops::TransposeConv( |
413 | op_params, GetTensorShape(input), GetTensorData<float>(input), |
414 | GetTensorShape(weights), GetTensorData<float>(weights), |
415 | GetTensorShape(bias), GetTensorData<float>(bias), |
416 | GetTensorShape(output), GetTensorData<float>(output), |
417 | GetTensorShape(col2im), GetTensorData<float>(col2im)); |
418 | break; |
419 | } |
420 | case kGenericOptimized: { |
421 | optimized_ops::TransposeConvV2( |
422 | op_params, GetTensorShape(input), GetTensorData<float>(input), |
423 | GetTensorShape(transposed_weights), |
424 | GetTensorData<float>(transposed_weights), GetTensorShape(bias), |
425 | GetTensorData<float>(bias), GetTensorShape(output), |
426 | GetTensorData<float>(output), GetTensorShape(col2im), |
427 | GetTensorData<float>(col2im), |
428 | CpuBackendContext::GetFromContext(context)); |
429 | break; |
430 | } |
431 | } |
432 | } |
433 | |
434 | template <KernelType kernel_type> |
435 | void EvalQuantized(TfLiteContext* context, |
436 | const TfLiteTransposeConvParams* params, OpData* data, |
437 | const TfLiteTensor* input, const TfLiteTensor* weights, |
438 | const TfLiteTensor* transposed_weights, |
439 | const TfLiteTensor* bias, TfLiteTensor* col2im, |
440 | TfLiteTensor* output, TfLiteTensor* scratch_buffer) { |
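  // Per the usual TFLite quantization convention, the input and filter offsets
  // are the negated zero points, while the output offset is the zero point
  // itself.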
441 | int32_t input_offset = -input->params.zero_point; |
442 | int32_t filter_offset = -weights->params.zero_point; |
443 | int32_t output_offset = output->params.zero_point; |
444 | |
445 | tflite::ConvParams op_params; |
446 | op_params.padding_type = PaddingType::kSame; |
447 | op_params.padding_values.width = data->padding.width; |
448 | op_params.padding_values.height = data->padding.height; |
449 | op_params.padding_values.width_offset = data->padding.width_offset; |
450 | op_params.padding_values.height_offset = data->padding.height_offset; |
451 | op_params.stride_width = params->stride_width; |
452 | op_params.stride_height = params->stride_height; |
453 | op_params.input_offset = input_offset; |
454 | op_params.output_offset = output_offset; |
455 | op_params.weights_offset = filter_offset; |
456 | op_params.output_multiplier = data->output_multiplier; |
457 | op_params.output_shift = -data->output_shift; |
458 | op_params.quantized_activation_min = data->output_activation_min; |
459 | op_params.quantized_activation_max = data->output_activation_max; |
460 | |
461 | switch (kernel_type) { |
462 | case kReference: { |
463 | reference_ops::TransposeConv( |
464 | op_params, GetTensorShape(input), GetTensorData<uint8>(input), |
465 | GetTensorShape(weights), GetTensorData<uint8>(weights), |
466 | GetTensorShape(bias), GetTensorData<int32_t>(bias), |
467 | GetTensorShape(output), GetTensorData<uint8>(output), |
468 | GetTensorShape(col2im), GetTensorData<uint8>(col2im), |
469 | GetTensorData<int32_t>(scratch_buffer)); |
470 | break; |
471 | } |
472 | case kGenericOptimized: { |
473 | optimized_ops::TransposeConvV2( |
474 | op_params, GetTensorShape(input), GetTensorData<uint8>(input), |
475 | GetTensorShape(transposed_weights), |
476 | GetTensorData<uint8>(transposed_weights), GetTensorShape(bias), |
477 | GetTensorData<int32>(bias), GetTensorShape(output), |
478 | GetTensorData<uint8>(output), GetTensorShape(col2im), |
479 | GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer), |
480 | CpuBackendContext::GetFromContext(context)); |
481 | break; |
482 | } |
483 | } |
484 | } |
485 | |
486 | template <KernelType kernel_type> |
487 | void EvalQuantizedPerChannel( |
488 | TfLiteContext* context, const TfLiteTransposeConvParams* params, |
489 | OpData* data, const TfLiteTensor* input, const TfLiteTensor* weights, |
490 | const TfLiteTensor* transposed_weights, const TfLiteTensor* bias, |
491 | TfLiteTensor* col2im, TfLiteTensor* output, TfLiteTensor* scratch_buffer) { |
492 | tflite::ConvParams op_params; |
493 | op_params.padding_type = PaddingType::kSame; |
494 | op_params.padding_values.width = data->padding.width; |
495 | op_params.padding_values.height = data->padding.height; |
496 | op_params.padding_values.width_offset = data->padding.width_offset; |
497 | op_params.padding_values.height_offset = data->padding.height_offset; |
498 | op_params.stride_width = params->stride_width; |
499 | op_params.stride_height = params->stride_height; |
500 | // Need to flip the sign of input offset to add it directly to the quantized |
501 | // buffer. |
502 | op_params.input_offset = -input->params.zero_point; |
503 | op_params.output_offset = output->params.zero_point; |
504 | op_params.quantized_activation_min = data->output_activation_min; |
505 | op_params.quantized_activation_max = data->output_activation_max; |
506 | |
507 | switch (kernel_type) { |
508 | case kReference: { |
509 | reference_integer_ops::TransposeConv( |
510 | op_params, data->per_channel_output_multiplier.data(), |
511 | data->per_channel_output_shift.data(), GetTensorShape(input), |
512 | GetTensorData<int8>(input), GetTensorShape(weights), |
513 | GetTensorData<int8>(weights), GetTensorShape(bias), |
514 | GetTensorData<int32>(bias), GetTensorShape(output), |
515 | GetTensorData<int8>(output), GetTensorShape(col2im), |
516 | GetTensorData<int8>(col2im), GetTensorData<int32_t>(scratch_buffer)); |
517 | break; |
518 | } |
519 | case kGenericOptimized: { |
520 | optimized_integer_ops::TransposeConvV2( |
521 | op_params, data->per_channel_output_multiplier.data(), |
522 | data->per_channel_output_shift.data(), GetTensorShape(input), |
523 | GetTensorData<int8>(input), GetTensorShape(transposed_weights), |
524 | GetTensorData<int8>(transposed_weights), GetTensorShape(bias), |
525 | GetTensorData<int32>(bias), GetTensorShape(output), |
526 | GetTensorData<int8>(output), GetTensorShape(col2im), |
527 | GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer), |
528 | CpuBackendContext::GetFromContext(context)); |
529 | break; |
530 | } |
531 | } |
532 | } |
533 | |
534 | template <KernelType kernel_type> |
535 | void EvalQuantizedPerChannel16x8( |
536 | TfLiteContext* context, const TfLiteTransposeConvParams* params, |
537 | OpData* data, const TfLiteTensor* input, const TfLiteTensor* weights, |
538 | const TfLiteTensor* transposed_weights, const TfLiteTensor* bias, |
539 | TfLiteTensor* col2im, TfLiteTensor* output, TfLiteTensor* scratch_buffer) { |
540 | tflite::ConvParams op_params; |
541 | op_params.padding_type = PaddingType::kSame; |
542 | op_params.padding_values.width = data->padding.width; |
543 | op_params.padding_values.height = data->padding.height; |
544 | op_params.padding_values.width_offset = data->padding.width_offset; |
545 | op_params.padding_values.height_offset = data->padding.height_offset; |
546 | op_params.stride_width = params->stride_width; |
547 | op_params.stride_height = params->stride_height; |
548 | // Need to flip the sign of input offset to add it directly to the quantized |
549 | // buffer. |
550 | op_params.input_offset = -input->params.zero_point; |
551 | op_params.output_offset = output->params.zero_point; |
552 | op_params.quantized_activation_min = data->output_activation_min; |
553 | op_params.quantized_activation_max = data->output_activation_max; |
554 | |
  // To prevent 32-bit accumulator overflow for 16x8 quantization, the
  // optimized path is used only when all zero points are 0.
557 | bool has_non_zero_point = input->params.zero_point || |
558 | weights->params.zero_point || |
559 | output->params.zero_point; |
560 | |
  // Fall back to the reference kernel when the bias type is int64, as there is
  // no optimized kernel for int64 bias yet.
563 | if (bias && bias->type == kTfLiteInt64) { |
564 | reference_integer_ops::TransposeConv( |
565 | op_params, data->per_channel_output_multiplier.data(), |
566 | data->per_channel_output_shift.data(), GetTensorShape(input), |
567 | GetTensorData<int16>(input), GetTensorShape(weights), |
568 | GetTensorData<int8>(weights), GetTensorShape(bias), |
569 | GetTensorData<int64_t>(bias), GetTensorShape(output), |
570 | GetTensorData<int16>(output), GetTensorShape(col2im), |
571 | GetTensorData<int8>(col2im), GetTensorData<int64_t>(scratch_buffer)); |
572 | } else if (kernel_type == kReference || has_non_zero_point) { |
573 | reference_integer_ops::TransposeConv( |
574 | op_params, data->per_channel_output_multiplier.data(), |
575 | data->per_channel_output_shift.data(), GetTensorShape(input), |
576 | GetTensorData<int16>(input), GetTensorShape(weights), |
577 | GetTensorData<int8>(weights), GetTensorShape(bias), |
578 | GetTensorData<int32_t>(bias), GetTensorShape(output), |
579 | GetTensorData<int16>(output), GetTensorShape(col2im), |
580 | GetTensorData<int8>(col2im), GetTensorData<int32_t>(scratch_buffer)); |
581 | } else { |
582 | optimized_integer_ops::TransposeConvV2( |
583 | op_params, data->per_channel_output_multiplier.data(), |
584 | data->per_channel_output_shift.data(), GetTensorShape(input), |
585 | GetTensorData<int16>(input), GetTensorShape(transposed_weights), |
586 | GetTensorData<int8>(transposed_weights), GetTensorShape(bias), |
587 | GetTensorData<int32>(bias), GetTensorShape(output), |
588 | GetTensorData<int16>(output), GetTensorShape(col2im), |
589 | GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer), |
590 | CpuBackendContext::GetFromContext(context)); |
591 | } |
592 | } |
593 | |
594 | template <KernelType kernel_type> |
595 | TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { |
596 | // Retrieve tensors (All should be allocated by now) |
597 | const TfLiteTensor* output_shape; |
598 | TF_LITE_ENSURE_OK( |
599 | context, GetInputSafe(context, node, kOutputShapeTensor, &output_shape)); |
600 | const TfLiteTensor* weights; |
601 | TF_LITE_ENSURE_OK(context, |
602 | GetInputSafe(context, node, kWeightsTensor, &weights)); |
603 | const TfLiteTensor* input; |
604 | TF_LITE_ENSURE_OK(context, |
605 | GetInputSafe(context, node, kDataInputTensor, &input)); |
606 | const TfLiteTensor* bias = |
607 | (NumInputs(node) == 4) |
608 | ? GetOptionalInputTensor(context, node, kBiasTensor) |
609 | : nullptr; |
610 | TfLiteTensor* output; |
611 | TF_LITE_ENSURE_OK(context, |
612 | GetOutputSafe(context, node, kOutputTensor, &output)); |
613 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
614 | TfLiteTensor* col2im = data->has_col2im |
615 | ? GetTemporary(context, node, data->col2im_index) |
616 | : nullptr; |
617 | TfLiteTensor* transposed_weights = |
618 | data->weights_are_transposed |
619 | ? GetTemporary(context, node, data->transposed_weights_index) |
620 | : nullptr; |
621 | const auto* params = |
622 | reinterpret_cast<TfLiteTransposeConvParams*>(node->builtin_data); |
623 | |
624 | // Prevent divisions by 0 |
625 | TF_LITE_ENSURE(context, params->stride_height > 0); |
626 | TF_LITE_ENSURE(context, params->stride_width > 0); |
627 | |
628 | // Resize any deferred dynamic tensors |
629 | if (IsDynamicTensor(output)) { |
630 | TF_LITE_ENSURE_OK(context, ResizeTensor(context, output_shape, output)); |
631 | } |
632 | if (data->has_col2im && IsDynamicTensor(col2im)) { |
633 | TF_LITE_ENSURE_OK(context, ResizeCol2ImTensor(context, output_shape, |
634 | weights, input, col2im)); |
635 | } |
636 | |
637 | // Get height and width of the output image. |
638 | const int width = SizeOfDimension(output, 2); |
639 | const int height = SizeOfDimension(output, 1); |
640 | const int filter_width = SizeOfDimension(weights, 2); |
641 | const int filter_height = SizeOfDimension(weights, 1); |
642 | |
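  // The padding is computed here rather than in Prepare() because the output
  // dimensions may only become known at eval time, when output_shape is not a
  // constant tensor.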
643 | int unused_output_height, unused_output_width; |
644 | data->padding = ComputePaddingHeightWidth( |
645 | params->stride_height, params->stride_width, 1, 1, height, width, |
646 | filter_height, filter_width, params->padding, &unused_output_height, |
647 | &unused_output_width); |
648 | |
  // Currently supported input types: float32, uint8, int8, int16.
650 | switch (input->type) { |
651 | case kTfLiteFloat32: { |
      // Only the GenericOptimized path uses transposed weights.
653 | if (data->weights_are_transposed) { |
654 | if (!IsConstantTensor(weights)) { |
655 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
656 | } |
657 | } |
658 | EvalFloat<kernel_type>(context, params, data, input, weights, bias, |
659 | transposed_weights, col2im, output); |
660 | break; |
661 | } |
662 | case kTfLiteUInt8: { |
663 | TfLiteTensor* scratch_buffer; |
664 | TF_LITE_ENSURE_OK( |
665 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
666 | &scratch_buffer)); |
667 | if (IsDynamicTensor(scratch_buffer)) { |
668 | TF_LITE_ENSURE_OK(context, |
669 | ResizeTensor(context, output_shape, scratch_buffer)); |
670 | } |
671 | if (data->weights_are_transposed) { |
672 | if (!IsConstantTensor(weights)) { |
673 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
674 | } |
675 | } |
676 | EvalQuantized<kernel_type>(context, params, data, input, weights, |
677 | transposed_weights, bias, col2im, output, |
678 | scratch_buffer); |
679 | break; |
680 | } |
681 | case kTfLiteInt8: { |
682 | TfLiteTensor* scratch_buffer; |
683 | TF_LITE_ENSURE_OK( |
684 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
685 | &scratch_buffer)); |
686 | if (IsDynamicTensor(scratch_buffer)) { |
687 | TF_LITE_ENSURE_OK(context, |
688 | ResizeTensor(context, output_shape, scratch_buffer)); |
689 | } |
690 | if (data->weights_are_transposed && !IsConstantTensor(weights)) { |
691 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
692 | } |
693 | EvalQuantizedPerChannel<kernel_type>(context, params, data, input, |
694 | weights, transposed_weights, bias, |
695 | col2im, output, scratch_buffer); |
696 | break; |
697 | } |
698 | case kTfLiteInt16: { |
699 | TfLiteTensor* scratch_buffer; |
700 | TF_LITE_ENSURE_OK( |
701 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
702 | &scratch_buffer)); |
703 | if (IsDynamicTensor(scratch_buffer)) { |
704 | TF_LITE_ENSURE_OK(context, |
705 | ResizeTensor(context, output_shape, scratch_buffer)); |
706 | } |
707 | if (data->weights_are_transposed && !IsConstantTensor(weights)) { |
708 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
709 | } |
710 | EvalQuantizedPerChannel16x8<kernel_type>( |
711 | context, params, data, input, weights, transposed_weights, bias, |
712 | col2im, output, scratch_buffer); |
713 | break; |
714 | } |
715 | default: |
      TF_LITE_KERNEL_LOG(context, "Type '%s' is not currently supported.",
717 | TfLiteTypeGetName(input->type)); |
718 | return kTfLiteError; |
719 | } |
720 | return kTfLiteOk; |
721 | } |
722 | |
723 | } // namespace transpose_conv |
724 | |
725 | TfLiteRegistration* Register_TRANSPOSECONV_REF() { |
726 | static TfLiteRegistration r = { |
727 | transpose_conv::Init, transpose_conv::Free, |
728 | transpose_conv::Prepare<transpose_conv::kReference>, |
729 | transpose_conv::Eval<transpose_conv::kReference>}; |
730 | return &r; |
731 | } |
732 | |
733 | TfLiteRegistration* Register_TRANSPOSECONV_GENERIC_OPT() { |
734 | static TfLiteRegistration r = { |
735 | transpose_conv::Init, transpose_conv::Free, |
736 | transpose_conv::Prepare<transpose_conv::kGenericOptimized>, |
737 | transpose_conv::Eval<transpose_conv::kGenericOptimized>}; |
738 | return &r; |
739 | } |
740 | |
741 | TfLiteRegistration* Register_TRANSPOSE_CONV() { |
742 | return Register_TRANSPOSECONV_GENERIC_OPT(); |
743 | } |
744 | |
745 | } // namespace builtin |
746 | } // namespace ops |
747 | } // namespace tflite |
748 | |