/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
// NOLINTNEXTLINE - This header file shouldn't go to the top.
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/transpose_conv.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
// NOLINTNEXTLINE - This header file shouldn't go to the top.
#include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace transpose_conv {

// This file has two implementations of TransposeConv.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
};

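// Tensor indices: TransposeConv takes a 1-D output-shape tensor, OHWI-ordered
// weights, an NHWC input and an optional bias with one element per output
// channel.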
constexpr int kOutputShapeTensor = 0;
constexpr int kWeightsTensor = 1;
constexpr int kDataInputTensor = 2;
constexpr int kBiasTensor = 3;
constexpr int kOutputTensor = 0;

const int kTensorNotAllocated = -1;

struct OpData {
  // IDs are the arbitrary identifiers used by TF Lite to identify and access
  // memory buffers.
  int col2im_id = kTensorNotAllocated;
  int transposed_weights_id = kTensorNotAllocated;
  int scratch_tensor_id = kTensorNotAllocated;

  // col2im is a temporary tensor allocated and used in the optimized path for
  // storing the col2im data: the GEMM result of input_matrix x filter_matrix.
  int32_t col2im_index;

  // The TFLite converter transposes weights from HWOI to OHWI order.
  // In the optimized path we transpose them back to HWOI; this temporary
  // tensor is allocated for storing the transposed weights.
  int32_t transposed_weights_index;

  // Scratch tensor is used in the quantized path for storing accumulation
  // results.
  int32_t scratch_tensor_index;

  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
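  // For example (illustrative only): a real multiplier of 0.5 can be encoded
  // as the Q31 fixed-point value 1 << 30 with a left shift of 0, since
  // (1 << 30) / 2^31 * 2^0 == 0.5.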

  // Per channel output multiplier and shift.
  std::vector<int32_t> per_channel_output_multiplier;
  std::vector<int32_t> per_channel_output_shift;

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;

  bool has_col2im = false;
  bool weights_are_transposed = false;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus ResizeTensor(TfLiteContext* context,
                          const TfLiteTensor* shape_tensor,
                          TfLiteTensor* tensor_to_resize) {
  // Currently, only int32 is supported for the output shape.
  if (shape_tensor->type != kTfLiteInt32) {
    TF_LITE_KERNEL_LOG(context, "Output shape is %s, not int32.",
                       TfLiteTypeGetName(shape_tensor->type));
    return kTfLiteError;
  }

  TfLiteIntArray* shape = TfLiteIntArrayCreate(NumElements(shape_tensor));
  for (int i = 0; i < shape->size; ++i) {
    shape->data[i] = GetTensorData<int32_t>(shape_tensor)[i];
  }

  return context->ResizeTensor(context, tensor_to_resize, shape);
}

// Allocate temporary tensors if necessary.
template <KernelType kernel_type>
static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context,
                                                       TfLiteType input_type,
                                                       TfLiteType weights_type,
                                                       TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);
  int temporaries_count = 0;

  // Allocate col2im tensor. Currently it's only used for optimized kernels.
  if (kernel_type == kGenericOptimized) {
    if (data->col2im_id == kTensorNotAllocated) {
      context->AddTensors(context, 1, &data->col2im_id);
    }
    data->col2im_index = temporaries_count;
    data->has_col2im = true;
    ++temporaries_count;
  }

  // Allocate transposed_weights tensor. Currently it's only used for
  // optimized kernels.
  if (kernel_type == kGenericOptimized) {
    if (data->transposed_weights_id == kTensorNotAllocated) {
      context->AddTensors(context, 1, &data->transposed_weights_id);
    }
    data->transposed_weights_index = temporaries_count;
    data->weights_are_transposed = true;
    ++temporaries_count;
  }

  // Allocate the scratch buffer tensor for the quantized paths.
  if (input_type == kTfLiteUInt8 || input_type == kTfLiteInt8 ||
      input_type == kTfLiteInt16) {
    if (data->scratch_tensor_id == kTensorNotAllocated) {
      context->AddTensors(context, 1, &data->scratch_tensor_id);
    }
    data->scratch_tensor_index = temporaries_count;
    ++temporaries_count;
  }

  TfLiteIntArrayFree(node->temporaries);
  node->temporaries = TfLiteIntArrayCreate(temporaries_count);
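  // The tensor ids allocated above are written into node->temporaries by
  // Prepare().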

  return kTfLiteOk;
}

TfLiteStatus ResizeCol2ImTensor(TfLiteContext* context,
                                const TfLiteTensor* output_shape,
                                const TfLiteTensor* weights,
                                const TfLiteTensor* input,
                                TfLiteTensor* col2im) {
  if (output_shape->type != kTfLiteInt32) {
    TF_LITE_KERNEL_LOG(context, "col2im shape is %s, not int32.",
                       TfLiteTypeGetName(output_shape->type));
    return kTfLiteError;
  }
  TF_LITE_ENSURE_EQ(context, NumElements(output_shape), 4);
  TfLiteIntArray* col2im_shape_array = TfLiteIntArrayCreate(2);
  const RuntimeShape& input_shape = GetTensorShape(input);
  const RuntimeShape& weights_shape = GetTensorShape(weights);
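  // The col2im buffer is 2-D: one row per input spatial position
  // (H_in * W_in) and one column per element of an OHWI filter slice
  // (output_channels * filter_height * filter_width).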
  col2im_shape_array->data[0] = input_shape.Dims(1) * input_shape.Dims(2);
  col2im_shape_array->data[1] =
      weights_shape.Dims(0) * weights_shape.Dims(1) * weights_shape.Dims(2);

  col2im->type = input->type == kTfLiteFloat32 ? kTfLiteFloat32 : kTfLiteInt32;
  col2im->allocation_type = kTfLiteDynamic;
  return context->ResizeTensor(context, col2im, col2im_shape_array);
}

TfLiteStatus ResizeAndTransposeWeights(TfLiteContext* context,
                                       const TfLiteTensor* weights,
                                       TfLiteTensor* transposed_weights) {
  TfLiteIntArray* transposed_weights_shape_array = TfLiteIntArrayCreate(4);
  const RuntimeShape& input_shape = GetTensorShape(weights);
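  // The incoming weights are in OHWI order; the transposed tensor is HWOI,
  // i.e. dims (H, W, O, I) = (Dims(1), Dims(2), Dims(0), Dims(3)).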
  transposed_weights_shape_array->data[0] = input_shape.Dims(1);
  transposed_weights_shape_array->data[1] = input_shape.Dims(2);
  transposed_weights_shape_array->data[2] = input_shape.Dims(0);
  transposed_weights_shape_array->data[3] = input_shape.Dims(3);

  transposed_weights->type = weights->type;
  transposed_weights->allocation_type = kTfLiteDynamic;
  TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, transposed_weights,
                                              transposed_weights_shape_array));

  // Transpose the weights from OHWI order to HWOI order.
  TransposeParams transpose_params;
  transpose_params.perm_count = 4;
  transpose_params.perm[0] = 1;
  transpose_params.perm[1] = 2;
  transpose_params.perm[2] = 0;
  transpose_params.perm[3] = 3;

  if (weights->type == kTfLiteFloat32) {
    optimized_ops::Transpose(transpose_params, input_shape,
                             GetTensorData<float>(weights),
                             GetTensorShape(transposed_weights),
                             GetTensorData<float>(transposed_weights));
  } else if (weights->type == kTfLiteUInt8) {
    optimized_ops::Transpose(transpose_params, input_shape,
                             GetTensorData<uint8>(weights),
                             GetTensorShape(transposed_weights),
                             GetTensorData<uint8>(transposed_weights));
  } else if (weights->type == kTfLiteInt8) {
    // int16 transpose_conv also uses int8 weights.
    optimized_ops::Transpose(transpose_params, input_shape,
                             GetTensorData<int8>(weights),
                             GetTensorShape(transposed_weights),
                             GetTensorData<int8>(transposed_weights));
  } else {
    TF_LITE_KERNEL_LOG(
        context,
        "Only float32, uint8, int8, int16 are supported currently, got %s.",
        TfLiteTypeGetName(weights->type));
    return kTfLiteError;
  }

  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  bool has_bias = NumInputs(node) == 4;

  // Sanity checks on op
  TF_LITE_ENSURE(context, has_bias || NumInputs(node) == 3);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  // Retrieve tensors
  const TfLiteTensor* output_shape;
  TF_LITE_ENSURE_OK(
      context, GetInputSafe(context, node, kOutputShapeTensor, &output_shape));
  const TfLiteTensor* weights;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kWeightsTensor, &weights));
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kDataInputTensor, &input));
  const TfLiteTensor* bias = nullptr;

  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  // Tensor sanity checks
  TF_LITE_ENSURE_EQ(context, NumDimensions(output_shape), 1);
  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 4);
  TF_LITE_ENSURE(context,
                 input->type == kTfLiteFloat32 || input->type == kTfLiteUInt8 ||
                     input->type == kTfLiteInt8 || input->type == kTfLiteInt16);

  if (has_bias) {
    bias = GetOptionalInputTensor(context, node, kBiasTensor);
    if (bias) {
      if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
        TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt32);
        if (input->type == kTfLiteInt8) {
          TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
        }
      } else if (input->type == kTfLiteInt16) {
        TF_LITE_ENSURE(context, (bias->type == kTfLiteInt64) ||
                                    (bias->type == kTfLiteInt32));
        TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
      } else {
        TF_LITE_ENSURE_TYPES_EQ(context, bias->type, input->type);
      }
      TF_LITE_ENSURE_EQ(context, NumElements(bias),
                        SizeOfDimension(weights, 0));
    }
  }

  if (input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, weights->type, kTfLiteInt8);
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  } else {
    TF_LITE_ENSURE_TYPES_EQ(context, weights->type, input->type);
  }
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
  // Ensure that weights and inputs have the same channel dimension.
  // Note: TOCO will reorder weights in the following format: OHWI.
  TF_LITE_ENSURE_EQ(context, SizeOfDimension(input, 3),
                    SizeOfDimension(weights, 3));

  // Allocate the col2im, transposed_weights & scratch tensors.
  TF_LITE_ENSURE_STATUS(AllocateTemporaryTensorsIfRequired<kernel_type>(
      context, input->type, weights->type, node));

  OpData* user_data = reinterpret_cast<OpData*>(node->user_data);
  TfLiteTensor* col2im = nullptr;
  if (data->has_col2im) {
    node->temporaries->data[data->col2im_index] = data->col2im_id;
    TF_LITE_ENSURE_OK(
        context,
        GetTemporarySafe(context, node, user_data->col2im_index, &col2im));
  }

  if (!IsConstantTensor(output_shape)) {
    // Defer resizing until Eval().
    SetTensorToDynamic(output);
    if (data->has_col2im) {
      SetTensorToDynamic(col2im);
    }
  } else {
    TF_LITE_ENSURE_STATUS(ResizeTensor(context, output_shape, output));
    if (data->has_col2im) {
      TF_LITE_ENSURE_STATUS(
          ResizeCol2ImTensor(context, output_shape, weights, input, col2im));
    }
  }

  if (data->weights_are_transposed) {
    node->temporaries->data[data->transposed_weights_index] =
        data->transposed_weights_id;
    TfLiteTensor* transposed_weights;
    TF_LITE_ENSURE_OK(
        context,
        GetTemporarySafe(context, node, user_data->transposed_weights_index,
                         &transposed_weights));
    if (!IsConstantTensor(weights)) {
      SetTensorToDynamic(transposed_weights);
    } else {
      ResizeAndTransposeWeights(context, weights, transposed_weights);
    }
  }

  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
      input->type == kTfLiteInt16) {
    node->temporaries->data[data->scratch_tensor_index] =
        data->scratch_tensor_id;
    TfLiteTensor* scratch_buffer;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->scratch_tensor_index,
                                  &scratch_buffer));
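    // With an int64 bias in the 16x8 path the accumulator must also be
    // 64-bit; otherwise a 32-bit accumulator is sufficient.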
    if (input->type == kTfLiteInt16 && bias && bias->type == kTfLiteInt64) {
      scratch_buffer->type = kTfLiteInt64;
    } else {
      scratch_buffer->type = kTfLiteInt32;
    }

    scratch_buffer->allocation_type = kTfLiteDynamic;
    if (!IsConstantTensor(output_shape)) {
      SetTensorToDynamic(scratch_buffer);
    } else {
      TF_LITE_ENSURE_STATUS(
          ResizeTensor(context, output_shape, scratch_buffer));
    }

    TF_LITE_ENSURE_EQ(context, weights->quantization.type,
                      kTfLiteAffineQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            weights->quantization.params);
    const int channels_out = weights->dims->data[0];
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, (affine_quantization->scale->size == 1 ||
                             affine_quantization->scale->size == channels_out));

    data->per_channel_output_multiplier.resize(channels_out);
    data->per_channel_output_shift.resize(channels_out);
    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, weights, bias, output, kTfLiteActNone,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), channels_out));
  }

  return kTfLiteOk;
}

template <KernelType kernel_type>
void EvalFloat(TfLiteContext* context, const TfLiteTransposeConvParams* params,
               const OpData* data, const TfLiteTensor* input,
               const TfLiteTensor* weights, const TfLiteTensor* bias,
               const TfLiteTensor* transposed_weights, TfLiteTensor* col2im,
               TfLiteTensor* output) {
  tflite::ConvParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width_offset = data->padding.width_offset;
  op_params.padding_values.height_offset = data->padding.height_offset;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;

  switch (kernel_type) {
    case kReference: {
      reference_ops::TransposeConv(
          op_params, GetTensorShape(input), GetTensorData<float>(input),
          GetTensorShape(weights), GetTensorData<float>(weights),
          GetTensorShape(bias), GetTensorData<float>(bias),
          GetTensorShape(output), GetTensorData<float>(output),
          GetTensorShape(col2im), GetTensorData<float>(col2im));
      break;
    }
    case kGenericOptimized: {
      optimized_ops::TransposeConvV2(
          op_params, GetTensorShape(input), GetTensorData<float>(input),
          GetTensorShape(transposed_weights),
          GetTensorData<float>(transposed_weights), GetTensorShape(bias),
          GetTensorData<float>(bias), GetTensorShape(output),
          GetTensorData<float>(output), GetTensorShape(col2im),
          GetTensorData<float>(col2im),
          CpuBackendContext::GetFromContext(context));
      break;
    }
  }
}

template <KernelType kernel_type>
void EvalQuantized(TfLiteContext* context,
                   const TfLiteTransposeConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* weights,
                   const TfLiteTensor* transposed_weights,
                   const TfLiteTensor* bias, TfLiteTensor* col2im,
                   TfLiteTensor* output, TfLiteTensor* scratch_buffer) {
  int32_t input_offset = -input->params.zero_point;
  int32_t filter_offset = -weights->params.zero_point;
  int32_t output_offset = output->params.zero_point;
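  // Quantized values map to real values as real = scale * (q - zero_point),
  // so the input and filter offsets are the negated zero points while the
  // output offset is the output zero point itself.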

  tflite::ConvParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width_offset = data->padding.width_offset;
  op_params.padding_values.height_offset = data->padding.height_offset;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.input_offset = input_offset;
  op_params.output_offset = output_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  switch (kernel_type) {
    case kReference: {
      reference_ops::TransposeConv(
          op_params, GetTensorShape(input), GetTensorData<uint8>(input),
          GetTensorShape(weights), GetTensorData<uint8>(weights),
          GetTensorShape(bias), GetTensorData<int32_t>(bias),
          GetTensorShape(output), GetTensorData<uint8>(output),
          GetTensorShape(col2im), GetTensorData<uint8>(col2im),
          GetTensorData<int32_t>(scratch_buffer));
      break;
    }
    case kGenericOptimized: {
      optimized_ops::TransposeConvV2(
          op_params, GetTensorShape(input), GetTensorData<uint8>(input),
          GetTensorShape(transposed_weights),
          GetTensorData<uint8>(transposed_weights), GetTensorShape(bias),
          GetTensorData<int32>(bias), GetTensorShape(output),
          GetTensorData<uint8>(output), GetTensorShape(col2im),
          GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer),
          CpuBackendContext::GetFromContext(context));
      break;
    }
  }
}

template <KernelType kernel_type>
void EvalQuantizedPerChannel(
    TfLiteContext* context, const TfLiteTransposeConvParams* params,
    OpData* data, const TfLiteTensor* input, const TfLiteTensor* weights,
    const TfLiteTensor* transposed_weights, const TfLiteTensor* bias,
    TfLiteTensor* col2im, TfLiteTensor* output, TfLiteTensor* scratch_buffer) {
  tflite::ConvParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width_offset = data->padding.width_offset;
  op_params.padding_values.height_offset = data->padding.height_offset;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  // Need to flip the sign of input offset to add it directly to the quantized
  // buffer.
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  switch (kernel_type) {
    case kReference: {
      reference_integer_ops::TransposeConv(
          op_params, data->per_channel_output_multiplier.data(),
          data->per_channel_output_shift.data(), GetTensorShape(input),
          GetTensorData<int8>(input), GetTensorShape(weights),
          GetTensorData<int8>(weights), GetTensorShape(bias),
          GetTensorData<int32>(bias), GetTensorShape(output),
          GetTensorData<int8>(output), GetTensorShape(col2im),
          GetTensorData<int8>(col2im), GetTensorData<int32_t>(scratch_buffer));
      break;
    }
    case kGenericOptimized: {
      optimized_integer_ops::TransposeConvV2(
          op_params, data->per_channel_output_multiplier.data(),
          data->per_channel_output_shift.data(), GetTensorShape(input),
          GetTensorData<int8>(input), GetTensorShape(transposed_weights),
          GetTensorData<int8>(transposed_weights), GetTensorShape(bias),
          GetTensorData<int32>(bias), GetTensorShape(output),
          GetTensorData<int8>(output), GetTensorShape(col2im),
          GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer),
          CpuBackendContext::GetFromContext(context));
      break;
    }
  }
}

template <KernelType kernel_type>
void EvalQuantizedPerChannel16x8(
    TfLiteContext* context, const TfLiteTransposeConvParams* params,
    OpData* data, const TfLiteTensor* input, const TfLiteTensor* weights,
    const TfLiteTensor* transposed_weights, const TfLiteTensor* bias,
    TfLiteTensor* col2im, TfLiteTensor* output, TfLiteTensor* scratch_buffer) {
  tflite::ConvParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width_offset = data->padding.width_offset;
  op_params.padding_values.height_offset = data->padding.height_offset;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  // Need to flip the sign of input offset to add it directly to the quantized
  // buffer.
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  // To prevent 32-bit accumulator overflow in 16x8 quantization, the
  // optimized path is enabled only when all zero points are 0.
  bool has_non_zero_point = input->params.zero_point ||
                            weights->params.zero_point ||
                            output->params.zero_point;

  // Fallback to reference kernel when bias_type is int64 as
  // there is no optimized kernel for int64 bias yet.
  if (bias && bias->type == kTfLiteInt64) {
    reference_integer_ops::TransposeConv(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int16>(input), GetTensorShape(weights),
        GetTensorData<int8>(weights), GetTensorShape(bias),
        GetTensorData<int64_t>(bias), GetTensorShape(output),
        GetTensorData<int16>(output), GetTensorShape(col2im),
        GetTensorData<int8>(col2im), GetTensorData<int64_t>(scratch_buffer));
  } else if (kernel_type == kReference || has_non_zero_point) {
    reference_integer_ops::TransposeConv(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int16>(input), GetTensorShape(weights),
        GetTensorData<int8>(weights), GetTensorShape(bias),
        GetTensorData<int32_t>(bias), GetTensorShape(output),
        GetTensorData<int16>(output), GetTensorShape(col2im),
        GetTensorData<int8>(col2im), GetTensorData<int32_t>(scratch_buffer));
  } else {
    optimized_integer_ops::TransposeConvV2(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int16>(input), GetTensorShape(transposed_weights),
        GetTensorData<int8>(transposed_weights), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int16>(output), GetTensorShape(col2im),
        GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer),
        CpuBackendContext::GetFromContext(context));
  }
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  // Retrieve tensors (All should be allocated by now)
  const TfLiteTensor* output_shape;
  TF_LITE_ENSURE_OK(
      context, GetInputSafe(context, node, kOutputShapeTensor, &output_shape));
  const TfLiteTensor* weights;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kWeightsTensor, &weights));
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kDataInputTensor, &input));
  const TfLiteTensor* bias =
      (NumInputs(node) == 4)
          ? GetOptionalInputTensor(context, node, kBiasTensor)
          : nullptr;
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  OpData* data = reinterpret_cast<OpData*>(node->user_data);
  TfLiteTensor* col2im = data->has_col2im
                             ? GetTemporary(context, node, data->col2im_index)
                             : nullptr;
  TfLiteTensor* transposed_weights =
      data->weights_are_transposed
          ? GetTemporary(context, node, data->transposed_weights_index)
          : nullptr;
  const auto* params =
      reinterpret_cast<TfLiteTransposeConvParams*>(node->builtin_data);

  // Prevent divisions by 0
  TF_LITE_ENSURE(context, params->stride_height > 0);
  TF_LITE_ENSURE(context, params->stride_width > 0);

  // Resize any deferred dynamic tensors
  if (IsDynamicTensor(output)) {
    TF_LITE_ENSURE_OK(context, ResizeTensor(context, output_shape, output));
  }
  if (data->has_col2im && IsDynamicTensor(col2im)) {
    TF_LITE_ENSURE_OK(context, ResizeCol2ImTensor(context, output_shape,
                                                  weights, input, col2im));
  }

  // Get height and width of the output image.
  const int width = SizeOfDimension(output, 2);
  const int height = SizeOfDimension(output, 1);
  const int filter_width = SizeOfDimension(weights, 2);
  const int filter_height = SizeOfDimension(weights, 1);

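  // TransposeConv computes padding from the already-known output size;
  // dilation is fixed to 1 here.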
  int unused_output_height, unused_output_width;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width, 1, 1, height, width,
      filter_height, filter_width, params->padding, &unused_output_height,
      &unused_output_width);

  // Currently supported types: float32, uint8, int8, int16.
  switch (input->type) {
    case kTfLiteFloat32: {
      // Transposed weights are used only in the GenericOptimized path.
      if (data->weights_are_transposed) {
        if (!IsConstantTensor(weights)) {
          ResizeAndTransposeWeights(context, weights, transposed_weights);
        }
      }
      EvalFloat<kernel_type>(context, params, data, input, weights, bias,
                             transposed_weights, col2im, output);
      break;
    }
    case kTfLiteUInt8: {
      TfLiteTensor* scratch_buffer;
      TF_LITE_ENSURE_OK(
          context, GetTemporarySafe(context, node, data->scratch_tensor_index,
                                    &scratch_buffer));
      if (IsDynamicTensor(scratch_buffer)) {
        TF_LITE_ENSURE_OK(context,
                          ResizeTensor(context, output_shape, scratch_buffer));
      }
      if (data->weights_are_transposed) {
        if (!IsConstantTensor(weights)) {
          ResizeAndTransposeWeights(context, weights, transposed_weights);
        }
      }
      EvalQuantized<kernel_type>(context, params, data, input, weights,
                                 transposed_weights, bias, col2im, output,
                                 scratch_buffer);
      break;
    }
    case kTfLiteInt8: {
      TfLiteTensor* scratch_buffer;
      TF_LITE_ENSURE_OK(
          context, GetTemporarySafe(context, node, data->scratch_tensor_index,
                                    &scratch_buffer));
      if (IsDynamicTensor(scratch_buffer)) {
        TF_LITE_ENSURE_OK(context,
                          ResizeTensor(context, output_shape, scratch_buffer));
      }
      if (data->weights_are_transposed && !IsConstantTensor(weights)) {
        ResizeAndTransposeWeights(context, weights, transposed_weights);
      }
      EvalQuantizedPerChannel<kernel_type>(context, params, data, input,
                                           weights, transposed_weights, bias,
                                           col2im, output, scratch_buffer);
      break;
    }
    case kTfLiteInt16: {
      TfLiteTensor* scratch_buffer;
      TF_LITE_ENSURE_OK(
          context, GetTemporarySafe(context, node, data->scratch_tensor_index,
                                    &scratch_buffer));
      if (IsDynamicTensor(scratch_buffer)) {
        TF_LITE_ENSURE_OK(context,
                          ResizeTensor(context, output_shape, scratch_buffer));
      }
      if (data->weights_are_transposed && !IsConstantTensor(weights)) {
        ResizeAndTransposeWeights(context, weights, transposed_weights);
      }
      EvalQuantizedPerChannel16x8<kernel_type>(
          context, params, data, input, weights, transposed_weights, bias,
          col2im, output, scratch_buffer);
      break;
    }
    default:
      TF_LITE_KERNEL_LOG(context, "Type '%s' is not currently supported.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace transpose_conv

TfLiteRegistration* Register_TRANSPOSECONV_REF() {
  static TfLiteRegistration r = {
      transpose_conv::Init, transpose_conv::Free,
      transpose_conv::Prepare<transpose_conv::kReference>,
      transpose_conv::Eval<transpose_conv::kReference>};
  return &r;
}

TfLiteRegistration* Register_TRANSPOSECONV_GENERIC_OPT() {
  static TfLiteRegistration r = {
      transpose_conv::Init, transpose_conv::Free,
      transpose_conv::Prepare<transpose_conv::kGenericOptimized>,
      transpose_conv::Eval<transpose_conv::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_TRANSPOSE_CONV() {
  return Register_TRANSPOSECONV_GENERIC_OPT();
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite