1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include <stddef.h> |
17 | #include <stdint.h> |
18 | |
19 | #include <vector> |
20 | |
21 | #include "tensorflow/lite/c/builtin_op_data.h" |
22 | #include "tensorflow/lite/c/common.h" |
23 | #include "tensorflow/lite/kernels/cpu_backend_context.h" |
24 | #include "tensorflow/lite/kernels/internal/compatibility.h" |
25 | // NOLINTNEXTLINE - This header file shouldn't go to the top. |
26 | #include "tensorflow/lite/kernels/internal/optimized/integer_ops/transpose_conv.h" |
27 | #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" |
28 | // NOLINTNEXTLINE - This header file shouldn't go to the top. |
29 | #include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h" |
30 | #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" |
31 | #include "tensorflow/lite/kernels/internal/tensor.h" |
32 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" |
33 | #include "tensorflow/lite/kernels/internal/types.h" |
34 | #include "tensorflow/lite/kernels/kernel_util.h" |
35 | #include "tensorflow/lite/kernels/padding.h" |
36 | |
37 | namespace tflite { |
38 | namespace ops { |
39 | namespace builtin { |
40 | namespace transpose_conv { |
41 | |
// This file has 2 implementations of TransposeConv.
43 | enum KernelType { |
44 | kReference, |
45 | kGenericOptimized, // Neon-free |
46 | }; |
47 | |
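// Tensor indices for the TRANSPOSE_CONV op. Input 0 is the 1-D int32 output
// shape, input 1 the weights (stored in OHWI order by the converter), input 2
// the activation input, and the optional input 3 the bias.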
48 | constexpr int kOutputShapeTensor = 0; |
49 | constexpr int kWeightsTensor = 1; |
50 | constexpr int kDataInputTensor = 2; |
51 | constexpr int kBiasTensor = 3; |
52 | constexpr int kOutputTensor = 0; |
53 | |
54 | const int kTensorNotAllocated = -1; |
55 | |
56 | struct OpData { |
57 | // IDs are the arbitrary identifiers used by TF Lite to identify and access |
58 | // memory buffers. |
59 | int col2im_id = kTensorNotAllocated; |
60 | int transposed_weights_id = kTensorNotAllocated; |
61 | int scratch_tensor_id = kTensorNotAllocated; |
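  // The *_id fields above are tensor indices registered with the TfLiteContext,
  // while the *_index fields below are positions within node->temporaries.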
62 | |
  // col2im is the temporary tensor allocated and used in the optimized path for
  // storing the col2im data: the GEMM result of input_matrix x filter_matrix.
65 | int32_t col2im_index; |
66 | |
  // TfLiteConverter will transpose weights from HWOI to OHWI order.
  // In the optimized path, we transpose them back to HWOI; this temporary
  // tensor is allocated for storing the transposed weights.
70 | int32_t transposed_weights_index; |
71 | |
  // The scratch tensor is used in the quantized path for storing accumulation
  // results.
74 | int32_t scratch_tensor_index; |
75 | |
76 | TfLitePaddingValues padding; |
77 | // The scaling factor from input to output (aka the 'real multiplier') can |
78 | // be represented as a fixed point multiplier plus a left shift. |
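  // A minimal worked example of this representation (assuming the usual TFLite
  // QuantizeMultiplier convention, real ~= (output_multiplier / 2^31) *
  // 2^output_shift): a real multiplier of 0.75 would be stored as
  // output_multiplier = round(0.75 * 2^31) with output_shift = 0.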
79 | int32_t output_multiplier; |
80 | int output_shift; |
81 | |
82 | // Per channel output multiplier and shift. |
83 | std::vector<int32_t> per_channel_output_multiplier; |
84 | std::vector<int32_t> per_channel_output_shift; |
85 | |
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
88 | int32_t output_activation_min; |
89 | int32_t output_activation_max; |
90 | |
91 | bool has_col2im = false; |
92 | bool weights_are_transposed = false; |
93 | }; |
94 | |
95 | void* Init(TfLiteContext* context, const char* buffer, size_t length) { |
96 | return new OpData; |
97 | } |
98 | |
99 | void Free(TfLiteContext* context, void* buffer) { |
100 | delete reinterpret_cast<OpData*>(buffer); |
101 | } |
102 | |
103 | TfLiteStatus ResizeTensor(TfLiteContext* context, |
104 | const TfLiteTensor* shape_tensor, |
105 | TfLiteTensor* tensor_to_resize) { |
  // Currently only int32 is supported for the output shape.
  if (shape_tensor->type != kTfLiteInt32) {
    TF_LITE_KERNEL_LOG(context, "Output shape is %s, not int32.",
109 | TfLiteTypeGetName(shape_tensor->type)); |
110 | return kTfLiteError; |
111 | } |
112 | |
113 | TfLiteIntArray* shape = TfLiteIntArrayCreate(NumElements(shape_tensor)); |
114 | for (int i = 0; i < shape->size; ++i) { |
115 | shape->data[i] = GetTensorData<int32_t>(shape_tensor)[i]; |
116 | } |
117 | |
118 | return context->ResizeTensor(context, tensor_to_resize, shape); |
119 | } |
120 | |
121 | // Allocate temporary tensors if necessary. |
122 | template <KernelType kernel_type> |
123 | static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context, |
124 | TfLiteType input_type, |
125 | TfLiteType weights_type, |
126 | TfLiteNode* node) { |
127 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
128 | int temporaries_count = 0; |
129 | |
130 | // Allocate col2im tensor. Currently it's only used for optimized kernels. |
131 | if (kernel_type == kGenericOptimized) { |
132 | if (data->col2im_id == kTensorNotAllocated) { |
133 | context->AddTensors(context, 1, &data->col2im_id); |
134 | } |
135 | data->col2im_index = temporaries_count; |
136 | data->has_col2im = true; |
137 | ++temporaries_count; |
138 | } |
139 | |
  // Allocate the transposed_weights tensor. Currently it's only used for
  // optimized kernels.
142 | if (kernel_type == kGenericOptimized) { |
143 | if (data->transposed_weights_id == kTensorNotAllocated) { |
144 | context->AddTensors(context, 1, &data->transposed_weights_id); |
145 | } |
146 | data->transposed_weights_index = temporaries_count; |
147 | data->weights_are_transposed = true; |
148 | ++temporaries_count; |
149 | } |
150 | |
  // Allocate the scratch buffer tensor.
152 | if (input_type == kTfLiteUInt8 || input_type == kTfLiteInt8 || |
153 | input_type == kTfLiteInt16) { |
154 | if (data->scratch_tensor_id == kTensorNotAllocated) { |
155 | context->AddTensors(context, 1, &data->scratch_tensor_id); |
156 | } |
157 | data->scratch_tensor_index = temporaries_count; |
158 | ++temporaries_count; |
159 | } |
160 | |
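  // (Re)create the temporaries array; the actual tensor ids are written into
  // it later, in Prepare().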
161 | TfLiteIntArrayFree(node->temporaries); |
162 | node->temporaries = TfLiteIntArrayCreate(temporaries_count); |
163 | |
164 | return kTfLiteOk; |
165 | } |
166 | |
167 | TfLiteStatus ResizeCol2ImTensor(TfLiteContext* context, |
168 | const TfLiteTensor* output_shape, |
169 | const TfLiteTensor* weights, |
170 | const TfLiteTensor* input, |
171 | TfLiteTensor* col2im) { |
172 | if (output_shape->type != kTfLiteInt32) { |
    TF_LITE_KERNEL_LOG(context, "col2im shape is %s, not int32.",
174 | TfLiteTypeGetName(output_shape->type)); |
175 | return kTfLiteError; |
176 | } |
177 | TF_LITE_ENSURE_EQ(context, NumElements(output_shape), 4); |
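  // The col2im buffer is a 2-D matrix of shape
  // [input_height * input_width, output_channels * filter_height * filter_width].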
178 | TfLiteIntArray* col2im_shape_array = TfLiteIntArrayCreate(2); |
179 | const RuntimeShape& input_shape = GetTensorShape(input); |
180 | const RuntimeShape& weights_shape = GetTensorShape(weights); |
181 | col2im_shape_array->data[0] = input_shape.Dims(1) * input_shape.Dims(2); |
182 | col2im_shape_array->data[1] = |
183 | weights_shape.Dims(0) * weights_shape.Dims(1) * weights_shape.Dims(2); |
184 | |
185 | col2im->type = input->type == kTfLiteFloat32 ? kTfLiteFloat32 : kTfLiteInt32; |
186 | col2im->allocation_type = kTfLiteDynamic; |
187 | return context->ResizeTensor(context, col2im, col2im_shape_array); |
188 | } |
189 | |
190 | TfLiteStatus ResizeAndTransposeWeights(TfLiteContext* context, |
191 | const TfLiteTensor* weights, |
192 | TfLiteTensor* transposed_weights) { |
193 | TfLiteIntArray* transposed_weights_shape_array = TfLiteIntArrayCreate(4); |
194 | const RuntimeShape& input_shape = GetTensorShape(weights); |
195 | transposed_weights_shape_array->data[0] = input_shape.Dims(1); |
196 | transposed_weights_shape_array->data[1] = input_shape.Dims(2); |
197 | transposed_weights_shape_array->data[2] = input_shape.Dims(0); |
198 | transposed_weights_shape_array->data[3] = input_shape.Dims(3); |
199 | |
200 | transposed_weights->type = weights->type; |
201 | transposed_weights->allocation_type = kTfLiteDynamic; |
202 | TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, transposed_weights, |
203 | transposed_weights_shape_array)); |
204 | |
205 | // Transpose the weights from OHWI order to HWOI order. |
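  // TransposeParams::perm[i] selects the input dimension that becomes output
  // dimension i, so {1, 2, 0, 3} maps OHWI to HWOI.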
206 | TransposeParams transpose_params; |
207 | transpose_params.perm_count = 4; |
208 | transpose_params.perm[0] = 1; |
209 | transpose_params.perm[1] = 2; |
210 | transpose_params.perm[2] = 0; |
211 | transpose_params.perm[3] = 3; |
212 | |
213 | if (weights->type == kTfLiteFloat32) { |
214 | optimized_ops::Transpose(transpose_params, input_shape, |
215 | GetTensorData<float>(weights), |
216 | GetTensorShape(transposed_weights), |
217 | GetTensorData<float>(transposed_weights)); |
218 | } else if (weights->type == kTfLiteUInt8) { |
219 | optimized_ops::Transpose(transpose_params, input_shape, |
220 | GetTensorData<uint8>(weights), |
221 | GetTensorShape(transposed_weights), |
222 | GetTensorData<uint8>(transposed_weights)); |
223 | } else if (weights->type == kTfLiteInt8) { |
    // The int16 transpose_conv path also uses int8 weights.
225 | optimized_ops::Transpose(transpose_params, input_shape, |
226 | GetTensorData<int8>(weights), |
227 | GetTensorShape(transposed_weights), |
228 | GetTensorData<int8>(transposed_weights)); |
229 | } else { |
230 | TF_LITE_KERNEL_LOG( |
231 | context, |
232 | "Only float32, uint8, int8, int16 is supported currently, got %s." , |
233 | TfLiteTypeGetName(weights->type)); |
234 | return kTfLiteError; |
235 | } |
236 | |
237 | return kTfLiteOk; |
238 | } |
239 | |
240 | template <KernelType kernel_type> |
241 | TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { |
242 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
243 | |
244 | bool has_bias = NumInputs(node) == 4; |
245 | |
246 | // Sanity checks on op |
247 | TF_LITE_ENSURE(context, has_bias || NumInputs(node) == 3); |
248 | TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); |
249 | |
250 | // Retrieve tensors |
251 | const TfLiteTensor* output_shape; |
252 | TF_LITE_ENSURE_OK( |
253 | context, GetInputSafe(context, node, kOutputShapeTensor, &output_shape)); |
254 | const TfLiteTensor* weights; |
255 | TF_LITE_ENSURE_OK(context, |
256 | GetInputSafe(context, node, kWeightsTensor, &weights)); |
257 | const TfLiteTensor* input; |
258 | TF_LITE_ENSURE_OK(context, |
259 | GetInputSafe(context, node, kDataInputTensor, &input)); |
260 | const TfLiteTensor* bias = nullptr; |
261 | |
262 | TfLiteTensor* output; |
263 | TF_LITE_ENSURE_OK(context, |
264 | GetOutputSafe(context, node, kOutputTensor, &output)); |
265 | |
266 | // Tensor sanity checks |
267 | TF_LITE_ENSURE_EQ(context, NumDimensions(output_shape), 1); |
268 | TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); |
269 | TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 4); |
270 | TF_LITE_ENSURE(context, |
271 | input->type == kTfLiteFloat32 || input->type == kTfLiteUInt8 || |
272 | input->type == kTfLiteInt8 || input->type == kTfLiteInt16); |
273 | |
274 | if (has_bias) { |
275 | bias = GetOptionalInputTensor(context, node, kBiasTensor); |
276 | if (bias) { |
277 | if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) { |
278 | TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt32); |
279 | if (input->type == kTfLiteInt8) { |
280 | TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0); |
281 | } |
282 | } else if (input->type == kTfLiteInt16) { |
283 | TF_LITE_ENSURE(context, (bias->type == kTfLiteInt64) || |
284 | (bias->type == kTfLiteInt32)); |
285 | TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0); |
286 | } else { |
287 | TF_LITE_ENSURE_TYPES_EQ(context, bias->type, input->type); |
288 | } |
289 | TF_LITE_ENSURE_EQ(context, NumElements(bias), |
290 | SizeOfDimension(weights, 0)); |
291 | } |
292 | } |
293 | |
294 | if (input->type == kTfLiteInt16) { |
295 | TF_LITE_ENSURE_EQ(context, weights->type, kTfLiteInt8); |
296 | TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); |
297 | TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); |
298 | } else { |
299 | TF_LITE_ENSURE_TYPES_EQ(context, weights->type, input->type); |
300 | } |
301 | TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); |
302 | // Ensure that weights and inputs have the same channel dimension. |
303 | // Note: TOCO will reorder weights in the following format: OHWI. |
304 | TF_LITE_ENSURE_EQ(context, SizeOfDimension(input, 3), |
305 | SizeOfDimension(weights, 3)); |
306 | |
  // Allocate the col2im, transposed_weights and scratch tensors.
308 | TF_LITE_ENSURE_STATUS(AllocateTemporaryTensorsIfRequired<kernel_type>( |
309 | context, input->type, weights->type, node)); |
310 | |
311 | OpData* user_data = reinterpret_cast<OpData*>(node->user_data); |
312 | TfLiteTensor* col2im = nullptr; |
313 | if (data->has_col2im) { |
314 | node->temporaries->data[data->col2im_index] = data->col2im_id; |
315 | TF_LITE_ENSURE_OK( |
316 | context, |
317 | GetTemporarySafe(context, node, user_data->col2im_index, &col2im)); |
318 | } |
319 | |
320 | if (!IsConstantTensor(output_shape)) { |
321 | // Defer resizing until Eval(). |
322 | SetTensorToDynamic(output); |
323 | if (data->has_col2im) { |
324 | SetTensorToDynamic(col2im); |
325 | } |
326 | } else { |
327 | TF_LITE_ENSURE_STATUS(ResizeTensor(context, output_shape, output)); |
328 | if (data->has_col2im) { |
329 | TF_LITE_ENSURE_STATUS( |
330 | ResizeCol2ImTensor(context, output_shape, weights, input, col2im)); |
331 | } |
332 | } |
333 | |
334 | if (data->weights_are_transposed) { |
335 | node->temporaries->data[data->transposed_weights_index] = |
336 | data->transposed_weights_id; |
337 | TfLiteTensor* transposed_weights; |
338 | TF_LITE_ENSURE_OK( |
339 | context, |
340 | GetTemporarySafe(context, node, user_data->transposed_weights_index, |
341 | &transposed_weights)); |
342 | if (!IsConstantTensor(weights)) { |
343 | SetTensorToDynamic(transposed_weights); |
344 | } else { |
345 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
346 | } |
347 | } |
348 | |
349 | if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 || |
350 | input->type == kTfLiteInt16) { |
351 | node->temporaries->data[data->scratch_tensor_index] = |
352 | data->scratch_tensor_id; |
353 | TfLiteTensor* scratch_buffer; |
354 | TF_LITE_ENSURE_OK( |
355 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
356 | &scratch_buffer)); |
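    // The scratch buffer holds raw accumulator values; 64-bit accumulation is
    // only needed for the 16x8 path when the bias is int64.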
357 | if (input->type == kTfLiteInt16 && bias && bias->type == kTfLiteInt64) { |
358 | scratch_buffer->type = kTfLiteInt64; |
359 | } else { |
360 | scratch_buffer->type = kTfLiteInt32; |
361 | } |
362 | |
363 | scratch_buffer->allocation_type = kTfLiteDynamic; |
364 | if (!IsConstantTensor(output_shape)) { |
365 | SetTensorToDynamic(scratch_buffer); |
366 | } else { |
367 | TF_LITE_ENSURE_STATUS( |
368 | ResizeTensor(context, output_shape, scratch_buffer)); |
369 | } |
370 | |
371 | TF_LITE_ENSURE_EQ(context, weights->quantization.type, |
372 | kTfLiteAffineQuantization); |
373 | const auto* affine_quantization = |
374 | reinterpret_cast<TfLiteAffineQuantization*>( |
375 | weights->quantization.params); |
376 | const int channels_out = weights->dims->data[0]; |
377 | TF_LITE_ENSURE(context, affine_quantization); |
378 | TF_LITE_ENSURE(context, affine_quantization->scale); |
379 | TF_LITE_ENSURE(context, (affine_quantization->scale->size == 1 || |
380 | affine_quantization->scale->size == channels_out)); |
381 | |
382 | data->per_channel_output_multiplier.resize(channels_out); |
383 | data->per_channel_output_shift.resize(channels_out); |
384 | TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( |
385 | context, input, weights, bias, output, kTfLiteActNone, |
386 | &data->output_multiplier, &data->output_shift, |
387 | &data->output_activation_min, &data->output_activation_max, |
388 | data->per_channel_output_multiplier.data(), |
389 | data->per_channel_output_shift.data(), channels_out)); |
390 | } |
391 | |
392 | return kTfLiteOk; |
393 | } |
394 | |
395 | template <KernelType kernel_type> |
396 | void EvalFloat(TfLiteContext* context, const TfLiteTransposeConvParams* params, |
397 | const OpData* data, const TfLiteTensor* input, |
398 | const TfLiteTensor* weights, const TfLiteTensor* bias, |
399 | const TfLiteTensor* transposed_weights, TfLiteTensor* col2im, |
400 | TfLiteTensor* output) { |
401 | tflite::ConvParams op_params; |
402 | op_params.padding_type = PaddingType::kSame; |
403 | op_params.padding_values.width = data->padding.width; |
404 | op_params.padding_values.height = data->padding.height; |
405 | op_params.padding_values.width_offset = data->padding.width_offset; |
406 | op_params.padding_values.height_offset = data->padding.height_offset; |
407 | op_params.stride_width = params->stride_width; |
408 | op_params.stride_height = params->stride_height; |
409 | |
410 | switch (kernel_type) { |
411 | case kReference: { |
412 | reference_ops::TransposeConv( |
413 | op_params, GetTensorShape(input), GetTensorData<float>(input), |
414 | GetTensorShape(weights), GetTensorData<float>(weights), |
415 | GetTensorShape(bias), GetTensorData<float>(bias), |
416 | GetTensorShape(output), GetTensorData<float>(output), |
417 | GetTensorShape(col2im), GetTensorData<float>(col2im)); |
418 | break; |
419 | } |
420 | case kGenericOptimized: { |
421 | optimized_ops::TransposeConvV2( |
422 | op_params, GetTensorShape(input), GetTensorData<float>(input), |
423 | GetTensorShape(transposed_weights), |
424 | GetTensorData<float>(transposed_weights), GetTensorShape(bias), |
425 | GetTensorData<float>(bias), GetTensorShape(output), |
426 | GetTensorData<float>(output), GetTensorShape(col2im), |
427 | GetTensorData<float>(col2im), |
428 | CpuBackendContext::GetFromContext(context)); |
429 | break; |
430 | } |
431 | } |
432 | } |
433 | |
434 | template <KernelType kernel_type> |
435 | void EvalQuantized(TfLiteContext* context, |
436 | const TfLiteTransposeConvParams* params, OpData* data, |
437 | const TfLiteTensor* input, const TfLiteTensor* weights, |
438 | const TfLiteTensor* transposed_weights, |
439 | const TfLiteTensor* bias, TfLiteTensor* col2im, |
440 | TfLiteTensor* output, TfLiteTensor* scratch_buffer) { |
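  // Per the usual TFLite quantization convention, the input and filter offsets
  // are the negated zero points, while the output offset is the zero point
  // itself.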
441 | int32_t input_offset = -input->params.zero_point; |
442 | int32_t filter_offset = -weights->params.zero_point; |
443 | int32_t output_offset = output->params.zero_point; |
444 | |
445 | tflite::ConvParams op_params; |
446 | op_params.padding_type = PaddingType::kSame; |
447 | op_params.padding_values.width = data->padding.width; |
448 | op_params.padding_values.height = data->padding.height; |
449 | op_params.padding_values.width_offset = data->padding.width_offset; |
450 | op_params.padding_values.height_offset = data->padding.height_offset; |
451 | op_params.stride_width = params->stride_width; |
452 | op_params.stride_height = params->stride_height; |
453 | op_params.input_offset = input_offset; |
454 | op_params.output_offset = output_offset; |
455 | op_params.weights_offset = filter_offset; |
456 | op_params.output_multiplier = data->output_multiplier; |
457 | op_params.output_shift = -data->output_shift; |
458 | op_params.quantized_activation_min = data->output_activation_min; |
459 | op_params.quantized_activation_max = data->output_activation_max; |
460 | |
461 | switch (kernel_type) { |
462 | case kReference: { |
463 | reference_ops::TransposeConv( |
464 | op_params, GetTensorShape(input), GetTensorData<uint8>(input), |
465 | GetTensorShape(weights), GetTensorData<uint8>(weights), |
466 | GetTensorShape(bias), GetTensorData<int32_t>(bias), |
467 | GetTensorShape(output), GetTensorData<uint8>(output), |
468 | GetTensorShape(col2im), GetTensorData<uint8>(col2im), |
469 | GetTensorData<int32_t>(scratch_buffer)); |
470 | break; |
471 | } |
472 | case kGenericOptimized: { |
473 | optimized_ops::TransposeConvV2( |
474 | op_params, GetTensorShape(input), GetTensorData<uint8>(input), |
475 | GetTensorShape(transposed_weights), |
476 | GetTensorData<uint8>(transposed_weights), GetTensorShape(bias), |
477 | GetTensorData<int32>(bias), GetTensorShape(output), |
478 | GetTensorData<uint8>(output), GetTensorShape(col2im), |
479 | GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer), |
480 | CpuBackendContext::GetFromContext(context)); |
481 | break; |
482 | } |
483 | } |
484 | } |
485 | |
486 | template <KernelType kernel_type> |
487 | void EvalQuantizedPerChannel( |
488 | TfLiteContext* context, const TfLiteTransposeConvParams* params, |
489 | OpData* data, const TfLiteTensor* input, const TfLiteTensor* weights, |
490 | const TfLiteTensor* transposed_weights, const TfLiteTensor* bias, |
491 | TfLiteTensor* col2im, TfLiteTensor* output, TfLiteTensor* scratch_buffer) { |
492 | tflite::ConvParams op_params; |
493 | op_params.padding_type = PaddingType::kSame; |
494 | op_params.padding_values.width = data->padding.width; |
495 | op_params.padding_values.height = data->padding.height; |
496 | op_params.padding_values.width_offset = data->padding.width_offset; |
497 | op_params.padding_values.height_offset = data->padding.height_offset; |
498 | op_params.stride_width = params->stride_width; |
499 | op_params.stride_height = params->stride_height; |
500 | // Need to flip the sign of input offset to add it directly to the quantized |
501 | // buffer. |
502 | op_params.input_offset = -input->params.zero_point; |
503 | op_params.output_offset = output->params.zero_point; |
504 | op_params.quantized_activation_min = data->output_activation_min; |
505 | op_params.quantized_activation_max = data->output_activation_max; |
506 | |
507 | switch (kernel_type) { |
508 | case kReference: { |
509 | reference_integer_ops::TransposeConv( |
510 | op_params, data->per_channel_output_multiplier.data(), |
511 | data->per_channel_output_shift.data(), GetTensorShape(input), |
512 | GetTensorData<int8>(input), GetTensorShape(weights), |
513 | GetTensorData<int8>(weights), GetTensorShape(bias), |
514 | GetTensorData<int32>(bias), GetTensorShape(output), |
515 | GetTensorData<int8>(output), GetTensorShape(col2im), |
516 | GetTensorData<int8>(col2im), GetTensorData<int32_t>(scratch_buffer)); |
517 | break; |
518 | } |
519 | case kGenericOptimized: { |
520 | optimized_integer_ops::TransposeConvV2( |
521 | op_params, data->per_channel_output_multiplier.data(), |
522 | data->per_channel_output_shift.data(), GetTensorShape(input), |
523 | GetTensorData<int8>(input), GetTensorShape(transposed_weights), |
524 | GetTensorData<int8>(transposed_weights), GetTensorShape(bias), |
525 | GetTensorData<int32>(bias), GetTensorShape(output), |
526 | GetTensorData<int8>(output), GetTensorShape(col2im), |
527 | GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer), |
528 | CpuBackendContext::GetFromContext(context)); |
529 | break; |
530 | } |
531 | } |
532 | } |
533 | |
534 | template <KernelType kernel_type> |
535 | void EvalQuantizedPerChannel16x8( |
536 | TfLiteContext* context, const TfLiteTransposeConvParams* params, |
537 | OpData* data, const TfLiteTensor* input, const TfLiteTensor* weights, |
538 | const TfLiteTensor* transposed_weights, const TfLiteTensor* bias, |
539 | TfLiteTensor* col2im, TfLiteTensor* output, TfLiteTensor* scratch_buffer) { |
540 | tflite::ConvParams op_params; |
541 | op_params.padding_type = PaddingType::kSame; |
542 | op_params.padding_values.width = data->padding.width; |
543 | op_params.padding_values.height = data->padding.height; |
544 | op_params.padding_values.width_offset = data->padding.width_offset; |
545 | op_params.padding_values.height_offset = data->padding.height_offset; |
546 | op_params.stride_width = params->stride_width; |
547 | op_params.stride_height = params->stride_height; |
548 | // Need to flip the sign of input offset to add it directly to the quantized |
549 | // buffer. |
550 | op_params.input_offset = -input->params.zero_point; |
551 | op_params.output_offset = output->params.zero_point; |
552 | op_params.quantized_activation_min = data->output_activation_min; |
553 | op_params.quantized_activation_max = data->output_activation_max; |
554 | |
  // To prevent 32-bit accumulator overflow for 16x8 quantization, the
  // optimized path is used only when all zero points are 0.
557 | bool has_non_zero_point = input->params.zero_point || |
558 | weights->params.zero_point || |
559 | output->params.zero_point; |
560 | |
  // Fall back to the reference kernel when the bias type is int64, as there is
  // no optimized kernel for int64 bias yet.
563 | if (bias && bias->type == kTfLiteInt64) { |
564 | reference_integer_ops::TransposeConv( |
565 | op_params, data->per_channel_output_multiplier.data(), |
566 | data->per_channel_output_shift.data(), GetTensorShape(input), |
567 | GetTensorData<int16>(input), GetTensorShape(weights), |
568 | GetTensorData<int8>(weights), GetTensorShape(bias), |
569 | GetTensorData<int64_t>(bias), GetTensorShape(output), |
570 | GetTensorData<int16>(output), GetTensorShape(col2im), |
571 | GetTensorData<int8>(col2im), GetTensorData<int64_t>(scratch_buffer)); |
572 | } else if (kernel_type == kReference || has_non_zero_point) { |
573 | reference_integer_ops::TransposeConv( |
574 | op_params, data->per_channel_output_multiplier.data(), |
575 | data->per_channel_output_shift.data(), GetTensorShape(input), |
576 | GetTensorData<int16>(input), GetTensorShape(weights), |
577 | GetTensorData<int8>(weights), GetTensorShape(bias), |
578 | GetTensorData<int32_t>(bias), GetTensorShape(output), |
579 | GetTensorData<int16>(output), GetTensorShape(col2im), |
580 | GetTensorData<int8>(col2im), GetTensorData<int32_t>(scratch_buffer)); |
581 | } else { |
582 | optimized_integer_ops::TransposeConvV2( |
583 | op_params, data->per_channel_output_multiplier.data(), |
584 | data->per_channel_output_shift.data(), GetTensorShape(input), |
585 | GetTensorData<int16>(input), GetTensorShape(transposed_weights), |
586 | GetTensorData<int8>(transposed_weights), GetTensorShape(bias), |
587 | GetTensorData<int32>(bias), GetTensorShape(output), |
588 | GetTensorData<int16>(output), GetTensorShape(col2im), |
589 | GetTensorData<int32>(col2im), GetTensorData<int32>(scratch_buffer), |
590 | CpuBackendContext::GetFromContext(context)); |
591 | } |
592 | } |
593 | |
594 | template <KernelType kernel_type> |
595 | TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { |
596 | // Retrieve tensors (All should be allocated by now) |
597 | const TfLiteTensor* output_shape; |
598 | TF_LITE_ENSURE_OK( |
599 | context, GetInputSafe(context, node, kOutputShapeTensor, &output_shape)); |
600 | const TfLiteTensor* weights; |
601 | TF_LITE_ENSURE_OK(context, |
602 | GetInputSafe(context, node, kWeightsTensor, &weights)); |
603 | const TfLiteTensor* input; |
604 | TF_LITE_ENSURE_OK(context, |
605 | GetInputSafe(context, node, kDataInputTensor, &input)); |
606 | const TfLiteTensor* bias = |
607 | (NumInputs(node) == 4) |
608 | ? GetOptionalInputTensor(context, node, kBiasTensor) |
609 | : nullptr; |
610 | TfLiteTensor* output; |
611 | TF_LITE_ENSURE_OK(context, |
612 | GetOutputSafe(context, node, kOutputTensor, &output)); |
613 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
614 | TfLiteTensor* col2im = data->has_col2im |
615 | ? GetTemporary(context, node, data->col2im_index) |
616 | : nullptr; |
617 | TfLiteTensor* transposed_weights = |
618 | data->weights_are_transposed |
619 | ? GetTemporary(context, node, data->transposed_weights_index) |
620 | : nullptr; |
621 | const auto* params = |
622 | reinterpret_cast<TfLiteTransposeConvParams*>(node->builtin_data); |
623 | |
624 | // Prevent divisions by 0 |
625 | TF_LITE_ENSURE(context, params->stride_height > 0); |
626 | TF_LITE_ENSURE(context, params->stride_width > 0); |
627 | |
628 | // Resize any deferred dynamic tensors |
629 | if (IsDynamicTensor(output)) { |
630 | TF_LITE_ENSURE_OK(context, ResizeTensor(context, output_shape, output)); |
631 | } |
632 | if (data->has_col2im && IsDynamicTensor(col2im)) { |
633 | TF_LITE_ENSURE_OK(context, ResizeCol2ImTensor(context, output_shape, |
634 | weights, input, col2im)); |
635 | } |
636 | |
637 | // Get height and width of the output image. |
638 | const int width = SizeOfDimension(output, 2); |
639 | const int height = SizeOfDimension(output, 1); |
640 | const int filter_width = SizeOfDimension(weights, 2); |
641 | const int filter_height = SizeOfDimension(weights, 1); |
642 | |
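  // The padding is computed here rather than in Prepare() because the output
  // dimensions may only become known at eval time, when output_shape is not a
  // constant tensor.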
643 | int unused_output_height, unused_output_width; |
644 | data->padding = ComputePaddingHeightWidth( |
645 | params->stride_height, params->stride_width, 1, 1, height, width, |
646 | filter_height, filter_width, params->padding, &unused_output_height, |
647 | &unused_output_width); |
648 | |
  // Currently supported input types: float32, uint8, int8, int16.
650 | switch (input->type) { |
651 | case kTfLiteFloat32: { |
      // Only the GenericOptimized path uses transposed weights.
653 | if (data->weights_are_transposed) { |
654 | if (!IsConstantTensor(weights)) { |
655 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
656 | } |
657 | } |
658 | EvalFloat<kernel_type>(context, params, data, input, weights, bias, |
659 | transposed_weights, col2im, output); |
660 | break; |
661 | } |
662 | case kTfLiteUInt8: { |
663 | TfLiteTensor* scratch_buffer; |
664 | TF_LITE_ENSURE_OK( |
665 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
666 | &scratch_buffer)); |
667 | if (IsDynamicTensor(scratch_buffer)) { |
668 | TF_LITE_ENSURE_OK(context, |
669 | ResizeTensor(context, output_shape, scratch_buffer)); |
670 | } |
671 | if (data->weights_are_transposed) { |
672 | if (!IsConstantTensor(weights)) { |
673 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
674 | } |
675 | } |
676 | EvalQuantized<kernel_type>(context, params, data, input, weights, |
677 | transposed_weights, bias, col2im, output, |
678 | scratch_buffer); |
679 | break; |
680 | } |
681 | case kTfLiteInt8: { |
682 | TfLiteTensor* scratch_buffer; |
683 | TF_LITE_ENSURE_OK( |
684 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
685 | &scratch_buffer)); |
686 | if (IsDynamicTensor(scratch_buffer)) { |
687 | TF_LITE_ENSURE_OK(context, |
688 | ResizeTensor(context, output_shape, scratch_buffer)); |
689 | } |
690 | if (data->weights_are_transposed && !IsConstantTensor(weights)) { |
691 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
692 | } |
693 | EvalQuantizedPerChannel<kernel_type>(context, params, data, input, |
694 | weights, transposed_weights, bias, |
695 | col2im, output, scratch_buffer); |
696 | break; |
697 | } |
698 | case kTfLiteInt16: { |
699 | TfLiteTensor* scratch_buffer; |
700 | TF_LITE_ENSURE_OK( |
701 | context, GetTemporarySafe(context, node, data->scratch_tensor_index, |
702 | &scratch_buffer)); |
703 | if (IsDynamicTensor(scratch_buffer)) { |
704 | TF_LITE_ENSURE_OK(context, |
705 | ResizeTensor(context, output_shape, scratch_buffer)); |
706 | } |
707 | if (data->weights_are_transposed && !IsConstantTensor(weights)) { |
708 | ResizeAndTransposeWeights(context, weights, transposed_weights); |
709 | } |
710 | EvalQuantizedPerChannel16x8<kernel_type>( |
711 | context, params, data, input, weights, transposed_weights, bias, |
712 | col2im, output, scratch_buffer); |
713 | break; |
714 | } |
715 | default: |
      TF_LITE_KERNEL_LOG(context, "Type '%s' is not currently supported.",
717 | TfLiteTypeGetName(input->type)); |
718 | return kTfLiteError; |
719 | } |
720 | return kTfLiteOk; |
721 | } |
722 | |
723 | } // namespace transpose_conv |
724 | |
725 | TfLiteRegistration* Register_TRANSPOSECONV_REF() { |
726 | static TfLiteRegistration r = { |
727 | transpose_conv::Init, transpose_conv::Free, |
728 | transpose_conv::Prepare<transpose_conv::kReference>, |
729 | transpose_conv::Eval<transpose_conv::kReference>}; |
730 | return &r; |
731 | } |
732 | |
733 | TfLiteRegistration* Register_TRANSPOSECONV_GENERIC_OPT() { |
734 | static TfLiteRegistration r = { |
735 | transpose_conv::Init, transpose_conv::Free, |
736 | transpose_conv::Prepare<transpose_conv::kGenericOptimized>, |
737 | transpose_conv::Eval<transpose_conv::kGenericOptimized>}; |
738 | return &r; |
739 | } |
740 | |
741 | TfLiteRegistration* Register_TRANSPOSE_CONV() { |
742 | return Register_TRANSPOSECONV_GENERIC_OPT(); |
743 | } |
744 | |
745 | } // namespace builtin |
746 | } // namespace ops |
747 | } // namespace tflite |
748 | |