1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include "tensorflow/lite/kernels/internal/reference/reduce.h" |
16 | |
17 | #include <stddef.h> |
18 | |
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <tuple>
#include <vector>
21 | |
22 | #include "ruy/profiler/instrumentation.h" // from @ruy |
23 | #include "tensorflow/lite/c/builtin_op_data.h" |
24 | #include "tensorflow/lite/c/c_api_types.h" |
25 | #include "tensorflow/lite/c/common.h" |
26 | #include "tensorflow/lite/kernels/cpu_backend_context.h" |
27 | #include "tensorflow/lite/kernels/internal/compatibility.h" |
28 | #include "tensorflow/lite/kernels/internal/optimized/integer_ops/mean.h" |
29 | #include "tensorflow/lite/kernels/internal/optimized/neon_check.h" |
30 | #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" |
31 | #include "tensorflow/lite/kernels/internal/optimized/reduce.h" |
32 | #include "tensorflow/lite/kernels/internal/quantization_util.h" |
33 | #include "tensorflow/lite/kernels/internal/reduce_common.h" |
34 | #include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h" |
35 | #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" |
36 | #include "tensorflow/lite/kernels/internal/tensor.h" |
37 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" |
38 | #include "tensorflow/lite/kernels/internal/types.h" |
39 | #include "tensorflow/lite/kernels/kernel_util.h" |
40 | |
41 | namespace tflite { |
42 | namespace ops { |
43 | namespace builtin { |
44 | namespace reduce { |
45 | |
// This file implements the reduce_* operators, with both reference and
// generic optimized kernels.
47 | enum KernelType { |
48 | kReference, |
49 | kGenericOptimized, |
50 | }; |
51 | |
52 | struct OpData { |
53 | int32_t multiplier; |
54 | int shift; |
  // The index of the first of the four scratch tensors (temp index, resolved
  // axis, accumulator, normalized dims).
56 | int scratch_tensor_index; |
57 | }; |
58 | |
59 | struct OpContext { |
60 | OpContext(TfLiteContext* context, TfLiteNode* node) { |
61 | params = reinterpret_cast<TfLiteReducerParams*>(node->builtin_data); |
62 | input = GetInput(context, node, 0); |
63 | axis = GetInput(context, node, 1); |
64 | output = GetOutput(context, node, 0); |
65 | } |
66 | TfLiteReducerParams* params; |
67 | const TfLiteTensor* input; |
68 | const TfLiteTensor* axis; |
69 | TfLiteTensor* output; |
70 | }; |
71 | |
72 | void* Init(TfLiteContext* context, const char* buffer, size_t length) { |
  // Creates four temp tensors to store the temp index, resolved axis,
  // accumulator, and normalized dims for the internal implementation.
75 | auto* op_data = new OpData(); |
76 | context->AddTensors(context, 4, &op_data->scratch_tensor_index); |
77 | return op_data; |
78 | } |
79 | |
80 | void Free(TfLiteContext* context, void* buffer) { |
81 | delete reinterpret_cast<OpData*>(buffer); |
82 | } |
83 | |
84 | // Resizes the temp tensor that stores resolved axis. |
85 | TfLiteStatus ResizeTempAxis(TfLiteContext* context, OpContext* op_context, |
86 | TfLiteTensor* resolved_axis) { |
87 | TfLiteIntArray* axis_size = TfLiteIntArrayCreate(1); |
88 | axis_size->data[0] = static_cast<int>(NumElements(op_context->axis)); |
89 | return context->ResizeTensor(context, resolved_axis, axis_size); |
90 | } |
91 | |
// Resizes the temp tensor that stores the accumulator for reduced elements.
93 | TfLiteStatus ResizeTempAccum(TfLiteContext* context, OpContext* op_context, |
94 | TfLiteTensor* temp_accum) { |
95 | TfLiteIntArray* size = TfLiteIntArrayCreate(1); |
96 | size->data[0] = static_cast<int>(NumElements(op_context->output)); |
97 | return context->ResizeTensor(context, temp_accum, size); |
98 | } |
99 | |
100 | // Resizes output array based on the input size and resolved axis. |
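// For example, a [2, 3, 4] input reduced over axis {1} becomes [2, 1, 4] with
// keep_dims and [2, 4] without.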
101 | TfLiteStatus ResizeOutputTensor(TfLiteContext* context, OpContext* op_context) { |
102 | size_t num_axis = NumElements(op_context->axis); |
103 | const TfLiteIntArray* input_dims = op_context->input->dims; |
104 | int input_num_dims = NumDimensions(op_context->input); |
105 | if (input_num_dims == 0) { |
106 | return context->ResizeTensor(context, op_context->output, |
107 | TfLiteIntArrayCreate(0)); |
108 | } |
109 | const int* axis = GetTensorData<int>(op_context->axis); |
110 | if (op_context->params->keep_dims) { |
111 | TfLiteIntArray* output_dims = TfLiteIntArrayCreate(input_num_dims); |
112 | for (int idx = 0; idx < input_num_dims; ++idx) { |
113 | bool is_axis = false; |
114 | for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { |
115 | if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx) { |
116 | is_axis = true; |
117 | break; |
118 | } |
119 | } |
120 | if (is_axis) { |
121 | output_dims->data[idx] = 1; |
122 | } else { |
123 | output_dims->data[idx] = input_dims->data[idx]; |
124 | } |
125 | } |
126 | return context->ResizeTensor(context, op_context->output, output_dims); |
127 | } else { |
    // Calculates the number of unique reduction axes.
129 | int num_reduce_axis = num_axis; |
130 | for (int i = 0; i < num_axis; ++i) { |
131 | int current = axis[i]; |
132 | if (current < 0) { |
133 | current += input_num_dims; |
134 | } |
135 | TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims); |
136 | for (int j = 0; j < i; ++j) { |
137 | int previous = axis[j]; |
138 | if (previous < 0) { |
139 | previous += input_num_dims; |
140 | } |
141 | if (current == previous) { |
142 | --num_reduce_axis; |
143 | break; |
144 | } |
145 | } |
146 | } |
147 | // Determines output dimensions. |
148 | TfLiteIntArray* output_dims = |
149 | TfLiteIntArrayCreate(input_num_dims - num_reduce_axis); |
150 | int num_skip_axis = 0; |
151 | for (int idx = 0; idx < input_num_dims; ++idx) { |
152 | bool is_axis = false; |
153 | for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { |
154 | if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx) { |
155 | ++num_skip_axis; |
156 | is_axis = true; |
157 | break; |
158 | } |
159 | } |
160 | if (!is_axis) { |
161 | output_dims->data[idx - num_skip_axis] = input_dims->data[idx]; |
162 | } |
163 | } |
164 | return context->ResizeTensor(context, op_context->output, output_dims); |
165 | } |
166 | } |
167 | |
168 | // Resizes the temp tensor that stores normalized dims. |
169 | TfLiteStatus ResizeTempDims(TfLiteContext* context, OpContext* op_context, |
170 | TfLiteTensor* normalized_dims) { |
171 | TfLiteIntArray* dims_size = TfLiteIntArrayCreate(1); |
172 | dims_size->data[0] = (op_context->input->dims->size); |
173 | return context->ResizeTensor(context, normalized_dims, dims_size); |
174 | } |
175 | |
// Initializes the four temp tensors that store the index, resolved axis,
// accumulator, and normalized dims.
177 | TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, |
178 | OpContext* op_context) { |
179 | // Creates a temp index to iterate through input data. |
180 | OpData* op_data = reinterpret_cast<OpData*>(node->user_data); |
181 | TfLiteIntArrayFree(node->temporaries); |
182 | node->temporaries = TfLiteIntArrayCreate(4); |
183 | node->temporaries->data[0] = op_data->scratch_tensor_index; |
184 | TfLiteTensor* scratch_tensor; |
185 | TF_LITE_ENSURE_OK( |
186 | context, GetTemporarySafe(context, node, /*index=*/0, &scratch_tensor)); |
187 | scratch_tensor->type = kTfLiteInt32; |
188 | scratch_tensor->allocation_type = kTfLiteArenaRw; |
189 | TfLiteIntArray* index_size = TfLiteIntArrayCreate(1); |
190 | index_size->data[0] = NumDimensions(op_context->input); |
191 | TF_LITE_ENSURE_OK(context, |
192 | context->ResizeTensor(context, scratch_tensor, index_size)); |
193 | |
194 | // Creates a temp tensor to store resolved axis given input data. |
195 | node->temporaries->data[1] = op_data->scratch_tensor_index + 1; |
196 | TfLiteTensor* resolved_axis; |
197 | TF_LITE_ENSURE_OK( |
198 | context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); |
199 | resolved_axis->type = kTfLiteInt32; |
  // Creates a temporary accumulation tensor: it stores intermediate sums when
  // computing mean and intermediate products when computing reduce_prod.
202 | node->temporaries->data[2] = op_data->scratch_tensor_index + 2; |
203 | TfLiteTensor* temp_accum; |
204 | TF_LITE_ENSURE_OK(context, |
205 | GetTemporarySafe(context, node, /*index=*/2, &temp_accum)); |
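  // Pick an accumulator wide enough to hold the running sum/product without
  // overflowing: 32-bit accumulators for 8/16-bit inputs, 64-bit for int32.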
206 | switch (op_context->input->type) { |
207 | case kTfLiteFloat32: |
208 | temp_accum->type = kTfLiteFloat32; |
209 | break; |
210 | case kTfLiteInt32: |
211 | temp_accum->type = kTfLiteInt64; |
212 | break; |
213 | case kTfLiteInt64: |
214 | temp_accum->type = kTfLiteInt64; |
215 | break; |
216 | case kTfLiteUInt8: |
217 | case kTfLiteInt8: |
218 | case kTfLiteInt16: |
219 | temp_accum->type = kTfLiteInt32; |
220 | break; |
221 | case kTfLiteBool: |
222 | temp_accum->type = kTfLiteBool; |
223 | break; |
224 | default: |
225 | return kTfLiteError; |
226 | } |
227 | // Creates a temp tensor to store normalized shape given input data. |
228 | node->temporaries->data[3] = op_data->scratch_tensor_index + 3; |
229 | TfLiteTensor* normalized_dims; |
230 | TF_LITE_ENSURE_OK( |
231 | context, GetTemporarySafe(context, node, /*index=*/3, &normalized_dims)); |
232 | normalized_dims->type = kTfLiteInt32; |
233 | return kTfLiteOk; |
234 | } |
235 | |
236 | TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) { |
237 | TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); |
238 | TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); |
239 | |
240 | OpContext op_context(context, node); |
241 | TF_LITE_ENSURE_TYPES_EQ(context, op_context.axis->type, kTfLiteInt32); |
242 | TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context)); |
243 | |
244 | if (op_context.input->type == kTfLiteInt16) { |
245 | TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0); |
246 | TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0); |
247 | } |
248 | |
249 | TfLiteTensor* resolved_axis; |
250 | TF_LITE_ENSURE_OK( |
251 | context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); |
252 | TfLiteTensor* normalized_dims; |
253 | TF_LITE_ENSURE_OK( |
254 | context, GetTemporarySafe(context, node, /*index=*/3, &normalized_dims)); |
255 | |
256 | if (!IsConstantTensor(op_context.input)) { |
257 | SetTensorToDynamic(normalized_dims); |
258 | } else { |
259 | normalized_dims->allocation_type = kTfLiteArenaRw; |
260 | TF_LITE_ENSURE_OK(context, |
261 | ResizeTempDims(context, &op_context, normalized_dims)); |
262 | } |
263 | // Leaves work to Eval if axis is not constant; else resizes output. |
264 | if (!IsConstantTensor(op_context.axis)) { |
265 | SetTensorToDynamic(op_context.output); |
266 | SetTensorToDynamic(resolved_axis); |
267 | return kTfLiteOk; |
268 | } |
269 | resolved_axis->allocation_type = kTfLiteArenaRw; |
270 | TF_LITE_ENSURE_OK(context, |
271 | ResizeTempAxis(context, &op_context, resolved_axis)); |
272 | TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); |
273 | return kTfLiteOk; |
274 | } |
275 | |
276 | TfLiteStatus PrepareAllOrAny(TfLiteContext* context, TfLiteNode* node) { |
277 | TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); |
278 | const TfLiteTensor* input; |
279 | TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input)); |
280 | TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteBool); |
281 | return PrepareSimple(context, node); |
282 | } |
283 | |
284 | TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { |
285 | TF_LITE_ENSURE_OK(context, PrepareSimple(context, node)); |
286 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
287 | |
288 | // reduce_mean requires a buffer to store intermediate sum result. |
289 | OpContext op_context(context, node); |
290 | if (op_context.input->type == kTfLiteInt8 || |
291 | op_context.input->type == kTfLiteUInt8 || |
292 | op_context.input->type == kTfLiteInt16) { |
293 | const double real_multiplier = |
294 | static_cast<double>(op_context.input->params.scale) / |
295 | static_cast<double>(op_context.output->params.scale); |
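    // QuantizeMultiplier decomposes real_multiplier into a Q31 fixed-point
    // multiplier and a power-of-two shift such that
    //   real_multiplier ~= multiplier * 2^(shift - 31).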
296 | int exponent; |
297 | QuantizeMultiplier(real_multiplier, &data->multiplier, &exponent); |
298 | data->shift = exponent; |
299 | } |
300 | |
301 | if (op_context.input->type == kTfLiteInt16) { |
302 | TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0); |
303 | TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0); |
304 | } |
305 | |
306 | TfLiteTensor* temp_sum; |
307 | TF_LITE_ENSURE_OK(context, |
308 | GetTemporarySafe(context, node, /*index=*/2, &temp_sum)); |
309 | if (!IsConstantTensor(op_context.axis)) { |
310 | SetTensorToDynamic(temp_sum); |
311 | return kTfLiteOk; |
312 | } |
313 | temp_sum->allocation_type = kTfLiteArenaRw; |
314 | return ResizeTempAccum(context, &op_context, temp_sum); |
315 | } |
316 | |
317 | double GetQuantProdScaling(double input_scale, double output_scale, |
318 | int reduced_axis_size) { |
319 | // The scaling after taking the product of all the quantized values should |
320 | // be (input_scale**reduced_axis_size)/output_scale but to avoid overflowing |
321 | // the accumulator we instead scale each multiplication by |
322 | // input_scale/nth_root(output_scale, reduced_axis_size). |
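  // Applying this per-multiplication scale n times, where n is
  // reduced_axis_size, recovers the exact overall factor:
  //   (input_scale / output_scale^(1/n))^n == input_scale^n / output_scale.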
323 | return input_scale / std::pow(output_scale, 1.0 / reduced_axis_size); |
324 | } |
325 | |
326 | TfLiteStatus PrepareProd(TfLiteContext* context, TfLiteNode* node) { |
327 | TF_LITE_ENSURE_OK(context, PrepareSimple(context, node)); |
328 | |
329 | OpContext op_context(context, node); |
330 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
331 | |
332 | TfLiteTensor* temp_prod; |
333 | TF_LITE_ENSURE_OK(context, |
334 | GetTemporarySafe(context, node, /*index=*/2, &temp_prod)); |
335 | |
336 | if (op_context.input->type == kTfLiteInt16) { |
337 | TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0); |
338 | TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0); |
339 | } |
340 | |
341 | if (!IsConstantTensor(op_context.axis)) { |
342 | SetTensorToDynamic(temp_prod); |
343 | return kTfLiteOk; |
344 | } |
345 | |
346 | const int input_size = GetTensorShape(op_context.input).FlatSize(); |
347 | const int output_size = GetTensorShape(op_context.output).FlatSize(); |
  // We support both quantized and non-quantized int8/int16 inputs.
349 | if (op_context.input->quantization.type != kTfLiteNoQuantization && |
350 | (op_context.input->type == kTfLiteInt8 || |
351 | op_context.input->type == kTfLiteInt16) && |
352 | input_size != 0 && output_size != 0) { |
353 | const int reduced_axis_size = input_size / output_size; |
354 | const double scaling = GetQuantProdScaling( |
355 | static_cast<double>(op_context.input->params.scale), |
356 | static_cast<double>(op_context.output->params.scale), |
357 | reduced_axis_size); |
358 | QuantizeMultiplier(scaling, &data->multiplier, &data->shift); |
359 | } |
360 | |
361 | temp_prod->allocation_type = kTfLiteArenaRw; |
362 | return ResizeTempAccum(context, &op_context, temp_prod); |
363 | } |
364 | |
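// Copies the axis values into MeanParams::axis, which holds up to four
// entries, and pads the unused trailing slots with 1.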
365 | void ResolveAxis(const int* axis_data, int axis_count, |
366 | tflite::MeanParams* op_params) { |
367 | int i = 0; |
368 | for (; i < axis_count; ++i) { |
369 | op_params->axis[i] = static_cast<int16>(axis_data[i]); |
370 | } |
371 | for (; i < 4; ++i) { |
372 | op_params->axis[i] = 1; |
373 | } |
374 | } |
375 | |
376 | template <typename T, typename U> |
377 | TfLiteStatus Mean(TfLiteContext* context, const OpContext* op_context, |
378 | int* temp_index, int* resolved_axis, U* temp_sum, |
379 | KernelType kernel_type) { |
380 | int num_axis = static_cast<int>(NumElements(op_context->axis)); |
381 | auto args = std::tuple( |
382 | GetTensorData<T>(op_context->input), &op_context->input->dims->data[0], |
383 | op_context->input->dims->size, GetTensorData<T>(op_context->output), |
384 | &op_context->output->dims->data[0], op_context->output->dims->size, |
385 | GetTensorData<int>(op_context->axis), num_axis, |
386 | op_context->params->keep_dims, temp_index, resolved_axis, temp_sum); |
387 | if (kernel_type == kReference) { |
388 | TF_LITE_ENSURE(context, std::apply(reference_ops::Mean<T, U>, args)); |
389 | } else { |
390 | TF_LITE_ENSURE(context, std::apply(optimized_ops::Mean<T, U>, args)); |
391 | } |
392 | return kTfLiteOk; |
393 | } |
394 | |
395 | template <typename T> |
396 | TfLiteStatus QuantizedMeanOrSum(TfLiteContext* context, |
397 | const OpContext* op_context, int* temp_index, |
398 | int* resolved_axis, int* temp_sum, |
399 | KernelType kernel_type, bool compute_sum) { |
400 | int num_axis = static_cast<int>(NumElements(op_context->axis)); |
401 | auto args = std::tuple( |
402 | GetTensorData<T>(op_context->input), op_context->input->params.zero_point, |
403 | op_context->input->params.scale, &op_context->input->dims->data[0], |
404 | op_context->input->dims->size, GetTensorData<T>(op_context->output), |
405 | op_context->output->params.zero_point, op_context->output->params.scale, |
406 | &op_context->output->dims->data[0], op_context->output->dims->size, |
407 | GetTensorData<int>(op_context->axis), num_axis, |
408 | op_context->params->keep_dims, temp_index, resolved_axis, temp_sum, |
409 | compute_sum); |
410 | if (kernel_type == kReference) { |
411 | TF_LITE_ENSURE( |
412 | context, |
413 | std::apply(reference_ops::QuantizedMeanOrSum<T, int32_t>, args)); |
414 | } else { |
415 | TF_LITE_ENSURE( |
416 | context, |
417 | std::apply(optimized_ops::QuantizedMeanOrSum<T, int32_t>, args)); |
418 | } |
419 | return kTfLiteOk; |
420 | } |
421 | |
422 | template <typename integer_type> |
423 | TfLiteStatus EvalIntegerMean(TfLiteContext* context, |
424 | const OpContext& op_context, int num_axis, |
425 | OpData* data, TfLiteTensor* temp_index, |
426 | TfLiteTensor* resolved_axis, |
427 | TfLiteTensor* temp_sum, |
428 | TfLiteTensor* normalized_dims, |
429 | KernelType kernel_type) { |
430 | tflite::MeanParams op_params; |
431 | op_params.axis_count = num_axis; |
432 | ResolveAxis(GetTensorData<int>(op_context.axis), num_axis, &op_params); |
433 | const TfLiteTensor* input = op_context.input; |
434 | |
  if (input->params.zero_point == op_context.output->params.zero_point &&
      input->params.scale == op_context.output->params.scale) {
    TF_LITE_ENSURE_OK(
        context, Mean<integer_type, int>(
                     context, &op_context, GetTensorData<int>(temp_index),
                     GetTensorData<int>(resolved_axis),
                     GetTensorData<int>(temp_sum), kernel_type));
  } else {
    TF_LITE_ENSURE_OK(
        context,
        QuantizedMeanOrSum<integer_type>(
            context, &op_context, GetTensorData<int>(temp_index),
            GetTensorData<int>(resolved_axis), GetTensorData<int32_t>(temp_sum),
            kernel_type, /*compute_sum=*/false));
  }
447 | return kTfLiteOk; |
448 | } |
449 | |
450 | template <typename T> |
451 | void InitializeMeanOutputTyped(TfLiteTensor* output) { |
452 | RuntimeShape output_shape = GetTensorShape(output); |
453 | const size_t flat_size = output_shape.FlatSize(); |
454 | T* output_data = GetTensorData<T>(output); |
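  // Note: std::numeric_limits<T>::quiet_NaN() is only meaningful for floating
  // point types; for integer T it returns T(), i.e. zero, so integer outputs
  // are zero-filled here.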
455 | T nan_value = std::numeric_limits<T>::quiet_NaN(); |
456 | for (int idx = 0; idx < flat_size; ++idx) { |
457 | *output_data++ = nan_value; |
458 | } |
459 | } |
460 | |
461 | TfLiteStatus InitializeMeanOutput(TfLiteTensor* output) { |
462 | switch (output->type) { |
463 | case kTfLiteFloat32: |
464 | InitializeMeanOutputTyped<float>(output); |
465 | break; |
466 | case kTfLiteInt32: |
467 | InitializeMeanOutputTyped<int>(output); |
468 | break; |
469 | case kTfLiteInt64: |
470 | InitializeMeanOutputTyped<int64_t>(output); |
471 | break; |
472 | case kTfLiteUInt8: |
473 | InitializeMeanOutputTyped<uint8_t>(output); |
474 | break; |
475 | case kTfLiteInt8: |
476 | InitializeMeanOutputTyped<int8_t>(output); |
477 | break; |
478 | case kTfLiteInt16: |
479 | InitializeMeanOutputTyped<int16_t>(output); |
480 | break; |
481 | default: |
482 | return kTfLiteError; |
483 | } |
484 | return kTfLiteOk; |
485 | } |
486 | |
487 | template <KernelType kernel_type> |
488 | TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { |
489 | OpContext op_context(context, node); |
490 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
491 | |
492 | int num_axis = static_cast<int>(NumElements(op_context.axis)); |
493 | TfLiteTensor* temp_index; |
494 | TF_LITE_ENSURE_OK(context, |
495 | GetTemporarySafe(context, node, /*index=*/0, &temp_index)); |
496 | TfLiteTensor* resolved_axis; |
497 | TF_LITE_ENSURE_OK( |
498 | context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); |
499 | TfLiteTensor* temp_sum; |
500 | TF_LITE_ENSURE_OK(context, |
501 | GetTemporarySafe(context, node, /*index=*/2, &temp_sum)); |
502 | // Resize the output tensor if the output tensor is dynamic. |
503 | if (IsDynamicTensor(op_context.output)) { |
504 | TF_LITE_ENSURE_OK(context, |
505 | ResizeTempAxis(context, &op_context, resolved_axis)); |
506 | TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); |
507 | TF_LITE_ENSURE_OK(context, ResizeTempAccum(context, &op_context, temp_sum)); |
508 | } |
509 | TfLiteTensor* normalized_dims; |
510 | TF_LITE_ENSURE_OK( |
511 | context, GetTemporarySafe(context, node, /*index=*/3, &normalized_dims)); |
512 | if (IsDynamicTensor(normalized_dims)) { |
513 | TF_LITE_ENSURE_OK(context, |
514 | ResizeTempDims(context, &op_context, normalized_dims)); |
515 | } |
516 | |
517 | // Return early when input is empty. |
518 | const TfLiteTensor* input = op_context.input; |
519 | RuntimeShape input_shape = GetTensorShape(input); |
520 | if (input_shape.FlatSize() == 0) { |
521 | TF_LITE_ENSURE_OK(context, InitializeMeanOutput(op_context.output)); |
522 | return kTfLiteOk; |
523 | } |
524 | |
525 | if (kernel_type == kGenericOptimized) { |
526 | // Use optimized ops if available. |
527 | switch (input->type) { |
528 | case kTfLiteInt8: { |
529 | tflite::MeanParams op_params; |
530 | op_params.axis_count = num_axis; |
531 | ResolveAxis(GetTensorData<int>(op_context.axis), num_axis, &op_params); |
532 | if (op_context.params->keep_dims && NumDimensions(input) == 4 && |
533 | op_params.axis_count == 2 && |
534 | ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || |
535 | (op_params.axis[0] == 2 && op_params.axis[1] == 1))) { |
536 | optimized_integer_ops::Mean( |
537 | op_params, input_shape, GetTensorData<int8_t>(input), |
538 | input->params.zero_point, input->params.scale, |
539 | GetTensorShape(op_context.output), |
540 | GetTensorData<int8_t>(op_context.output), |
541 | op_context.output->params.zero_point, |
542 | op_context.output->params.scale, |
543 | CpuBackendContext::GetFromContext(context)); |
544 | return kTfLiteOk; |
545 | } |
546 | } break; |
547 | case kTfLiteUInt8: { |
548 | tflite::MeanParams op_params; |
549 | op_params.axis_count = num_axis; |
550 | ResolveAxis(GetTensorData<int>(op_context.axis), num_axis, &op_params); |
551 | if (op_context.params->keep_dims && NumDimensions(input) == 4 && |
552 | op_params.axis_count == 2 && |
553 | ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || |
554 | (op_params.axis[0] == 2 && op_params.axis[1] == 1))) { |
555 | optimized_ops::Mean(op_params, input_shape, |
556 | GetTensorData<uint8_t>(input), |
557 | input->params.zero_point, input->params.scale, |
558 | GetTensorShape(op_context.output), |
559 | GetTensorData<uint8_t>(op_context.output), |
560 | op_context.output->params.zero_point, |
561 | op_context.output->params.scale, |
562 | CpuBackendContext::GetFromContext(context)); |
563 | return kTfLiteOk; |
564 | } |
565 | } break; |
566 | default: |
567 | break; |
568 | } |
569 | } |
570 | |
571 | switch (op_context.input->type) { |
572 | case kTfLiteFloat32: |
573 | Mean<float, float>(context, &op_context, GetTensorData<int>(temp_index), |
574 | GetTensorData<int>(resolved_axis), |
575 | GetTensorData<float>(temp_sum), kernel_type); |
576 | break; |
577 | case kTfLiteInt32: |
578 | Mean<int, int64_t>(context, &op_context, GetTensorData<int>(temp_index), |
579 | GetTensorData<int>(resolved_axis), |
580 | GetTensorData<int64_t>(temp_sum), kernel_type); |
581 | break; |
582 | case kTfLiteInt64: |
583 | Mean<int64_t, int64_t>(context, &op_context, |
584 | GetTensorData<int>(temp_index), |
585 | GetTensorData<int>(resolved_axis), |
586 | GetTensorData<int64_t>(temp_sum), kernel_type); |
587 | break; |
588 | case kTfLiteInt8: { |
589 | TF_LITE_ENSURE_OK( |
590 | context, EvalIntegerMean<int8_t>(context, op_context, num_axis, data, |
591 | temp_index, resolved_axis, temp_sum, |
592 | normalized_dims, kernel_type)); |
593 | } break; |
594 | case kTfLiteInt16: { |
595 | TF_LITE_ENSURE_OK( |
596 | context, EvalIntegerMean<int16_t>(context, op_context, num_axis, data, |
597 | temp_index, resolved_axis, temp_sum, |
598 | normalized_dims, kernel_type)); |
599 | } break; |
600 | case kTfLiteUInt8: { |
601 | TF_LITE_ENSURE_OK( |
602 | context, EvalIntegerMean<uint8_t>(context, op_context, num_axis, data, |
603 | temp_index, resolved_axis, temp_sum, |
604 | normalized_dims, kernel_type)); |
605 | } break; |
606 | default: |
607 | return kTfLiteError; |
608 | } |
609 | return kTfLiteOk; |
610 | } |
611 | |
612 | template <typename T> |
613 | struct EvalData { |
614 | std::function<T(T, T)> reduce_func; |
615 | const T* input_data; |
616 | T output; |
617 | }; |
618 | |
619 | // Returns true if 'axis' holds all dims [0 ... N-1] where N is num_dims. |
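// For example, with num_dims == 3 and axis == {0, 1, 2}, dims_mask becomes
// 0b111 == (1 << 3) - 1, so the reduction covers every dimension.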
620 | bool IsReduceAllDims(const TfLiteTensor* axis, int num_axis, int num_dims) { |
621 | int dims_mask = 0; |
622 | for (int i = 0; i < num_axis; ++i) { |
623 | dims_mask |= 1 << (axis->data.i32[i]); |
624 | } |
625 | return num_dims == 0 ? dims_mask == 0 : (dims_mask == (1 << num_dims) - 1); |
626 | } |
627 | |
// Worker task that reduces the elements in the index interval [start, end).
630 | template <typename T> |
631 | struct ReduceWorkerTask : cpu_backend_threadpool::Task { |
632 | ReduceWorkerTask(EvalData<T>* eval_data, int start, int end) |
633 | : eval_data(eval_data), start(start), end(end) {} |
634 | void Run() override { |
635 | auto* input_data = eval_data->input_data; |
636 | T& output = eval_data->output; |
637 | auto& reducer = eval_data->reduce_func; |
638 | for (int i = start; i < end; ++i) { |
639 | output = reducer(output, input_data[i]); |
640 | } |
641 | } |
642 | |
643 | private: |
644 | EvalData<T>* eval_data; |
645 | int start; |
646 | int end; |
647 | }; |
648 | |
// Applies the reduce operation using the 'reducer' function over all of
// 'input_data', reducing it to a single element.
651 | template <typename T> |
652 | void ReduceAllDims(const T* input_data, const int* input_dims, |
653 | const int input_num_dims, T* output_data, T init_value, |
654 | T reducer(const T current, const T in), |
655 | TfLiteContext* context) { |
656 | EvalData<T> eval_data; |
657 | eval_data.reduce_func = reducer; |
658 | eval_data.input_data = input_data; |
659 | eval_data.output = init_value; |
660 | |
661 | int num_elems = NumElements(input_dims, input_num_dims); |
662 | |
663 | // Fetch backend context and number of threads. |
664 | CpuBackendContext* cpu_backend_context = |
665 | CpuBackendContext::GetFromContext(context); |
666 | int thread_count = cpu_backend_context->max_num_threads(); |
667 | const int kMinElementsPerThread = 1024; |
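  // Fall back to a single-threaded reduction when each thread would get fewer
  // than kMinElementsPerThread elements.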
668 | if (num_elems / thread_count < kMinElementsPerThread) thread_count = 1; |
669 | |
670 | if (thread_count == 1) { |
671 | output_data[0] = num_elems > 0 ? input_data[0] : init_value; |
672 | for (int i = 1; i < num_elems; ++i) { |
673 | output_data[0] = reducer(output_data[0], input_data[i]); |
674 | } |
675 | return; |
676 | } |
677 | std::vector<ReduceWorkerTask<T>> tasks; |
678 | std::vector<EvalData<T>> data; |
679 | tasks.reserve(thread_count); |
680 | data.reserve(thread_count); |
681 | int start = 0; |
682 | for (int i = 0; i < thread_count; ++i) { |
683 | data.push_back(eval_data); |
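    // Evenly split the remaining [start, num_elems) range over the remaining
    // threads; e.g. 10 elements on 3 threads yields chunks of 3, 3, and 4.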
684 | int end = start + (num_elems - start) / (thread_count - i); |
685 | tasks.emplace_back(ReduceWorkerTask<T>(&data.back(), start, end)); |
686 | start = end; |
687 | } |
688 | // Run all tasks on the thread pool. |
689 | cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), |
690 | cpu_backend_context); |
691 | // Reduce all data from different workers. |
692 | output_data[0] = data[0].output; |
693 | for (int i = 1; i < data.size(); ++i) { |
694 | output_data[0] = reducer(output_data[0], data[i].output); |
695 | } |
696 | } |
697 | |
// The underlying logic for Reduce Sum/Prod/Max/Min/Any/All.
699 | template <typename T> |
700 | TfLiteStatus EvalType(TfLiteContext* context, TfLiteNode* node, |
701 | OpContext* op_context, KernelType kernel_type, |
702 | ReduceType reduce_type) { |
703 | int64_t num_axis = NumElements(op_context->axis); |
704 | TfLiteTensor* temp_index; |
705 | TF_LITE_ENSURE_OK(context, |
706 | GetTemporarySafe(context, node, /*index=*/0, &temp_index)); |
707 | TfLiteTensor* resolved_axis; |
708 | TF_LITE_ENSURE_OK( |
709 | context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); |
710 | // Resize the output tensor if the output tensor is dynamic. |
711 | if (IsDynamicTensor(op_context->output)) { |
712 | TF_LITE_ENSURE_OK(context, |
713 | ResizeTempAxis(context, op_context, resolved_axis)); |
714 | TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, op_context)); |
715 | } |
716 | |
717 | const TfLiteTensor* input = op_context->input; |
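  // Quantized reductions are only supported here when the input and output
  // share the same quantization parameters; no rescaling is performed (the
  // rescaling sum path is handled separately in EvalSum).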
718 | if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 || |
719 | input->type == kTfLiteInt16) { |
720 | TF_LITE_ENSURE_EQ(context, input->params.scale, |
721 | op_context->output->params.scale); |
722 | TF_LITE_ENSURE_EQ(context, input->params.zero_point, |
723 | op_context->output->params.zero_point); |
724 | } |
725 | if (kernel_type == kReference) { |
726 | T init_value = 0; |
727 | T (*reducer)(const T current, const T in); |
728 | switch (reduce_type) { |
729 | case kSum: |
730 | reducer = [](const T current, const T in) -> T { return in + current; }; |
731 | init_value = T(0); |
732 | break; |
733 | case kProd: |
734 | init_value = static_cast<T>(1); |
735 | reducer = [](const T current, const T in) -> T { return in * current; }; |
736 | break; |
737 | case kMax: |
738 | init_value = std::numeric_limits<T>::lowest(); |
739 | reducer = [](const T current, const T in) -> T { |
740 | return (in > current) ? in : current; |
741 | }; |
742 | break; |
743 | case kMin: |
744 | init_value = std::numeric_limits<T>::max(); |
745 | reducer = [](const T current, const T in) -> T { |
746 | return (in < current) ? in : current; |
747 | }; |
748 | break; |
749 | case kAny: |
750 | init_value = false; |
751 | reducer = [](const T current, const T in) -> T { |
752 | return in || current; |
753 | }; |
754 | break; |
755 | case kAll: |
756 | init_value = true; |
757 | reducer = [](const T current, const T in) -> T { |
758 | return in && current; |
759 | }; |
760 | break; |
761 | default: |
762 | TF_LITE_KERNEL_LOG(context, "Unsupported ReduceType: %d" , reduce_type); |
763 | return kTfLiteError; |
764 | } |
765 | |
766 | int num_resolved_axis = 0; |
767 | TF_LITE_ENSURE_MSG( |
768 | context, |
769 | tflite::reference_ops::ResolveAxis( |
770 | input->dims->size, GetTensorData<int>(op_context->axis), num_axis, |
771 | GetTensorData<int>(resolved_axis), &num_resolved_axis), |
772 | "Invalid axis index." ); |
773 | |
774 | if (IsReduceAllDims(resolved_axis, num_resolved_axis, input->dims->size)) { |
775 | ReduceAllDims(GetTensorData<T>(input), input->dims->data, |
776 | input->dims->size, GetTensorData<T>(op_context->output), |
777 | init_value, reducer, context); |
778 | return kTfLiteOk; |
779 | } |
780 | TF_LITE_ENSURE( |
781 | context, |
782 | reference_ops::ReduceGeneric<T>( |
783 | GetTensorData<T>(input), input->dims->data, input->dims->size, |
784 | GetTensorData<T>(op_context->output), |
785 | op_context->output->dims->data, op_context->output->dims->size, |
786 | GetTensorData<int>(op_context->axis), num_axis, |
787 | op_context->params->keep_dims, GetTensorData<int>(temp_index), |
788 | GetTensorData<int>(resolved_axis), init_value, reducer)); |
789 | return kTfLiteOk; |
790 | } else { |
791 | TfLiteTensor* normalized_dims; |
792 | TF_LITE_ENSURE_OK(context, GetTemporarySafe(context, node, /*index=*/3, |
793 | &normalized_dims)); |
794 | if (IsDynamicTensor(normalized_dims)) { |
795 | TF_LITE_ENSURE_OK(context, |
796 | ResizeTempDims(context, op_context, normalized_dims)); |
797 | } |
798 | TF_LITE_ENSURE( |
799 | context, |
800 | optimized_ops::ReduceGeneric<T>( |
801 | GetTensorData<T>(input), input->dims->data, input->dims->size, |
802 | GetTensorData<T>(op_context->output), |
803 | op_context->output->dims->data, op_context->output->dims->size, |
804 | GetTensorData<int>(op_context->axis), num_axis, |
805 | GetTensorData<int>(resolved_axis), |
806 | GetTensorData<int>(normalized_dims), reduce_type)); |
807 | return kTfLiteOk; |
808 | } |
809 | } |
810 | |
811 | // The entry point that handles input types and then calls template functions to |
812 | // handle ReduceType. |
813 | template <KernelType kernel_type, ReduceType reduce_type> |
814 | TfLiteStatus EvalGeneric(TfLiteContext* context, TfLiteNode* node) { |
815 | OpContext op_context(context, node); |
  switch (op_context.input->type) {
    case kTfLiteFloat32:
      return EvalType<float>(context, node, &op_context, kernel_type,
                             reduce_type);
    case kTfLiteInt32:
      return EvalType<int>(context, node, &op_context, kernel_type,
                           reduce_type);
    case kTfLiteInt64:
      return EvalType<int64_t>(context, node, &op_context, kernel_type,
                               reduce_type);
    case kTfLiteUInt8:
      return EvalType<uint8_t>(context, node, &op_context, kernel_type,
                               reduce_type);
    case kTfLiteInt8:
      return EvalType<int8_t>(context, node, &op_context, kernel_type,
                              reduce_type);
    case kTfLiteInt16:
      return EvalType<int16_t>(context, node, &op_context, kernel_type,
                               reduce_type);
    case kTfLiteBool:
      return EvalType<bool>(context, node, &op_context, kernel_type,
                            reduce_type);
    default:
      return kTfLiteError;
  }
848 | } |
849 | |
850 | template <KernelType kernel_type> |
851 | TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) { |
852 | OpContext op_context(context, node); |
853 | ruy::profiler::ScopeLabel label("Sum" ); |
854 | const auto& input = op_context.input; |
855 | const auto& output = op_context.output; |
856 | const bool same_scale = |
857 | (input->params.scale == output->params.scale && |
858 | input->params.zero_point == output->params.zero_point); |
859 | const bool eight_bit_quantized = |
860 | input->type == kTfLiteUInt8 || input->type == kTfLiteInt8; |
861 | const bool need_rescale = (eight_bit_quantized && !same_scale); |
862 | if (need_rescale) { |
    // Rescaling 8-bit reduce sum.
864 | TfLiteTensor* temp_index; |
865 | TF_LITE_ENSURE_OK( |
866 | context, GetTemporarySafe(context, node, /*index=*/0, &temp_index)); |
867 | TfLiteTensor* resolved_axis; |
868 | TF_LITE_ENSURE_OK( |
869 | context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); |
870 | TfLiteTensor* temp_sum; |
871 | TF_LITE_ENSURE_OK(context, |
872 | GetTemporarySafe(context, node, /*index=*/2, &temp_sum)); |
873 | // Resize the output tensor if the output tensor is dynamic. |
874 | if (IsDynamicTensor(op_context.output)) { |
875 | TF_LITE_ENSURE_OK(context, |
876 | ResizeTempAxis(context, &op_context, resolved_axis)); |
877 | TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); |
878 | TF_LITE_ENSURE_OK(context, |
879 | ResizeTempAccum(context, &op_context, temp_sum)); |
880 | } |
881 | |
    if (input->type == kTfLiteUInt8) {
      TF_LITE_ENSURE_OK(
          context, QuantizedMeanOrSum<uint8_t>(
                       context, &op_context, GetTensorData<int>(temp_index),
                       GetTensorData<int>(resolved_axis),
                       GetTensorData<int32_t>(temp_sum), kernel_type,
                       /*compute_sum=*/true));
    } else {
      TF_LITE_ENSURE_OK(
          context, QuantizedMeanOrSum<int8_t>(
                       context, &op_context, GetTensorData<int>(temp_index),
                       GetTensorData<int>(resolved_axis),
                       GetTensorData<int32_t>(temp_sum), kernel_type,
                       /*compute_sum=*/true));
    }
895 | } else { |
896 | return EvalGeneric<kernel_type, kSum>(context, node); |
897 | } |
898 | |
899 | return kTfLiteOk; |
900 | } |
901 | |
902 | template <KernelType kernel_type, typename T> |
903 | TfLiteStatus EvalQuantizedProd(TfLiteContext* context, TfLiteNode* node, |
904 | OpContext* op_context) { |
905 | OpData* data = reinterpret_cast<OpData*>(node->user_data); |
906 | |
907 | const int64_t num_axis = NumElements(op_context->axis); |
908 | TfLiteTensor* temp_index; |
909 | TF_LITE_ENSURE_OK(context, |
910 | GetTemporarySafe(context, node, /*index=*/0, &temp_index)); |
911 | TfLiteTensor* resolved_axis; |
912 | TF_LITE_ENSURE_OK( |
913 | context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis)); |
914 | TfLiteTensor* temp_prod; |
915 | TF_LITE_ENSURE_OK(context, |
916 | GetTemporarySafe(context, node, /*index=*/2, &temp_prod)); |
917 | TfLiteTensor* normalized_dims; |
918 | TF_LITE_ENSURE_OK( |
919 | context, GetTemporarySafe(context, node, /*index=*/3, &normalized_dims)); |
920 | const TfLiteTensor* input = op_context->input; |
921 | TfLiteTensor* output = op_context->output; |
922 | |
  // Return early when the input shape has a zero dimension.
924 | for (int i = 0; i < input->dims->size; ++i) { |
925 | if (input->dims->data[i] == 0) return kTfLiteOk; |
926 | } |
927 | |
928 | if (IsDynamicTensor(normalized_dims)) { |
929 | TF_LITE_ENSURE_OK(context, |
930 | ResizeTempDims(context, op_context, normalized_dims)); |
931 | } |
932 | // Resize the output tensor if the output tensor is dynamic. |
933 | if (IsDynamicTensor(output)) { |
934 | TF_LITE_ENSURE_OK(context, |
935 | ResizeTempAxis(context, op_context, resolved_axis)); |
936 | TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, op_context)); |
937 | TF_LITE_ENSURE_OK(context, ResizeTempAccum(context, op_context, temp_prod)); |
938 | |
939 | const int input_size = GetTensorShape(input).FlatSize(); |
940 | const int output_size = GetTensorShape(output).FlatSize(); |
941 | TF_LITE_ENSURE(context, input_size != 0); |
942 | TF_LITE_ENSURE(context, output_size != 0); |
943 | |
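    // Each output element folds reduced_axis_size inputs, e.g. a [2, 3, 4]
    // input reduced over axis 1 gives 24 / 8 == 3.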
944 | const int reduced_axis_size = input_size / output_size; |
945 | const double scaling = GetQuantProdScaling( |
946 | static_cast<double>(input->params.scale), |
947 | static_cast<double>(output->params.scale), reduced_axis_size); |
948 | QuantizeMultiplier(scaling, &data->multiplier, &data->shift); |
949 | } |
950 | |
951 | if (kernel_type == kReference) { |
952 | TF_LITE_ENSURE( |
953 | context, |
954 | reference_ops::QuantizedReduceProd<T>( |
955 | GetTensorData<T>(input), input->params.zero_point, |
956 | GetTensorShape(input), GetTensorData<T>(output), |
957 | output->params.zero_point, GetTensorShape(output), |
958 | GetTensorData<int>(op_context->axis), num_axis, |
959 | op_context->params->keep_dims, GetTensorData<int>(temp_index), |
960 | GetTensorData<int>(resolved_axis), GetTensorData<int32>(temp_prod), |
961 | data->multiplier, data->shift)); |
962 | return kTfLiteOk; |
963 | } else { |
964 | TF_LITE_ENSURE( |
965 | context, |
966 | optimized_ops::QuantizedReduceProd<T>( |
967 | GetTensorData<T>(input), input->params.zero_point, |
968 | GetTensorShape(input), GetTensorData<T>(output), |
969 | output->params.zero_point, GetTensorShape(output), |
970 | GetTensorData<int>(op_context->axis), num_axis, |
971 | GetTensorData<int>(resolved_axis), |
972 | GetTensorData<int>(normalized_dims), |
973 | GetTensorData<int32>(temp_prod), data->multiplier, data->shift)); |
974 | return kTfLiteOk; |
975 | } |
976 | } |
977 | |
978 | template <KernelType kernel_type> |
979 | TfLiteStatus EvalProd(TfLiteContext* context, TfLiteNode* node) { |
980 | OpContext op_context(context, node); |
981 | // As we need to support both quantized and non-quantized int8/int16 inputs, |
982 | // we separate the evaluation between EvalQuantizedProd for quantized |
983 | // int8/int16 inputs and EvalGeneric for non-quantized int8/int16 (and |
984 | // other non-quantized types). |
985 | if (op_context.input->quantization.type != kTfLiteNoQuantization) { |
986 | if (op_context.input->type == kTfLiteInt8) { |
987 | return EvalQuantizedProd<kernel_type, int8_t>(context, node, &op_context); |
988 | } else if (op_context.input->type == kTfLiteInt16) { |
989 | return EvalQuantizedProd<kernel_type, int16_t>(context, node, |
990 | &op_context); |
991 | } else { |
992 | TF_LITE_KERNEL_LOG(context, "Unsupported quantized data type: %d" , |
993 | op_context.input->type); |
994 | return kTfLiteError; |
995 | } |
996 | } else { |
997 | return EvalGeneric<kernel_type, kProd>(context, node); |
998 | } |
999 | } |
1000 | |
1001 | } // namespace reduce |
1002 | |
1003 | using ops::builtin::reduce::ReduceType; |
1004 | |
1005 | TfLiteRegistration* Register_MEAN_OPT() { |
1006 | static TfLiteRegistration r = {reduce::Init, reduce::Free, |
1007 | reduce::PrepareMeanOrSum, |
1008 | reduce::EvalMean<reduce::kGenericOptimized>}; |
1009 | return &r; |
1010 | } |
1011 | |
1012 | TfLiteRegistration* Register_MEAN_REF() { |
1013 | static TfLiteRegistration r = {reduce::Init, reduce::Free, |
1014 | reduce::PrepareMeanOrSum, |
1015 | reduce::EvalMean<reduce::kReference>}; |
1016 | return &r; |
1017 | } |
1018 | |
1019 | TfLiteRegistration* Register_SUM_REF() { |
1020 | static TfLiteRegistration r = {reduce::Init, reduce::Free, |
1021 | reduce::PrepareMeanOrSum, |
1022 | reduce::EvalSum<reduce::kReference>}; |
1023 | return &r; |
1024 | } |
1025 | |
1026 | TfLiteRegistration* Register_SUM_OPT() { |
1027 | static TfLiteRegistration r = {reduce::Init, reduce::Free, |
1028 | reduce::PrepareMeanOrSum, |
1029 | reduce::EvalSum<reduce::kGenericOptimized>}; |
1030 | return &r; |
1031 | } |
1032 | |
1033 | TfLiteRegistration* Register_REDUCE_PROD_REF() { |
1034 | static TfLiteRegistration r = {reduce::Init, reduce::Free, |
1035 | reduce::PrepareProd, |
1036 | reduce::EvalProd<reduce::kReference>}; |
1037 | return &r; |
1038 | } |
1039 | |
1040 | TfLiteRegistration* Register_REDUCE_PROD_OPT() { |
1041 | static TfLiteRegistration r = {reduce::Init, reduce::Free, |
1042 | reduce::PrepareProd, |
1043 | reduce::EvalProd<reduce::kGenericOptimized>}; |
1044 | return &r; |
1045 | } |
1046 | |
1047 | TfLiteRegistration* Register_REDUCE_MAX_REF() { |
1048 | static TfLiteRegistration r = { |
1049 | reduce::Init, reduce::Free, reduce::PrepareSimple, |
1050 | reduce::EvalGeneric<reduce::kReference, ReduceType::kMax>}; |
1051 | return &r; |
1052 | } |
1053 | |
1054 | TfLiteRegistration* Register_REDUCE_MAX_OPT() { |
1055 | static TfLiteRegistration r = { |
1056 | reduce::Init, reduce::Free, reduce::PrepareSimple, |
1057 | reduce::EvalGeneric<reduce::kGenericOptimized, ReduceType::kMax>}; |
1058 | return &r; |
1059 | } |
1060 | |
1061 | TfLiteRegistration* Register_REDUCE_MIN_REF() { |
1062 | static TfLiteRegistration r = { |
1063 | reduce::Init, reduce::Free, reduce::PrepareSimple, |
1064 | reduce::EvalGeneric<reduce::kReference, ReduceType::kMin>}; |
1065 | return &r; |
1066 | } |
1067 | |
1068 | TfLiteRegistration* Register_REDUCE_MIN_OPT() { |
1069 | static TfLiteRegistration r = { |
1070 | reduce::Init, reduce::Free, reduce::PrepareSimple, |
1071 | reduce::EvalGeneric<reduce::kGenericOptimized, ReduceType::kMin>}; |
1072 | return &r; |
1073 | } |
1074 | |
1075 | TfLiteRegistration* Register_REDUCE_ANY_REF() { |
1076 | static TfLiteRegistration r = { |
1077 | reduce::Init, reduce::Free, reduce::PrepareAllOrAny, |
1078 | reduce::EvalGeneric<reduce::kReference, ReduceType::kAny>}; |
1079 | return &r; |
1080 | } |
1081 | |
1082 | TfLiteRegistration* Register_REDUCE_ANY_OPT() { |
1083 | static TfLiteRegistration r = { |
1084 | reduce::Init, reduce::Free, reduce::PrepareAllOrAny, |
1085 | reduce::EvalGeneric<reduce::kGenericOptimized, ReduceType::kAny>}; |
1086 | return &r; |
1087 | } |
1088 | |
1089 | TfLiteRegistration* Register_REDUCE_ALL_REF() { |
1090 | static TfLiteRegistration r = { |
1091 | reduce::Init, reduce::Free, reduce::PrepareAllOrAny, |
1092 | reduce::EvalGeneric<reduce::kReference, ReduceType::kAll>}; |
1093 | return &r; |
1094 | } |
1095 | |
1096 | TfLiteRegistration* Register_REDUCE_ALL_OPT() { |
1097 | static TfLiteRegistration r = { |
1098 | reduce::Init, reduce::Free, reduce::PrepareAllOrAny, |
1099 | reduce::EvalGeneric<reduce::kGenericOptimized, ReduceType::kAll>}; |
1100 | return &r; |
1101 | } |
1102 | |
1103 | TfLiteRegistration* Register_MEAN() { return Register_MEAN_OPT(); } |
1104 | |
1105 | TfLiteRegistration* Register_SUM() { return Register_SUM_OPT(); } |
1106 | TfLiteRegistration* Register_REDUCE_PROD() { |
1107 | return Register_REDUCE_PROD_OPT(); |
1108 | } |
1109 | TfLiteRegistration* Register_REDUCE_MAX() { return Register_REDUCE_MAX_OPT(); } |
1110 | TfLiteRegistration* Register_REDUCE_MIN() { return Register_REDUCE_MIN_OPT(); } |
1111 | TfLiteRegistration* Register_REDUCE_ANY() { return Register_REDUCE_ANY_OPT(); } |
1112 | TfLiteRegistration* Register_REDUCE_ALL() { return Register_REDUCE_ALL_OPT(); } |
1113 | |
1114 | } // namespace builtin |
1115 | } // namespace ops |
1116 | } // namespace tflite |
1117 | |