/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/core/subgraph.h"

#include <stdarg.h>
#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/c/common_internal.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/core/macros.h"
#include "tensorflow/lite/experimental/resource/resource_base.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/memory_planner.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/util.h"
#ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
#include "tensorflow/lite/simple_planner.h"
#else
#include "tensorflow/lite/arena_planner.h"
#endif
#ifdef TF_LITE_TENSORFLOW_PROFILER
#include "tensorflow/lite/tensorflow_profiler_logger.h"
#endif  // TF_LITE_TENSORFLOW_PROFILER

namespace tflite {

namespace {

struct TfLiteQuantizationDeleter {
  void operator()(TfLiteQuantization* q) {
    if (q) TfLiteQuantizationFree(q);
  }
};

using ScopedTfLiteQuantization =
    std::unique_ptr<TfLiteQuantization, TfLiteQuantizationDeleter>;

struct TfLiteSparsityDeleter {
  void operator()(TfLiteSparsity* s) {
    if (s) TfLiteSparsityFree(s);
  }
};

using ScopedTfLiteSparsity =
    std::unique_ptr<TfLiteSparsity, TfLiteSparsityDeleter>;

TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node,
                           const TfLiteRegistration& registration,
                           int node_index, const char* message) {
  TF_LITE_KERNEL_LOG(context, "Node number %d (%s) %s.", node_index,
                     registration.custom_name
                         ? registration.custom_name
                         : EnumNameBuiltinOperator(static_cast<BuiltinOperator>(
                               registration.builtin_code)),
                     message);
  return kTfLiteError;
}

// Stub method which returns kTfLiteError when the function is forbidden.
// We're registering this function to several different function pointers to
// save compiled binary size. Please note the restrictions:
// * The type of the first parameter has to be `TfLiteContext*`.
// * All parameters must be trivially destructible (e.g. no C++ classes).
TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) {
  TF_LITE_KERNEL_LOG(context,
                     "The function is forbidden if not calling in delegate.");
  return kTfLiteError;
}

// Set the ForbiddenContextFunction to a compatible function pointer.
template <typename FunctionType>
void SetForbiddenContextFunction(FunctionType* func) {
  *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction);
}

// Returns true if at least one tensor in the given list is kTfLiteDynamic.
template <typename TensorIntArray>
bool HasDynamicTensorImpl(const TfLiteContext& context,
                          const TensorIntArray& int_array,
                          int* dynamic_tensor_index) {
  for (int i : int_array) {
    if (i == kTfLiteOptionalTensor) continue;
    const TfLiteTensor& tensor = context.tensors[i];
    if (tensor.allocation_type == kTfLiteDynamic) {
      if (dynamic_tensor_index) {
        *dynamic_tensor_index = i;
      }
      return true;
    }
  }
  return false;
}

bool HasDynamicTensor(const TfLiteContext& context,
                      const TfLiteIntArray* int_array,
                      int* dynamic_tensor_index) {
  return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array},
                              dynamic_tensor_index);
}

// Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization.
TfLiteQuantizationParams GetLegacyQuantization(
    const TfLiteQuantization& quantization) {
  TfLiteQuantizationParams legacy_quantization;
  legacy_quantization.scale = 0;
  legacy_quantization.zero_point = 0;

  // If the quantization type isn't affine, return the empty
  // legacy_quantization.
  if (quantization.type != kTfLiteAffineQuantization) {
    return legacy_quantization;
  }

  auto* affine_quantization =
      static_cast<TfLiteAffineQuantization*>(quantization.params);
  if (!affine_quantization || !affine_quantization->scale ||
      !affine_quantization->zero_point ||
      affine_quantization->scale->size != 1 ||
      affine_quantization->zero_point->size != 1) {
    return legacy_quantization;
  }

  // We know it's per-layer quantization now.
  legacy_quantization.scale = affine_quantization->scale->data[0];
  legacy_quantization.zero_point = affine_quantization->zero_point->data[0];
  return legacy_quantization;
}
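
// Illustrative example (not exercised by this file directly): a per-tensor
// affine quantization whose `scale` and `zero_point` arrays each hold a single
// entry, e.g. scale = {0.5f} and zero_point = {128}, maps to legacy params
// {scale = 0.5, zero_point = 128}. A per-channel quantization
// (scale->size > 1) falls through the checks above and yields the empty
// {0, 0} legacy params.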

static constexpr const char kUnknownCustomOpName[] = "UnknownCustomOp";
const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) {
  if (op_reg.builtin_code == tflite::BuiltinOperator_CUSTOM) {
    const char* const custom_name = op_reg.custom_name;
    return custom_name ? custom_name : kUnknownCustomOpName;
  }
  if (op_reg.builtin_code == tflite::BuiltinOperator_DELEGATE &&
      op_reg.custom_name) {
    return op_reg.custom_name;
  }
  return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code];
}

// Verifies custom allocation for tensor, if applicable.
TfLiteStatus VerifyCustomAllocationForTensor(
    TfLiteContext* context,
    const std::map<int, TfLiteCustomAllocation>& tensor_idx_to_alloc,
    const int tensor_idx) {
  auto& tensor = context->tensors[tensor_idx];
  if (tensor.allocation_type != kTfLiteCustom) return kTfLiteOk;
  const auto idx_and_alloc = tensor_idx_to_alloc.find(tensor_idx);
  TF_LITE_ENSURE(context, idx_and_alloc != tensor_idx_to_alloc.end());
  if (idx_and_alloc->second.bytes < tensor.bytes) {
    TF_LITE_KERNEL_LOG(context,
                       "Custom allocation is too small for tensor idx: %d",
                       tensor_idx);
    return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace

// A trivial implementation of GraphInfo around the Interpreter.
// NOTE: this interpreter info represents the subset of the
// graph that is executed according to execution plan. Thus,
// the indices are execution plan indices rather than raw node
// indices.
class InterpreterInfo : public GraphInfo {
 public:
  explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}

  size_t num_tensors() const override { return subgraph_->tensors_size(); }
  TfLiteTensor* tensors() override { return subgraph_->tensors(); }
  TfLiteTensor* tensor(size_t index) override {
    return subgraph_->tensor(index);
  }
  size_t num_execution_nodes() const override {
    return subgraph_->execution_plan().size();
  }
  size_t num_total_nodes() const override { return subgraph_->nodes_size(); }
  const TfLiteNode& node(size_t index) const override {
    int node_index = subgraph_->execution_plan()[index];
    return subgraph_->nodes_and_registration()[node_index].first;
  }
  size_t node_index(size_t index) const override {
    return subgraph_->execution_plan()[index];
  }
  const std::vector<int>& inputs() const override {
    return subgraph_->inputs();
  }
  const std::vector<int>& outputs() const override {
    return subgraph_->outputs();
  }
  const std::vector<int>& variables() const override {
    return subgraph_->variables();
  }

 public:
  Subgraph* subgraph_;
};

Subgraph::Subgraph(ErrorReporter* error_reporter,
                   TfLiteExternalContext** external_contexts,
                   std::vector<std::unique_ptr<Subgraph>>* subgraphs,
                   resource::ResourceMap* resources,
                   resource::ResourceIDMap* resource_ids,
                   resource::InitializationStatusMap* initialization_status_map,
                   int subgraph_index)
    : external_contexts_(external_contexts),
      error_reporter_(error_reporter),
      next_execution_plan_index_to_prepare_(0),
      next_execution_plan_index_to_plan_allocation_(0),
      subgraphs_(subgraphs),
      subgraph_index_(subgraph_index),
      resources_(resources),
      resource_ids_(resource_ids),
      initialization_status_map_(initialization_status_map),
      options_(nullptr) {
  context_.impl_ = static_cast<void*>(this);
  context_.ResizeTensor = ResizeTensor;
  context_.ReportError = ReportErrorC;
  context_.AddTensors = AddTensors;
  context_.tensors = nullptr;
  context_.tensors_size = 0;
  context_.allow_fp32_relax_to_fp16 = false;
  context_.recommended_num_threads = -1;
  context_.GetExternalContext = GetExternalContext;
  context_.SetExternalContext = SetExternalContext;
  context_.profiler = nullptr;
  context_.GetTensor = nullptr;
  context_.GetEvalTensor = nullptr;
  context_.GetModelMetadata = GetModelMetadata;

  // Reserve some space for the tensors to avoid excessive resizing.
  tensors_.reserve(kTensorsReservedCapacity);
  nodes_and_registration_.reserve(kTensorsReservedCapacity);
  // Invalid to call these except from TfLiteDelegate
  SwitchToKernelContext();
}

Subgraph::~Subgraph() {
  for (int node_index = 0; node_index < nodes_and_registration_.size();
       ++node_index) {
    CleanupNode(node_index);
  }

  for (size_t i = 0; i < context_.tensors_size; i++) {
    TfLiteTensor* tensor = &context_.tensors[i];
    if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
      TfLiteDelegateFreeBufferHandleInternal(&context_, tensor->delegate,
                                             &tensor->buffer_handle);
    }

    TfLiteTensorFree(tensor);
  }
}

void Subgraph::CleanupNode(int node_index) {
  TfLiteNode& node = nodes_and_registration_[node_index].first;
  const TfLiteRegistration& registration =
      nodes_and_registration_[node_index].second;
  TfLiteIntArrayFree(node.inputs);
  TfLiteIntArrayFree(node.outputs);
  TfLiteIntArrayFree(node.temporaries);
  TfLiteIntArrayFree(node.intermediates);
  if (node.builtin_data) free(node.builtin_data);
  OpFree(registration, node.user_data);
  node.builtin_data = nullptr;
}

TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
    TfLiteContext* context, TfLiteRegistration registration,
    const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
  return static_cast<Subgraph*>(context->impl_)
      ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace,
                                              delegate);
}

namespace {

// Copy a std::vector<int> to an existing TfLiteIntArray.
// This is a low-level data manipulation function, and it's the caller's
// responsibility to ensure the TfLiteIntArray has enough size.
void CopyVectorToTfLiteIntArray(const std::vector<int>& vec,
                                TfLiteIntArray* arr) {
  arr->size = vec.size();
  memcpy(arr->data, vec.data(), sizeof(int) * arr->size);
}

// This function allocates a contiguous block of memory that contains a
// TfLiteDelegateParams followed by several TfLiteIntArrays.
// When calling `free` on the TfLiteDelegateParams*, all of the allocated
// space is freed together.
//
// +-----------------------------------+
// | TfLiteDelegateParams              |
// | TfLiteDelegate* delegate;         |
// | TfLiteIntArray* nodes_to_replace; |--\
// | TfLiteIntArray* input_tensors;    |--+--\
// | TfLiteIntArray* output_tensors;   |--+--+--\
// +-----------------------------------+  |  |  |
// | TfLiteIntArray (variable size)    |<-/  |  |
// +-----------------------------------+     |  |
// | TfLiteIntArray (variable size)    |<----/  |
// +-----------------------------------+        |
// | TfLiteIntArray (variable size)    |<-------/
// +-----------------------------------+
TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
                                           const NodeSubset& node_subset) {
  // Step 1: Calculate the allocation size.
  int allocation_size = sizeof(TfLiteDelegateParams);

  int nodes_to_replace_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size());
  allocation_size += nodes_to_replace_size;

  int input_tensors_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size());
  allocation_size += input_tensors_size;

  int output_tensors_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size());
  allocation_size += output_tensors_size;

  // Step 2: Allocate the memory.
  // Use `char*` to conveniently step through the allocated space by bytes.
  char* allocation = static_cast<char*>(malloc(allocation_size));

  // Step 3: Fill all data structures.
  TfLiteDelegateParams* params =
      reinterpret_cast<TfLiteDelegateParams*>(allocation);
  params->delegate = delegate;
  allocation += sizeof(TfLiteDelegateParams);

  params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
  allocation += nodes_to_replace_size;

  params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
  allocation += input_tensors_size;

  params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.output_tensors,
                             params->output_tensors);
  allocation += output_tensors_size;

  return params;
}
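
// The TfLiteDelegateParams created above is passed to AddNodeWithParameters()
// as the delegate node's `builtin_data`, so the whole block is later released
// by the single `free(node.builtin_data)` call in Subgraph::CleanupNode().
// Illustrative sketch (the `node_subset` value is hypothetical):
//
//   TfLiteDelegateParams* params = CreateDelegateParams(delegate, node_subset);
//   // ... read params->nodes_to_replace / input_tensors / output_tensors ...
//   free(params);  // frees the params and all three embedded TfLiteIntArrays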

// Assumes that params is not nullptr.
void PopulatePreviewDelegateParams(const NodeSubset& node_subset,
                                   TfLiteDelegateParams* params) {
  // Since these params are used for previewing partitioning, params->delegate
  // is not required.
  params->delegate = nullptr;

  params->nodes_to_replace = TfLiteIntArrayCreate(node_subset.nodes.size());
  CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);

  params->input_tensors =
      TfLiteIntArrayCreate(node_subset.input_tensors.size());
  CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);

  params->output_tensors =
      TfLiteIntArrayCreate(node_subset.output_tensors.size());
  CopyVectorToTfLiteIntArray(node_subset.output_tensors,
                             params->output_tensors);
}

}  // namespace

TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
    TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegate* delegate) {
  // Ignore empty node replacement sets.
  if (!nodes_to_replace->size) {
    return kTfLiteOk;
  }

  // Annotate the registration as DELEGATE op.
  registration.builtin_code = BuiltinOperator_DELEGATE;

  // Analyze the graph to find all independent node_subsets that are either
  // fully not-this-delegate or this-delegate computation.
  InterpreterInfo info(this);
  std::vector<NodeSubset> node_subsets;
  PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
                                           &node_subsets);

  // On Android the log message below is used for diagnosing delegation success
  // also in production builds. Use VERBOSE here so that the logging is turned
  // off in production builds on other platforms.
  TFLITE_LOG_PROD(
      tflite::TFLITE_LOG_VERBOSE,
      "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions.",
      nodes_to_replace->size,
      registration.custom_name ? registration.custom_name : "unknown",
      node_subsets.size());

  execution_plan_.clear();

  for (auto& node_subset : node_subsets) {
    // Subsets claimed by the delegate should have a "macro" op created; the
    // other node_subsets (kTfNonPartition) just have their nodes added back to
    // the execution plan.
    switch (node_subset.type) {
      case NodeSubset::kTfNonPartition:
        for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end();
             ++it) {
          execution_plan_.push_back(*it);
        }
        break;
      case NodeSubset::kTfPartition: {
        int node_index;

        TfLiteDelegateParams* params =
            CreateDelegateParams(delegate, node_subset);
        TF_LITE_ENSURE_STATUS(AddNodeWithParameters(
            node_subset.input_tensors, node_subset.output_tensors, {}, nullptr,
            0, params, &registration, &node_index));

        // Initialize the output tensors' delegate-related fields.
        for (int tensor_index : node_subset.output_tensors) {
          TfLiteTensor* tensor = &tensors_[tensor_index];
          TF_LITE_ENSURE(&context_, tensor->delegate == nullptr ||
                                        tensor->delegate == delegate);
          tensor->delegate = delegate;
        }

        // Associate the node with the delegate.
        TfLiteNode* node = &nodes_and_registration_[node_index].first;
        node->delegate = delegate;
      } break;
      case NodeSubset::kTfUnexplored:
        return kTfLiteError;
        break;
    }
  }
  return kTfLiteOk;
}
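
// Illustrative sketch of how a delegate reaches the member function above from
// its Prepare callback via the static C entry point (the kernel registration
// callbacks and variable names are placeholders):
//
//   TfLiteRegistration kernel{};
//   kernel.init = DelegateKernelInit;        // hypothetical callbacks
//   kernel.prepare = DelegateKernelPrepare;
//   kernel.invoke = DelegateKernelInvoke;
//   kernel.custom_name = "MyDelegateKernel";
//   context->ReplaceNodeSubsetsWithDelegateKernels(context, kernel,
//                                                  supported_nodes, delegate);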

TfLiteExternalContext* Subgraph::GetExternalContext(
    TfLiteExternalContextType type) {
  if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
    return external_contexts_[type];
  }
  return nullptr;
}

TfLiteExternalContext* Subgraph::GetExternalContext(
    struct TfLiteContext* context, TfLiteExternalContextType type) {
  return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type);
}

void Subgraph::SetExternalContext(TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx) {
  if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
    external_contexts_[type] = ctx;
  }
}

void Subgraph::SetExternalContext(struct TfLiteContext* context,
                                  TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx) {
  return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx);
}

// Gets a TfLiteIntArray* representing the execution plan. The interpreter owns
// this memory and it is only guaranteed to exist during the invocation of the
// delegate prepare.
TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) {
  plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size()));
  *execution_plan = plan_cache_.get();
  static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]),
                "TfLiteIntArray and execution_plan do not contain same type.");
  std::memcpy(plan_cache_->data, execution_plan_.data(),
              sizeof(plan_cache_->data[0]) * execution_plan_.size());
  return kTfLiteOk;
}

// WARNING: This is an experimental interface that is subject to change.
// Entry point for C node plugin API to get the execution plan.
TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context,
                                        TfLiteIntArray** execution_plan) {
  return static_cast<Subgraph*>(context->impl_)
      ->GetExecutionPlan(execution_plan);
}
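
// Illustrative sketch of a delegate consuming the execution plan during its
// Prepare callback:
//
//   TfLiteIntArray* plan = nullptr;
//   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
//   for (int i = 0; i < plan->size; ++i) {
//     const int node_index = plan->data[i];
//     // Inspect the node, e.g. via context->GetNodeAndRegistration(...).
//   }
//
// `plan` aliases plan_cache_ above: the caller must not free it, and it is
// only guaranteed to stay valid for the duration of the delegate's Prepare.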

void Subgraph::FreeDelegatePartitioningData() {
  for (auto& params : partitioning_preview_cache_) {
    TfLiteIntArrayFree(params.nodes_to_replace);
    TfLiteIntArrayFree(params.input_tensors);
    TfLiteIntArrayFree(params.output_tensors);
  }
  partitioning_preview_cache_.clear();
}

TfLiteStatus Subgraph::GetModelMetadata(const char* name, const char** ptr,
                                        size_t* bytes) {
  TF_LITE_ENSURE(&context_, ptr != nullptr);
  TF_LITE_ENSURE(&context_, bytes != nullptr);
  *ptr = nullptr;
  *bytes = 0;
  if (!metadata_) return kTfLiteError;
  const std::string name_str = name;
  auto itr = metadata_->find(name_str);
  if (itr != metadata_->end()) {
    *ptr = itr->second.c_str();
    *bytes = itr->second.size();
    return kTfLiteOk;
  }
  return kTfLiteError;
}

TfLiteStatus Subgraph::GetModelMetadata(const struct TfLiteContext* context,
                                        const char* name, const char** ptr,
                                        size_t* bytes) {
  return static_cast<Subgraph*>(context->impl_)
      ->GetModelMetadata(name, ptr, bytes);
}

TfLiteStatus Subgraph::PreviewDelegatePartitioning(
    const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegateParams** partition_params_array, int* num_partitions) {
  // Ensure partitioning cache is empty.
  FreeDelegatePartitioningData();
  // Defaults.
  if (!partition_params_array || !num_partitions) return kTfLiteError;
  *partition_params_array = nullptr;
  *num_partitions = 0;
  if (!nodes_to_replace->size) {
    return kTfLiteOk;
  }

  // Partition the execution plan into node subsets.
  InterpreterInfo info(this);
  std::vector<NodeSubset> node_subsets;
  PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
                                           &node_subsets);

  // Create one TfLiteDelegateParams per node-subset which would be delegated.
  for (auto& node_subset : node_subsets) {
    if (node_subset.type != NodeSubset::kTfPartition) {
      continue;
    }
    partitioning_preview_cache_.emplace_back();
    PopulatePreviewDelegateParams(node_subset,
                                  &partitioning_preview_cache_.back());
    ++*num_partitions;
  }

  *partition_params_array = partitioning_preview_cache_.data();
  return kTfLiteOk;
}

TfLiteStatus Subgraph::PreviewDelegatePartitioning(
    struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegateParams** partition_params_array, int* num_partitions) {
  return static_cast<Subgraph*>(context->impl_)
      ->PreviewDelegatePartitioning(nodes_to_replace, partition_params_array,
                                    num_partitions);
}
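
// Illustrative sketch of a delegate previewing partitions before deciding what
// to claim (names other than the context API are hypothetical):
//
//   TfLiteDelegateParams* partitions = nullptr;
//   int num_partitions = 0;
//   TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
//       context, supported_nodes, &partitions, &num_partitions));
//   // e.g. delegate only the largest partition, using
//   // partitions[i].nodes_to_replace to enumerate partition i's nodes.
//
// The returned array is backed by partitioning_preview_cache_ and remains
// valid at least until the next PreviewDelegatePartitioning() call, which
// clears the cache via FreeDelegatePartitioningData().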

TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) {
  TF_LITE_ENSURE_OK(&context_,
                    CheckTensorIndices("inputs", inputs.data(), inputs.size()));
  inputs_ = std::move(inputs);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) {
  TF_LITE_ENSURE_OK(
      &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size()));
  outputs_ = std::move(outputs);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) {
  TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables", variables.data(),
                                                  variables.size()));
  variables_ = std::move(variables);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetMetadata(
    const std::map<std::string, std::string>* metadata) {
  metadata_ = metadata;
  return kTfLiteOk;
}

void Subgraph::SetCancellationFunction(void* data,
                                       bool (*check_cancelled_func)(void*)) {
  cancellation_data_ = data;
  check_cancelled_func_ = check_cancelled_func;
}

TfLiteStatus Subgraph::EnsureTensorDataIsReadable(int tensor_index) {
  TfLiteTensor* t = &tensors_[tensor_index];
  TF_LITE_ENSURE(&context_, t != nullptr);
  TfLiteStatus status = kTfLiteOk;
  if (t->data_is_stale) {
    TF_LITE_ENSURE(&context_, t->delegate != nullptr);
    TF_LITE_ENSURE(&context_, t->buffer_handle != kTfLiteNullBufferHandle);
    status = TfLiteDelegateCopyFromBufferHandleInternal(&context_, t->delegate,
                                                        t->buffer_handle, t);
    t->data_is_stale = false;
  }
  return status;
}

bool Subgraph::IsCancelled() {
  return (check_cancelled_func_ != nullptr) &&
         (*check_cancelled_func_)(cancellation_data_);
}

void Subgraph::ReserveNodes(int count) {
  nodes_and_registration_.reserve(count);
}

TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
                                          int length) {
  // Making sure kTfLiteOptionalTensor is not re-defined to something other
  // than -1.
  static_assert(kTfLiteOptionalTensor == -1,
                "kTfLiteOptionalTensor should be defined -1");

  for (int i = 0; i < length; i++) {
    int index = indices[i];
    // Continue if index == kTfLiteOptionalTensor before the additional
    // comparisons below; size_t(-1) is always >= context_.tensors_size.
    if (index == kTfLiteOptionalTensor) {
      continue;
    }
    if (index < 0 || static_cast<size_t>(index) >= context_.tensors_size) {
      ReportError(
          "Invalid tensor index %d in %s. The subgraph has %d tensors\n", index,
          label, context_.tensors_size);
      consistent_ = false;
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

// We have two arrays and we need to check that elements from one array don't
// show up in the other. We could sort both arrays and then iterate with two
// pointers from start to finish, always increasing the smaller one, but since
// these arrays are usually short (<25 elements for inputs, usually <3 for
// outputs), this might be slower than the naive approach (if the arrays have
// sizes n and m, with n >> m ~ O(1), the first approach is O(n log n) whereas
// the other is O(n*m), i.e. O(n)). Plus, sorting the input and output arrays
// might not be something we want as it destroys the ordering of elements.
//
// If it turns out that this is an issue, we can switch to the other algorithm.
TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices,
                                                     int num_inputs,
                                                     const int* output_indices,
                                                     int num_outputs) {
  for (int i = 0; i < num_inputs; i++) {
    for (int j = 0; j < num_outputs; j++) {
      if (input_indices[i] == output_indices[j]) {
        ReportError("Tensor %d is both input %d and output %d\n",
                    input_indices[i], i, j);
        consistent_ = false;
        return kTfLiteError;
      }
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
                                     size_t dims_size, size_t* bytes) {
  TF_LITE_ENSURE(&context_, bytes != nullptr);
  // When 'dims_size' is 0, we simply assume it's a scalar. Therefore, we start
  // 'count' as 1.
  size_t count = 1;
  for (int k = 0; k < dims_size; k++) {
    size_t old_count = count;
    TF_LITE_ENSURE_MSG(
        &context_,
        MultiplyAndCheckOverflow(old_count, dims[k], &count) == kTfLiteOk,
        "BytesRequired number of elements overflowed.\n");
  }
  size_t type_size = 0;
  TF_LITE_ENSURE_OK(&context_, GetSizeOfType(&context_, type, &type_size));
  TF_LITE_ENSURE_MSG(
      &context_, MultiplyAndCheckOverflow(type_size, count, bytes) == kTfLiteOk,
      "BytesRequired number of bytes overflowed.\n");
  return kTfLiteOk;
}
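
// Worked example: for type == kTfLiteFloat32 and dims == {2, 3}, the element
// count is 2 * 3 = 6 and GetSizeOfType() reports 4 bytes per element, so
// *bytes is set to 24. The two MultiplyAndCheckOverflow() calls above guard
// both multiplications against overflow.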

TfLiteStatus Subgraph::AllocateTensors() {
  if (!consistent_) {
    ReportError("AllocateTensors() called on inconsistent model.");
    return kTfLiteError;
  }

  // Restore delegation state if applicable.
  TF_LITE_ENSURE_STATUS(RedoAllDelegates());

  // The runtime doesn't need to adjust any allocations if the state is
  // invokable & no inputs are dynamic (which implies memory plan is unchanged).
  const bool no_reallocations_necessary =
      state_ != kStateUninvokable &&
      !HasDynamicTensorImpl(context_, inputs(), &dynamic_tensor_index_);
  if (no_reallocations_necessary) {
    // If non-persistent memory was released, re-allocate it.
    if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
      memory_planner_->AcquireNonPersistentMemory();
    }
    // Check custom allocations, which may have been modified since the last
    // AllocateTensors() call.
    if (!custom_allocations_.empty()) {
      for (const auto& idx_and_alloc : custom_allocations_) {
        const int idx = idx_and_alloc.first;
        TfLiteTensor* tensor_at_index = tensor(idx);
        TF_LITE_ENSURE_EQ(context(), tensor_at_index->allocation_type,
                          kTfLiteCustom);
        TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor(
            context(), custom_allocations_, idx));
      }
    }
    return kTfLiteOk;
  }

  // Profile "AllocateTensors" only when memory planning is needed.
  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "AllocateTensors");

  next_execution_plan_index_to_prepare_ = 0;
  next_execution_plan_index_to_plan_allocation_ = 0;
  next_original_execution_plan_index_to_prepare_ = 0;
  if (memory_planner_) {
    TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
  }

  TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());

  state_ = kStateInvokable;

  // Reset the variable tensors to zero after (re)allocating the tensors.
  // Developers shouldn't rely on the side effect of this function to reset
  // variable tensors. They should call `ResetVariableTensors` directly
  // instead.
  ResetVariableTensors();

  // Initialize the mapping between tensor index and the last execution plan
  // index that uses the tensor.
  InitializeTensorReleaseMap();

  return kTfLiteOk;
}

// TODO(b/115961645): Support non-zero default values.
TfLiteStatus Subgraph::ResetVariableTensors() {
  for (auto& tensor : tensors_) {
    if (!tensor.is_variable) {
      continue;
    }

    if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
      // If a variable tensor's allocation type is `kTfLiteArenaRwPersistent`,
      // then it must have been allocated after the initial
      // `PrepareOpsAndTensors()` call.
      TF_LITE_ENSURE(&context_, tensor.data.raw != nullptr);
      tflite::ResetVariableTensor(&tensor);
    } else {
      // If a variable tensor's allocation type is not
      // `kTfLiteArenaRwPersistent`, then it can only be `kTfLiteCustom`, in
      // which case we do not reset it.
      TF_LITE_ENSURE_EQ(&context_, tensor.allocation_type, kTfLiteCustom);
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const std::vector<int>& intermediates, const char* init_data,
    size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data,
                                                              free);
  if (state_ == kStateInvokableAndImmutable) {
    ReportError("AddNodeWithParameters is disallowed when graph is immutable.");
    return kTfLiteError;
  }
  state_ = kStateUninvokable;

  TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("node inputs", inputs.data(),
                                                  inputs.size()));
  TF_LITE_ENSURE_OK(
      &context_,
      CheckTensorIndices("node outputs", outputs.data(), outputs.size()));

  // For builtin ops, inputs and outputs must not overlap. Custom ops must do
  // this check by themselves if they don't support overlapping tensors. This
  // distinction is to allow custom ops to just forward a tensor, reusing it as
  // both input and output.
  if (builtin_data != nullptr) {
    TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap(
                                     inputs.data(), inputs.size(),
                                     outputs.data(), outputs.size()));
  }

  int new_node_index = nodes_and_registration_.size();
  if (node_index) *node_index = new_node_index;
  nodes_and_registration_.emplace_back();
  auto& node_and_reg = nodes_and_registration_.back();
  TfLiteNode& node = node_and_reg.first;

  // NOTE, here we are not using move semantics yet, since our internal
  // representation isn't std::vector, but in the future we would like to avoid
  // copies, so we want the interface to take r-value references now.
  node.inputs = ConvertVectorToTfLiteIntArray(inputs);
  node.outputs = ConvertVectorToTfLiteIntArray(outputs);
  node.intermediates = ConvertVectorToTfLiteIntArray(intermediates);
  node.temporaries = TfLiteIntArrayCreate(0);
  if (init_data) {
    node.user_data = OpInit(*registration, init_data, init_data_size);
  } else {
    node.user_data = OpInit(
        *registration, static_cast<const char*>(builtin_data_deleter.get()), 0);
  }

  node.builtin_data = builtin_data_deleter.release();

  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
    // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
    // `Operator` table is passed in.
    node.custom_initial_data = init_data;
    node.custom_initial_data_size = init_data_size;
  } else {
    node.custom_initial_data = nullptr;
    node.custom_initial_data_size = 0;
  }
  node.might_have_side_effect = OpMightHaveSideEffect(&node, registration);

  node.delegate = nullptr;
  // Copying of registration is required to support unresolved custom ops.
  node_and_reg.second = *registration;
  execution_plan_.push_back(new_node_index);
  return kTfLiteOk;
}

namespace {
// Returns true if any tensor identified by indexes in 'tensor_indexes' is
// of type 'kTfLiteResource'. False otherwise.
bool AnyTensorOfTypeResource(const std::vector<TfLiteTensor>& tensors,
                             const TfLiteIntArray* tensor_indexes) {
  for (int i = 0; i < tensor_indexes->size; ++i) {
    int tensor_index = tensor_indexes->data[i];
    if (tensor_index >= 0 && tensor_index < tensors.size() &&
        tensors[tensor_index].type == kTfLiteResource)
      return true;
  }
  return false;
}

}  // namespace

bool Subgraph::OpMightHaveSideEffect(
    const TfLiteNode* node, const TfLiteRegistration* registration) const {
  // Check if any of the input tensors are of type resource.
  if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true;
  // Check if any of the output tensors are of type resource.
  if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true;
  // Consider control flow ops as having side effects: some ops in the control
  // flow subgraph can have side effects.
  if (registration->builtin_code == kTfLiteBuiltinIf ||
      registration->builtin_code == kTfLiteBuiltinWhile ||
      registration->builtin_code == kTfLiteBuiltinCallOnce)
    return true;
  return false;
}

TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
                                         const std::vector<int>& dims) {
  const bool delegates_applied = !pre_delegation_execution_plan_.empty();
  const bool graph_is_immutable = state_ == kStateInvokableAndImmutable;
  if (graph_is_immutable && !delegates_applied) {
    ReportError("ResizeInputTensor is disallowed when graph is immutable.");
    return kTfLiteError;
  }

  TF_LITE_ENSURE(&context_,
                 tensor_index < context_.tensors_size && tensor_index >= 0);
  TfLiteTensor* tensor = &context_.tensors[tensor_index];

  // Short-circuit the state change if the dimensions don't change, avoiding
  // unnecessary (re)allocations.
  //
  // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
  // the subgraph won't allocate memory for a dynamic tensor when its size
  // is equal to the original tensor size.
  if (tensor->data.raw != nullptr &&
      EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
    return kTfLiteOk;
  }

  if (graph_is_immutable) {
    // Undo delegation if it resulted in the graph being immutable.
    TF_LITE_ENSURE_STATUS(UndoAllDelegates());
  }
  state_ = kStateUninvokable;
  return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims));
}

TfLiteStatus Subgraph::ResizeInputTensorStrict(int tensor_index,
                                               const std::vector<int>& dims) {
  TF_LITE_ENSURE(&context_,
                 tensor_index < context_.tensors_size && tensor_index >= 0);
  TfLiteTensor* tensor = &context_.tensors[tensor_index];

  // Ensure that only unknown dimensions can be resized.
  TF_LITE_ENSURE_EQ(&context_, tensor->dims->size, dims.size());
  for (size_t idx = 0; idx < dims.size(); idx++) {
    // `dims_signature` is not defined when no unknown dimensions are present.
    int dim_signature;
    if (tensor->dims_signature && tensor->dims_signature->size) {
      dim_signature = tensor->dims_signature->data[idx];
    } else {
      dim_signature = tensor->dims->data[idx];
    }

    if (dim_signature != -1 && dim_signature != dims[idx]) {
      ReportError(
          "Attempting to resize dimension %d of tensor %d with value %d to %d. "
          "ResizeInputTensorStrict only allows mutating unknown dimensions "
          "identified by -1.",
          idx, tensor_index, dim_signature, dims[idx]);
      return kTfLiteError;
    }
  }

  return ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Subgraph::ReleaseNonPersistentMemory() {
  state_ = kStateUninvokable;
  if (memory_planner_) {
    TF_LITE_ENSURE_STATUS(memory_planner_->ReleaseNonPersistentMemory());
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::ReleaseMemory() {
  state_ = kStateUninvokable;
  ReleaseNonPersistentMemory();

  // Free dynamic input tensors.
  for (const int input_tensor_idx : inputs_) {
    if (input_tensor_idx == kTfLiteOptionalTensor) continue;
    TfLiteTensor* input_tensor = tensor(input_tensor_idx);
    if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic)
      continue;
    if (input_tensor->data.raw) {
      TfLiteTensorDataFree(input_tensor);
    }
  }
  // Free dynamic output tensors.
  for (const int output_tensor_idx : outputs_) {
    if (output_tensor_idx == kTfLiteOptionalTensor) continue;
    TfLiteTensor* output_tensor = tensor(output_tensor_idx);
    if (!output_tensor || output_tensor->allocation_type != kTfLiteDynamic)
      continue;
    if (output_tensor->data.raw) {
      TfLiteTensorDataFree(output_tensor);
    }
  }

  return kTfLiteOk;
}

// Give 'op_reg' a chance to initialize itself using the contents of
// 'buffer'. If registration_external is valid, use the 'init' callback from
// that.
void* Subgraph::OpInit(const TfLiteRegistration& op_reg, const char* buffer,
                       size_t length) {
  if (op_reg.registration_external && op_reg.registration_external->init) {
    return op_reg.registration_external->init(
        op_reg.registration_external->init_data,
        reinterpret_cast<TfLiteOpaqueContext*>(&context_), buffer, length);
  }
  if (op_reg.init == nullptr) return nullptr;
  return op_reg.init(&context_, buffer, length);
}

TfLiteStatus Subgraph::OpPrepare(const TfLiteRegistration& op_reg,
                                 TfLiteNode* node) {
  if (op_reg.registration_external && op_reg.registration_external->prepare) {
    // The 'data' field required by the 'prepare' function pointer must be
    // retrieved from the 'registration_external' object itself.
    return op_reg.registration_external->prepare(
        op_reg.registration_external->prepare_data,
        reinterpret_cast<TfLiteOpaqueContext*>(&context_),
        reinterpret_cast<TfLiteOpaqueNode*>(node));
  }
  if (op_reg.prepare == nullptr) {
    // Check if it's an unresolved custom op.
    if (IsUnresolvedCustomOp(op_reg)) {
      if (IsFlexOp(op_reg.custom_name)) {
        ReportError(
            "Select TensorFlow op(s), included in the given model, is(are) not "
            "supported by this interpreter. Make sure you apply/link the Flex "
            "delegate before inference. For the Android, it can be resolved by "
            "adding \"org.tensorflow:tensorflow-lite-select-tf-ops\" "
            "dependency. See instructions: "
            "https://www.tensorflow.org/lite/guide/ops_select");
      } else {
        ReportError(
            "Encountered unresolved custom op: %s.\nSee instructions: "
            "https://www.tensorflow.org/lite/guide/ops_custom ",
            op_reg.custom_name ? op_reg.custom_name : "UnknownOp");
      }
      return kTfLiteUnresolvedOps;
    }
    // Resolved ops can have a null Prepare function.
    return kTfLiteOk;
  }
  return op_reg.prepare(&context_, node);
}

// Invoke the operator represented by 'node'.
TfLiteStatus Subgraph::OpInvoke(const TfLiteRegistration& op_reg,
                                TfLiteNode* node) {
  if (op_reg.registration_external && op_reg.registration_external->invoke) {
    return op_reg.registration_external->invoke(
        op_reg.registration_external->invoke_data,
        reinterpret_cast<TfLiteOpaqueContext*>(&context_),
        reinterpret_cast<TfLiteOpaqueNode*>(node));
  }
  if (op_reg.invoke == nullptr) return kTfLiteError;
  return op_reg.invoke(&context_, node);
}

// Let 'op_reg' release any memory it might have allocated via 'OpInit'.
// If registration_external is valid, use the 'free' callback from that.
void Subgraph::OpFree(const TfLiteRegistration& op_reg, void* buffer) {
  if (op_reg.registration_external && op_reg.registration_external->free &&
      buffer) {
    return op_reg.registration_external->free(
        op_reg.registration_external->free_data,
        reinterpret_cast<TfLiteOpaqueContext*>(&context_), buffer);
  }
  if (op_reg.free == nullptr) return;
  if (buffer) {
    op_reg.free(&context_, buffer);
  }
}

TfLiteStatus Subgraph::MayAllocateOpOutput(TfLiteNode* node) {
  if (ShouldOptimizeMemoryForLargeTensors()) {
    for (int i = 0; i < node->outputs->size; ++i) {
      int tensor_index = node->outputs->data[i];
      TfLiteTensor* tensor = &context_.tensors[tensor_index];
      if (tensor->data.raw == nullptr &&
          tensor->allocation_type == kTfLiteDynamic) {
        TfLiteTensorRealloc(tensor->bytes, tensor);
      }
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::PrepareOpsStartingAt(
    int first_execution_plan_index, const std::vector<int>& execution_plan,
    int* last_execution_plan_index_prepared) {
  if (first_execution_plan_index == 0) {
    // Inputs that are forwarded to outputs without modification won't be
    // evaluated by any operator, so the subgraph's output tensors need to be
    // checked directly at the beginning.
    has_dynamic_tensors_ =
        HasDynamicTensorImpl(context_, outputs(), &dynamic_tensor_index_);
  }
  for (int execution_plan_index = first_execution_plan_index;
       execution_plan_index < execution_plan.size(); execution_plan_index++) {
    int node_index = execution_plan[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    const TfLiteRegistration& registration =
        nodes_and_registration_[node_index].second;
    EnsureTensorsVectorCapacity();
#ifdef TF_LITE_TENSORFLOW_PROFILER
    tflite::OnTfLiteOpPrepare(GetTFLiteOpName(registration), subgraph_index_,
                              node_index);
#endif  // TF_LITE_TENSORFLOW_PROFILER
    const TfLiteStatus op_prepare_status = OpPrepare(registration, &node);
    if (op_prepare_status != kTfLiteOk) {
      ReportOpError(&context_, node, registration, node_index,
                    "failed to prepare");
      return op_prepare_status;
    }

    *last_execution_plan_index_prepared = execution_plan_index;

    // Discontinue if the node has dynamic outputs. Note that we don't
    // stop for dynamic temporary tensors since they won't affect the
    // sizes of other tensors in the graph.
    if (HasDynamicTensor(context_, node.outputs, &dynamic_tensor_index_)) {
      has_dynamic_tensors_ = true;
      return kTfLiteOk;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::PrepareOpsAndTensors() {
  if (!memory_planner_) {
#ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
    memory_planner_.reset(new SimplePlanner(&context_, CreateGraphInfo()));
#else
    memory_planner_ = std::make_unique<ArenaPlanner>(
        &context_, CreateGraphInfo(), ShouldPreserveAllTensors(),
        kDefaultTensorAlignment, subgraph_index_);
#endif
    memory_planner_->PlanAllocations();
  }

  // Prepare the original execution plan if any applied delegate wants it.
  // If any of the delegates is immutable, this won't be triggered
  // post-delegation (since we undo/redo delegation). For all other cases,
  // delegates that do shape propagation themselves would still be able to.
  bool prepare_original_plan = false;
  if (!pre_delegation_execution_plan_.empty()) {
    for (int i = 0; i < delegates_applied_.size(); ++i) {
      if ((TfLiteDelegateGetFlagsInternal(delegates_applied_[i]) &
           kTfLiteDelegateFlagsRequirePropagatedShapes)) {
        prepare_original_plan = true;
        break;
      }
    }
  }
  if (prepare_original_plan) {
    int last_original_exec_plan_index_prepared = 0;
    TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
        next_execution_plan_index_to_prepare_, pre_delegation_execution_plan_,
        &last_original_exec_plan_index_prepared));
    next_original_execution_plan_index_to_prepare_ =
        last_original_exec_plan_index_prepared + 1;
  }

  int last_exec_plan_index_prepared = 0;
  TF_LITE_ENSURE_STATUS(
      PrepareOpsStartingAt(next_execution_plan_index_to_prepare_,
                           execution_plan_, &last_exec_plan_index_prepared));
  next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;

  // Execute arena allocations.
  TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
      next_execution_plan_index_to_plan_allocation_,
      last_exec_plan_index_prepared));

  if (!custom_allocations_.empty()) {
    // Verify custom allocations for output tensors from the ops that have just
    // been prepared. Other output tensors might be resized later.
    if (!nodes_and_registration_.empty()) {
      for (int node_idx = next_execution_plan_index_to_plan_allocation_;
           node_idx <= last_exec_plan_index_prepared; ++node_idx) {
        TfLiteNode& node = nodes_and_registration_[node_idx].first;
        for (int i = 0; i < node.outputs->size; ++i) {
          const int output_tensor_idx = node.outputs->data[i];
          if (output_tensor_idx == kTfLiteOptionalTensor) continue;
          TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor(
              context(), custom_allocations_, output_tensor_idx));
        }
      }
    }
    // Check input custom allocations only if we just prepared nodes starting
    // from index 0.
    if (next_execution_plan_index_to_plan_allocation_ == 0) {
      for (const int input_tensor_idx : inputs_) {
        if (input_tensor_idx == kTfLiteOptionalTensor) continue;
        TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor(
            context(), custom_allocations_, input_tensor_idx));
      }
    }
  }

  next_execution_plan_index_to_plan_allocation_ =
      last_exec_plan_index_prepared + 1;

  return kTfLiteOk;
}

TfLiteStatus Subgraph::RemoveUnusedInputs() {
  auto graph_info = CreateGraphInfo();
  std::vector<int> refcounts(graph_info->num_tensors(), 0);

  for (int tensor_index : graph_info->variables()) {
    refcounts[tensor_index]++;
  }
  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }
  // Count references to SubGraph output tensors.
  for (auto iter = outputs_.begin(); iter != outputs_.end(); iter++) {
    if (*iter == kTfLiteOptionalTensor) continue;
    refcounts[*iter]++;
  }

  // Mark unused inputs as kTfLiteOptionalTensor.
  for (auto iter = inputs_.begin(); iter != inputs_.end(); iter++) {
    if (*iter == kTfLiteOptionalTensor) continue;
    if (refcounts[*iter] == 0) {
      tensor(*iter)->bytes = 0;  // To make it clearer for memory analysis.
      *iter = kTfLiteOptionalTensor;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::Invoke() {
  if (!consistent_) {
    ReportError("Invoke called on model that is not consistent.");
    return kTfLiteError;
  }

  TfLiteStatus status = kTfLiteOk;
  if (state_ == kStateUninvokable) {
    ReportError("Invoke called on model that is not ready.");
    return kTfLiteError;
  } else if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
    ReportError("Non-persistent memory is not available.");
    return kTfLiteError;
  }
  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "Invoke");
#ifdef TF_LITE_TENSORFLOW_PROFILER
  tensorflow::profiler::TraceMe* trace_subgraph =
      tflite::OnTfLiteSubgraphInvoke(name_.c_str(), subgraph_index_);
#endif  // TF_LITE_TENSORFLOW_PROFILER

  // Invocations are always done in node order.
  // Note that calling Invoke repeatedly will cause the original memory plan to
  // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
  // called.
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); execution_plan_index++) {
    if (execution_plan_index == next_execution_plan_index_to_prepare_) {
      TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
      TF_LITE_ENSURE(&context_, next_execution_plan_index_to_prepare_ >=
                                    execution_plan_index);
    }
    int node_index = execution_plan_[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    const TfLiteRegistration& registration =
        nodes_and_registration_[node_index].second;

    const char* op_name = nullptr;
    if (profiler_) op_name = GetTFLiteOpName(registration);
#ifdef TF_LITE_TENSORFLOW_PROFILER
    if (!op_name) {
      op_name = GetTFLiteOpName(registration);
    }
    tensorflow::profiler::TraceMe* trace_op =
        tflite::OnTfLiteOpInvoke(op_name, subgraph_index_, node_index);
#endif  // TF_LITE_TENSORFLOW_PROFILER
    TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler_.get(), op_name, node_index);

    for (int i = 0; i < node.inputs->size; ++i) {
      int tensor_index = node.inputs->data[i];
      if (tensor_index == kTfLiteOptionalTensor) {
        continue;
      }
      TfLiteTensor* tensor = &tensors_[tensor_index];
      if (tensor->delegate && tensor->delegate != node.delegate &&
          tensor->data_is_stale) {
        TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
      }
      if (tensor->data.raw == nullptr && tensor->bytes > 0) {
        if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 &&
            tensor->dims->size != 1) {
          // In general, having a tensor here with no buffer will be an error.
          // However, for the reshape operator, the second input tensor is
          // sometimes only used for the shape, not for the data. Thus, a null
          // buffer is ok in this situation.
          // The situation where a null buffer is not ok for the reshape
          // operator is only when there are 2 inputs given to the node and the
          // one corresponding to the shape (i == 1) is a vector that contains
          // all dimensions. See the `GetOutputShape()` function in
          // `tensorflow/lite/kernels/reshape.cc`.
          continue;
        } else {
          // In all other cases, we need to return an error as otherwise we
          // will (likely) trigger a null pointer dereference.
          ReportError("Input tensor %d lacks data", tensor_index);
          return kTfLiteError;
        }
      }
    }
    // Allocate dynamic tensors whose memory must be allocated before executing
    // the node.
    MayAllocateOpOutput(&node);

    if (check_cancelled_func_ != nullptr &&
        check_cancelled_func_(cancellation_data_)) {
      ReportError("Client requested cancel during Invoke()");
      return kTfLiteError;
    }

    EnsureTensorsVectorCapacity();
    tensor_resized_since_op_invoke_ = false;
    if (OpInvoke(registration, &node) != kTfLiteOk) {
      return ReportOpError(&context_, node, registration, node_index,
                           "failed to invoke");
    }

    // Force execution prep for downstream ops if the latest op triggered the
    // resize of a dynamic tensor.
    if (tensor_resized_since_op_invoke_ &&
        HasDynamicTensor(context_, node.outputs, nullptr)) {
      next_execution_plan_index_to_prepare_ = execution_plan_index + 1;

      // This happens when an intermediate dynamic tensor is resized.
      // We don't have to prepare all the ops, but we need to recompute
      // the allocation plan.
      if (next_execution_plan_index_to_plan_allocation_ >
          next_execution_plan_index_to_prepare_) {
        next_execution_plan_index_to_plan_allocation_ =
            next_execution_plan_index_to_prepare_;
        if (memory_planner_) {
          TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocationsAfter(
              next_execution_plan_index_to_plan_allocation_ - 1));
        }
      }
    }
    // Release dynamic tensor memory if configured by the user.
    MaybeReleaseDynamicTensors(node, node_index);

#ifdef TF_LITE_TENSORFLOW_PROFILER
    tflite::OnTfLiteOpInvokeEnd(trace_op);
#endif  // TF_LITE_TENSORFLOW_PROFILER
  }
#ifdef TF_LITE_TENSORFLOW_PROFILER
  tflite::OnTfLiteSubgraphInvokeEnd(trace_subgraph);
#endif  // TF_LITE_TENSORFLOW_PROFILER
  return status;
}

TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context,
                                    TfLiteTensor* tensor,
                                    TfLiteIntArray* new_size) {
  // If the dimensions don't change, skip the resize to avoid unnecessary
  // (re)allocations.
  //
  // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
  // the subgraph won't allocate memory for a dynamic tensor when its size
  // is equal to the original tensor size.
  if (tensor->data.raw != nullptr &&
      EqualArrayAndTfLiteIntArray(tensor->dims, new_size->size,
                                  new_size->data)) {
    // A number of clients assume |new_size| remains valid upon success, so
    // swap it in as the new (but logically identical) tensor dims.
    TfLiteIntArrayFree(tensor->dims);
    tensor->dims = new_size;
    return kTfLiteOk;
  }

  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Interpreter to call into the member function ResizeTensorImpl
  // (this function is static).
  return static_cast<Subgraph*>(context->impl_)
      ->ResizeTensorImpl(tensor, new_size);
}

void Subgraph::ReportErrorImpl(const char* format, va_list args) {
  error_reporter_->Report(format, args);
}

void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) {
  va_list args;
  va_start(args, format);
  auto* f = static_cast<Subgraph*>(context->impl_);
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ReportErrorImpl
  // (this function is static).
  f->ReportErrorImpl(format, args);
  va_end(args);
}

// Entry point for C node plugin API to report an error.
void Subgraph::ReportError(const char* format, ...) {
  va_list args;
  va_start(args, format);
  auto* f = static_cast<Subgraph*>(context_.impl_);
  // Note here that context_.impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ReportErrorImpl.
  f->ReportErrorImpl(format, args);
  va_end(args);
}
1428
1429TfLiteStatus Subgraph::AddTensors(int tensors_to_add,
1430 int* first_new_tensor_index) {
1431 const size_t base_index = tensors_.size();
1432 if (first_new_tensor_index) *first_new_tensor_index = base_index;
1433 tensors_.resize(tensors_.size() + tensors_to_add);
1434 for (size_t i = base_index; i < tensors_.size(); i++) {
1435 memset(&tensors_[i], 0, sizeof(tensors_[i]));
1436 tensors_[i].buffer_handle = kTfLiteNullBufferHandle;
1437 }
1438 context_.tensors = tensors_.data();
1439 context_.tensors_size = tensors_.size();
1440 return kTfLiteOk;
1441}
1442
1443TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add,
1444 int* first_new_tensor_index) {
1445 // Note here that context->impl_ is recovering the this pointer for an
1446 // instance of Interpreter to call into the member function AddTensors
1447 // (this function is static).
1448 return static_cast<Subgraph*>(context->impl_)
1449 ->AddTensors(tensors_to_add, first_new_tensor_index);
1450}
1451
1452TfLiteStatus Subgraph::GetNodeAndRegistration(
1453 int node_index, TfLiteNode** node, TfLiteRegistration** registration) {
1454 TF_LITE_ENSURE(&context_, node_index >= 0);
1455 auto nodes_size = nodes_and_registration_.size();
1456 TF_LITE_ENSURE(&context_, static_cast<size_t>(node_index) < nodes_size);
1457 TF_LITE_ENSURE(&context_, node != nullptr && registration != nullptr);
1458 auto& node_and_reg = nodes_and_registration_[node_index];
1459 *node = &node_and_reg.first;
1460 *registration = &node_and_reg.second;
1461 return kTfLiteOk;
1462}
1463
1464TfLiteStatus Subgraph::GetNodeAndRegistration(
1465 struct TfLiteContext* context, int node_index, TfLiteNode** node,
1466 TfLiteRegistration** registration) {
1467 return static_cast<Subgraph*>(context->impl_)
1468 ->GetNodeAndRegistration(node_index, node, registration);
1469}
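// Illustrative sketch (not part of the original file): during delegate
// preparation the context exposes GetExecutionPlan and GetNodeAndRegistration
// (see SwitchToDelegateContext below), which a delegate can use to inspect the
// graph. The selection logic is hypothetical.
//
//   TfLiteIntArray* plan = nullptr;
//   TF_LITE_ENSURE_OK(context, context->GetExecutionPlan(context, &plan));
//   for (int i = 0; i < plan->size; ++i) {
//     TfLiteNode* node = nullptr;
//     TfLiteRegistration* registration = nullptr;
//     TF_LITE_ENSURE_OK(
//         context, context->GetNodeAndRegistration(context, plan->data[i],
//                                                  &node, &registration));
//     // Inspect registration->builtin_code to decide whether to claim the node.
//   }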
1470
1471TfLiteStatus Subgraph::SetTensorParametersReadOnly(
1472 int tensor_index, TfLiteType type, const char* name, const size_t ndims,
1473 const int* dims, TfLiteQuantization quantization, const char* buffer,
1474 size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity) {
1475 // Ensure quantization cleanup on failure.
1476 ScopedTfLiteQuantization scoped_quantization(&quantization);
1477 ScopedTfLiteSparsity scoped_sparsity(sparsity);
1478 if (state_ == kStateInvokableAndImmutable) {
1479 ReportError(
1480 "SetTensorParametersReadOnly is disallowed when graph is immutable.");
1481 return kTfLiteError;
1482 }
1483
1484 TF_LITE_ENSURE(&context_,
1485 tensor_index < context_.tensors_size && tensor_index >= 0);
1486
1487 // For most tensors we know exactly how much memory is necessary so we can
1488 // ensure the buffer is large enough. However, we need to skip string tensors
1489 // and sparse tensors because their sizes change with the contents.
1490 // TODO(b/145615516): Extend BytesRequired to check sparse tensors.
1491 if (type != kTfLiteString && type != kTfLiteResource &&
1492 type != kTfLiteVariant && sparsity == nullptr) {
1493 size_t required_bytes;
1494 TF_LITE_ENSURE_OK(&context_,
1495 BytesRequired(type, dims, ndims, &required_bytes));
1496 TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes);
1497 }
1498
1499 TfLiteTensor& tensor = context_.tensors[tensor_index];
1500 if (type == tensor.type &&
1501 EqualArrayAndTfLiteIntArray(tensor.dims, ndims, dims)) {
1502 // Fast path which does not invalidate the invokable property.
1503 TfLiteTensorDataFree(&tensor);
1504 TfLiteQuantizationFree(&tensor.quantization);
1505 tensor.data.raw = const_cast<char*>(buffer);
1506 if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(ndims, dims);
1507 tensor.params = GetLegacyQuantization(quantization);
1508 tensor.quantization = *scoped_quantization.release();
1509 tensor.sparsity = scoped_sparsity.release();
1510 tensor.allocation_type = kTfLiteMmapRo;
1511 tensor.allocation = allocation;
1512 } else {
1513 state_ = kStateUninvokable;
1514 TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(ndims, dims),
1515 GetLegacyQuantization(quantization),
1516 const_cast<char*>(buffer), bytes, kTfLiteMmapRo,
1517 allocation, false, &tensor);
1518 tensor.quantization = *scoped_quantization.release();
1519 tensor.sparsity = scoped_sparsity.release();
1520 }
1521 return kTfLiteOk;
1522}
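// Illustrative sketch (not part of the original file): clients typically reach
// SetTensorParametersReadOnly through the Interpreter wrapper when assembling
// a graph by hand. The tensor index, name, and weights are hypothetical, and
// the buffer must outlive the interpreter.
//
//   static const float kWeights[4] = {1.f, 2.f, 3.f, 4.f};
//   interpreter.SetTensorParametersReadOnly(
//       /*tensor_index=*/0, kTfLiteFloat32, "weights", {4},
//       TfLiteQuantization(), reinterpret_cast<const char*>(kWeights),
//       sizeof(kWeights));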
1523
1524 // Set the parameters of the read-write tensor at `tensor_index`. No buffer is
1525 // supplied here: non-string/resource/variant tensors are sized now and later
1526 // allocated from the arena (a persistent arena for variables), while
1527 // string, resource, and variant tensors are allocated dynamically at runtime.
1528TfLiteStatus Subgraph::SetTensorParametersReadWrite(
1529 int tensor_index, TfLiteType type, const char* name, const size_t ndims,
1530 const int* dims, TfLiteQuantization quantization, bool is_variable,
1531 const size_t ndims_signature, const int* dims_signature) {
1532 // Ensure quantization cleanup on failure.
1533 ScopedTfLiteQuantization scoped_quantization(&quantization);
1534 if (state_ == kStateInvokableAndImmutable) {
1535 ReportError(
1536 "SetTensorParametersReadWrite is disallowed when graph is immutable.");
1537 return kTfLiteError;
1538 }
1539 TF_LITE_ENSURE(&context_,
1540 tensor_index < context_.tensors_size && tensor_index >= 0);
1541 size_t required_bytes = 0;
1542 if (type != kTfLiteString && type != kTfLiteResource &&
1543 type != kTfLiteVariant) {
1544 // These types will be allocated in our arena so we need to record how
1545 // many bytes we will need based on the dimensions. String tensors are
1546 // allocated dynamically and we can't know ahead of time how much space
1547 // they will require.
1548 TF_LITE_ENSURE_OK(&context_,
1549 BytesRequired(type, dims, ndims, &required_bytes));
1550 }
1551
1552 TfLiteAllocationType allocation_type = kTfLiteArenaRw;
1553 if (type == kTfLiteString || type == kTfLiteResource ||
1554 type == kTfLiteVariant) {
1555 if (is_variable) {
1556 // We don't have a real use case for string variable tensors.
1557 ReportError("String variable tensor isn't supported.");
1558 return kTfLiteError;
1559 }
1560 allocation_type = kTfLiteDynamic;
1561 } else if (is_variable) {
1562 allocation_type = kTfLiteArenaRwPersistent;
1563 }
1564
1565 TfLiteTensor& tensor = context_.tensors[tensor_index];
1566
1567 TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(ndims, dims),
1568 GetLegacyQuantization(quantization),
1569 /*buffer=*/nullptr, required_bytes, allocation_type,
1570 nullptr, is_variable, &tensor);
1571 tensor.quantization = *scoped_quantization.release();
1572 tensor.dims_signature =
1573 ConvertArrayToTfLiteIntArray(ndims_signature, dims_signature);
1574 return kTfLiteOk;
1575}
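// Illustrative sketch (not part of the original file): the read-write variant
// is typically reached through the Interpreter wrapper as well; the arena
// provides the actual buffer once AllocateTensors() runs. The index and shape
// are hypothetical.
//
//   interpreter.SetTensorParametersReadWrite(
//       /*tensor_index=*/1, kTfLiteFloat32, "activation", {1, 4},
//       TfLiteQuantization());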
1576
1577TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) {
1578 for (int node_index : new_plan) {
1579 TF_LITE_ENSURE(&context_, node_index >= 0 &&
1580 node_index < nodes_and_registration_.size());
1581 }
1582 execution_plan_ = new_plan;
1583 return kTfLiteOk;
1584}
1585
1586TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
1587 TfLiteIntArray* new_size) {
1588 // Resizing is supported only for the allocation types checked below.
1589 if (tensor->allocation_type == kTfLiteArenaRw ||
1590 tensor->allocation_type == kTfLiteDynamic ||
1591 tensor->allocation_type == kTfLiteArenaRwPersistent ||
1592 tensor->allocation_type == kTfLitePersistentRo ||
1593 tensor->allocation_type == kTfLiteCustom) {
1594 tensor_resized_since_op_invoke_ |=
1595 TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
1596 if (tensor->type != kTfLiteString && tensor->type != kTfLiteResource &&
1597 tensor->type != kTfLiteVariant) {
1598 size_t bytesRequired;
1599 TfLiteStatus status = BytesRequired(tensor->type, new_size->data,
1600 new_size->size, &bytesRequired);
1601 if (status != kTfLiteOk) {
1602 TfLiteIntArrayFree(new_size);
1603 return kTfLiteError;
1604 }
1605
1606 // Realloc space for heap-allocated tensors.
1607 TfLiteTensorResizeMaybeCopy(bytesRequired, tensor, false);
1608 tensor->bytes = bytesRequired;
1609 }
1610 if (tensor->dims) TfLiteIntArrayFree(tensor->dims);
1611 tensor->dims = new_size;
1612
1613 // Reset arena-allocated tensors; they will be allocated later.
1614 if (tensor->allocation_type == kTfLiteArenaRw ||
1615 tensor->allocation_type == kTfLiteArenaRwPersistent) {
1616 tensor->data.raw = nullptr;
1617 }
1618 } else {
1619 // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore
1620 // of fixed size.
1621 TfLiteIntArrayFree(new_size);
1622 ReportError("Attempting to resize a fixed-size tensor.");
1623 return kTfLiteError;
1624 }
1625 return kTfLiteOk;
1626}
1627
1628void Subgraph::OptimizeMemoryForLargeTensors(
1629 int large_tensors_thresholds_in_bytes) {
1630 for (size_t tensor_index = 0; tensor_index < context_.tensors_size;
1631 tensor_index++) {
1632 TfLiteTensor* tensor = &context_.tensors[tensor_index];
1633 if (tensor->bytes >= large_tensors_thresholds_in_bytes &&
1634 tensor->allocation_type == kTfLiteArenaRw &&
1635 // Skip input tensors since they are handled by ResizeInputTensor().
1636 std::find(inputs_.begin(), inputs_.end(), tensor_index) ==
1637 inputs_.end()) {
1638 // Mark large non-input tensors as dynamic so that the ArenaPlanner skips
1639 // them. This method must be called before AllocateTensors() to take effect.
1640 tensor->allocation_type = kTfLiteDynamic;
1641 tensor->data.raw = nullptr;
1642 }
1643 }
1644}
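// Illustrative sketch (not part of the original file): this pass is normally
// driven through InterpreterOptions rather than called directly; the option
// name and threshold below are assumptions.
//
//   tflite::InterpreterOptions options;
//   options.OptimizeMemoryForLargeTensors(/*value=*/1 << 20);  // ~1 MiB threshold
//   interpreter.ApplyOptions(&options);
//   interpreter.AllocateTensors();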
1645
1646void Subgraph::SwitchToDelegateContext() {
1647 context_.GetNodeAndRegistration = GetNodeAndRegistration;
1648 context_.ReplaceNodeSubsetsWithDelegateKernels =
1649 ReplaceNodeSubsetsWithDelegateKernels;
1650 context_.GetExecutionPlan = GetExecutionPlan;
1651 context_.PreviewDelegatePartitioning = PreviewDelegatePartitioning;
1652}
1653
1654void Subgraph::SwitchToKernelContext() {
1655 context_.GetNodeAndRegistration = [](struct TfLiteContext* context,
1656 int node_index, TfLiteNode** node,
1657 TfLiteRegistration** registration) {
1658 return ForbiddenContextFunction(context);
1659 };
1660 context_.ReplaceNodeSubsetsWithDelegateKernels =
1661 [](TfLiteContext* context, TfLiteRegistration registration,
1662 const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
1663 return ForbiddenContextFunction(context);
1664 };
1665 context_.GetExecutionPlan = [](struct TfLiteContext* context,
1666 TfLiteIntArray**) {
1667 return ForbiddenContextFunction(context);
1668 };
1669 context_.PreviewDelegatePartitioning =
1670 [](struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
1671 TfLiteDelegateParams** partition_params_array,
1672 int* num_partitions) { return ForbiddenContextFunction(context); };
1673 // Free any memory that might have been allocated by
1674 // PreviewDelegatePartitioning.
1675 FreeDelegatePartitioningData();
1676}
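// Illustrative sketch (not part of the original file): the functions installed
// by SwitchToDelegateContext are only valid while TfLiteDelegate::Prepare is
// running; a delegate commonly previews partitions before replacing nodes.
// The |nodes_to_replace| array is hypothetical.
//
//   TfLiteDelegateParams* partition_params = nullptr;
//   int num_partitions = 0;
//   TF_LITE_ENSURE_OK(context, context->PreviewDelegatePartitioning(
//                                  context, nodes_to_replace,
//                                  &partition_params, &num_partitions));
//   // Outside of delegate preparation these entries point to
//   // ForbiddenContextFunction and simply return kTfLiteError.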
1677
1678TfLiteStatus Subgraph::UndoAllDelegates() {
1679 // Return early if there is nothing to reset to.
1680 if (pre_delegation_execution_plan_.empty()) return kTfLiteOk;
1681
1682 // First free all delegate nodes.
1683 for (int execution_plan_index = 0;
1684 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1685 int node_index = execution_plan_[execution_plan_index];
1686 TfLiteNode& node = nodes_and_registration_[node_index].first;
1687 if (node.delegate == nullptr) {
1688 continue;
1689 }
1690 CleanupNode(node_index);
1691 }
1692
1693 // Reset execution plan.
1694 execution_plan_ = pre_delegation_execution_plan_;
1695 pre_delegation_execution_plan_.clear();
1696
1697 // Handle FP16 delegation (if applicable).
1698 //
1699 // First pass through the execution plan to remember the mapping of FP16
1700 // dequantizations in the graph. This is required because delegates that
1701 // support FP16 acceleration may have remapped supported nodes' fp32 inputs
1702 // to point at the corresponding fp16 tensors (see FP16GraphPartitionHelper
1703 // in delegates/utils). That remapping must be undone here so that the CPU
1704 // kernels, which expect fp32 inputs, keep working.
1705 std::vector<int> fp16_to_fp32(tensors_size(), -1);
1706 for (int execution_plan_index = 0;
1707 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1708 int node_index = execution_plan_[execution_plan_index];
1709 auto& node_and_reg = nodes_and_registration_[node_index];
1710 const TfLiteNode& node = node_and_reg.first;
1711 const TfLiteRegistration& reg = node_and_reg.second;
1712 if (reg.builtin_code == kTfLiteBuiltinDequantize &&
1713 node.inputs->size == 1 && node.outputs->size == 1) {
1714 const int input_idx = node.inputs->data[0];
1715 if (tensors_[input_idx].type == kTfLiteFloat16) {
1716 fp16_to_fp32[input_idx] = node.outputs->data[0];
1717 }
1718 }
1719 }
1720 // Second pass through the execution plan to remap applicable nodes' fp16
1721 // inputs to their original fp32 versions. Note that if a CPU kernel does
1722 // support fp16, the model will not contain a DEQUANTIZE for its constant
1723 // input.
1724 for (int execution_plan_index = 0;
1725 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1726 int node_index = execution_plan_[execution_plan_index];
1727 auto& node_and_reg = nodes_and_registration_[node_index];
1728 const TfLiteNode& node = node_and_reg.first;
1729 const TfLiteRegistration& reg = node_and_reg.second;
1730 if (reg.builtin_code == kTfLiteBuiltinDequantize) continue;
1731 for (int i = 0; i < node.inputs->size; ++i) {
1732 const int original_input_idx = node.inputs->data[i];
1733 if (original_input_idx == kTfLiteOptionalTensor) continue;
1734 if (tensors_[original_input_idx].type == kTfLiteFloat16) {
1735 node.inputs->data[i] = fp16_to_fp32[original_input_idx];
1736 }
1737 }
1738 }
1739
1740 // Delegate nodes are appended to nodes_and_registration_. Therefore,
1741 // cleanup nodes_and_registration_ to only contain nodes from
1742 // pre_delegation_execution_plan_.
1743 int max_retained_node_index = 0;
1744 for (int execution_plan_index = 0;
1745 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1746 max_retained_node_index = std::max(max_retained_node_index,
1747 execution_plan_[execution_plan_index]);
1748 }
1749 nodes_and_registration_.resize(max_retained_node_index + 1);
1750 // After undoing delegates, the graph is uninvokable, but mutable.
1751 state_ = kStateUninvokable;
1752
1753 delegates_undone_ = true;
1754 return kTfLiteOk;
1755}
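// Illustrative sketch (not part of the original file): the FP16 bookkeeping in
// UndoAllDelegates() reverses input remapping of the form
//
//   (fp16 const) --DEQUANTIZE--> (fp32) --> OP   // original graph
//   (fp16 const) ------------------------> OP    // after an fp16-aware delegate
//
// by pointing OP's input back at the fp32 output of the DEQUANTIZE node.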
1756
1757TfLiteStatus Subgraph::RedoAllDelegates() {
1758 if (!delegates_undone_) return kTfLiteOk;
1759
1760 delegates_undone_ = false;
1761 std::vector<TfLiteDelegate*> delegates_to_apply;
1762 delegates_applied_.swap(delegates_to_apply);
1763 for (auto* delegate : delegates_to_apply) {
1764 TF_LITE_ENSURE_STATUS(ModifyGraphWithDelegate(delegate));
1765 }
1766 return kTfLiteOk;
1767}
1768
1769TfLiteStatus Subgraph::RemoveAllDelegates() {
1770 TF_LITE_ENSURE_STATUS(UndoAllDelegates());
1771 delegates_applied_.clear();
1772 delegates_undone_ = false;
1773 TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
1774 return kTfLiteOk;
1775}
1776
1777bool Subgraph::HasDelegates() { return !delegates_applied_.empty(); }
1778
1779bool Subgraph::IsFullyDelegated() const {
1780 for (const int nid : execution_plan_) {
1781 const TfLiteNode& node = nodes_and_registration_[nid].first;
1782 if (node.delegate == nullptr) return false;
1783 }
1784 return true;
1785}
1786
1787void Subgraph::EnsureTensorsVectorCapacity() {
1788 const size_t required_capacity = tensors_.size() + kTensorsCapacityHeadroom;
1789 if (required_capacity > tensors_.capacity()) {
1790 // Whenever the capacity has to grow, increase it to at least twice its
1791 // current value. This matches the geometric growth that libstdc++'s
1792 // std::vector uses when it reallocates, and avoids frequently allocating
1793 // and copying the underlying buffer.
1794 size_t reserved_capacity =
1795 std::max(required_capacity, tensors_.capacity() * 2);
1796 tensors_.reserve(reserved_capacity);
1797 context_.tensors = tensors_.data();
1798 }
1799}
1800
1801TfLiteStatus Subgraph::EnsureMemoryAllocations() {
1802 if (memory_planner_) {
1803 state_ = kStateUninvokable;
1804 TF_LITE_ENSURE_OK(&context_, memory_planner_->PlanAllocations());
1805 }
1806 TF_LITE_ENSURE_OK(&context_, AllocateTensors());
1807 TF_LITE_ENSURE_EQ(&context_, state_, kStateInvokable);
1808 return kTfLiteOk;
1809}
1810
1811TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
1812 TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(),
1813 "ModifyGraphWithDelegate");
1814
1815 if (delegate == nullptr) {
1816 ReportError("Null delegate.");
1817 return kTfLiteDelegateError;
1818 }
1819
1820 // Resets delegation & leaves graph in consistent state if delegate status is
1821 // not okay.
1822 auto reset_delegation_if_not_ok = [this](TfLiteStatus status) {
1823 if (status != kTfLiteOk) {
1824 TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
1825 ReportError(
1826 "Restored original execution plan after delegate application "
1827 "failure.");
1828 return kTfLiteDelegateError;
1829 }
1830 return kTfLiteOk;
1831 };
1832
1833 // STEP 1: Verify & prepare graph for delegation.
1834 // ==============================================
1835
1836 // Restore delegation state if applicable.
1837 TF_LITE_ENSURE_STATUS(RedoAllDelegates());
1838
1839 const bool delegate_supports_dynamic_shapes =
1840 TfLiteDelegateGetFlagsInternal(delegate) &
1841 kTfLiteDelegateFlagsAllowDynamicTensors;
1842 const auto pre_delegation_state = state_;
1843
1844 if (state_ == kStateInvokableAndImmutable) {
1845 // A delegate that doesn't support dynamic shapes was already applied, so
1846 // we can assume tensor shapes have been propagated & there are no dynamic
1847 // tensors.
1848 // Reset the state to force tensor/op reallocation.
1849 state_ = kStateUninvokable;
1850 } else if (!delegate_supports_dynamic_shapes) {
1851 // Check if graph has dynamic tensors by preparing ops.
1852 int last_execution_plan_index_prepared;
1853 TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
1854 0, execution_plan_, &last_execution_plan_index_prepared));
1855 if (has_dynamic_tensors_) {
1856 TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
1857 TFLITE_LOG(
1858 tflite::TFLITE_LOG_WARNING,
1859 "Attempting to use a delegate that only supports static-sized "
1860 "tensors with a graph that has dynamic-sized tensors (tensor#%d is a "
1861 "dynamic-sized tensor).",
1862 dynamic_tensor_index_);
1863 return kTfLiteApplicationError;
1864 }
1865 }
1866
1867 if (delegates_applied_.empty()) {
1868 // This is the first delegate being applied, so remember original execution
1869 // plan.
1870 pre_delegation_execution_plan_ = execution_plan_;
1871 }
1872
1873 // STEP 2: Delegate replaces applicable nodes with delegate kernels.
1874 // =================================================================
1875
1876 // Setup additional context interface.
1877 SwitchToDelegateContext();
1878 TfLiteStatus status = TfLiteDelegatePrepareInternal(&context_, delegate);
1879 // Remove additional context info.
1880 SwitchToKernelContext();
1881 TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(status));
1882
1883 // STEP 3: Leave graph in consistent state based on delegate & previous state.
1884 // ===========================================================================
1885
1886 if (!delegate_supports_dynamic_shapes) {
1887 // CASE 1: Current delegate does not support dynamic shapes.
1888 // Reset the state to force tensor/op reallocation.
1889 state_ = kStateUninvokable;
1890 TF_LITE_ENSURE_STATUS(
1891 reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1892 // After using a delegate which doesn't support dynamic tensors, make the
1893 // entire graph immutable.
1894 state_ = kStateInvokableAndImmutable;
1895 } else if (pre_delegation_state == kStateInvokableAndImmutable) {
1896 // CASE 2: Current delegate supports dynamic shapes, but a previous one
1897 // does not.
1898 // Make sure new delegate didn't mark a tensor as dynamic.
1899 int last_execution_plan_index_prepared;
1900 TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(PrepareOpsStartingAt(
1901 0, execution_plan_, &last_execution_plan_index_prepared)));
1902 if (has_dynamic_tensors_) {
1903 TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
1904 ReportError(
1905 "Cannot allow dynamic tensors due to previous delegation, resetting "
1906 "to original execution plan.");
1907 return kTfLiteApplicationError;
1908 }
1909 // Redo memory allocations & ensure state is set back to original value.
1910 TF_LITE_ENSURE_STATUS(
1911 reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1912 state_ = kStateInvokableAndImmutable;
1913 } else if (pre_delegation_state == kStateInvokable) {
1914 // CASE 3: Current delegate supports dynamic shapes, and the graph was
1915 // previously invokable.
1916 // Flush allocation now to leave it in a consistent state.
1917 TF_LITE_ENSURE_STATUS(
1918 reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1919 }
1920 delegates_applied_.push_back(delegate);
1921
1922 return status;
1923}
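// Illustrative sketch (not part of the original file): client code typically
// reaches this method through Interpreter::ModifyGraphWithDelegate. The
// XNNPACK delegate is used purely as an example; the delegate object must
// outlive the interpreter.
//
//   TfLiteXNNPackDelegateOptions xnnpack_options =
//       TfLiteXNNPackDelegateOptionsDefault();
//   TfLiteDelegate* delegate = TfLiteXNNPackDelegateCreate(&xnnpack_options);
//   if (interpreter.ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // The original execution plan was restored; fall back to CPU execution.
//   }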
1924
1925TfLiteStatus Subgraph::SetCustomAllocationForTensor(
1926 int tensor_index, const TfLiteCustomAllocation& allocation, int64_t flags) {
1927 TfLiteTensor* tensor = &context_.tensors[tensor_index];
1928 TF_LITE_ENSURE(context(),
1929 (tensor->allocation_type == kTfLiteArenaRw ||
1930 tensor->allocation_type == kTfLiteArenaRwPersistent ||
1931 tensor->allocation_type == kTfLiteCustom));
1932 // Don't check allocation.bytes here; it is checked after all ops are
1933 // prepared, to allow tensor shape propagation.
1934 TF_LITE_ENSURE(context(), allocation.data != nullptr);
1935 if (!(flags & kTfLiteCustomAllocationFlagsSkipAlignCheck)) {
1936 const intptr_t data_ptr_value = reinterpret_cast<intptr_t>(allocation.data);
1937 TF_LITE_ENSURE(context(), data_ptr_value % kDefaultTensorAlignment == 0);
1938 }
1939
1940 const auto iter_and_success =
1941 custom_allocations_.insert({tensor_index, allocation});
1942 if (!iter_and_success.second) {
1943 iter_and_success.first->second = allocation;
1944 }
1945
1946 tensor->allocation_type = kTfLiteCustom;
1947 tensor->data.data = allocation.data;
1948
1949 return kTfLiteOk;
1950}
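// Illustrative sketch (not part of the original file): callers typically hand
// in a suitably aligned buffer through the Interpreter wrapper; the sizes and
// alignment constant below are assumptions.
//
//   constexpr size_t kAlignment = 64;  // >= kDefaultTensorAlignment
//   void* buffer = aligned_alloc(kAlignment, /*size=*/1024);
//   TfLiteCustomAllocation allocation = {buffer, /*bytes=*/1024};
//   interpreter.SetCustomAllocationForTensor(interpreter.inputs()[0],
//                                            allocation);
//   // AllocateTensors() later verifies that |bytes| covers the tensor.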
1951
1952void Subgraph::SetName(const char* name) {
1953 if (name) {
1954 name_ = name;
1955 } else {
1956 name_ = "";
1957 }
1958}
1959
1960const std::string& Subgraph::GetName() const { return name_; }
1961
1962void Subgraph::DumpMemoryPlannerDebugInfo() const {
1963 if (memory_planner_ == nullptr) return;
1964 memory_planner_->DumpDebugInfo(execution_plan());
1965}
1966
1967void Subgraph::GetMemoryAllocInfo(SubgraphAllocInfo* alloc_info) const {
1968 memset(alloc_info, 0, sizeof(SubgraphAllocInfo));
1969 if (memory_planner_ == nullptr) return;
1970 memory_planner_->GetAllocInfo(&alloc_info->arena_size,
1971 &alloc_info->arena_persist_size);
1972 for (const auto& tensor : tensors_) {
1973 if (tensor.allocation_type == kTfLiteDynamic &&
1974 tensor.data.raw != nullptr) {
1975 alloc_info->dynamic_size += tensor.bytes;
1976 }
1977 }
1978 if (GetSubgraphIndex() == 0) {
1979 for (const auto& res : *resources_) {
1980 alloc_info->resource_size += res.second->GetMemoryUsage();
1981 }
1982 }
1983}
1984
1985std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() {
1986 return std::unique_ptr<GraphInfo>(new InterpreterInfo(this));
1987}
1988
1989void Subgraph::InitializeTensorReleaseMap() {
1990 for (int i = 0; i < execution_plan_.size(); ++i) {
1991 int node_index = execution_plan_[i];
1992 const TfLiteNode& node = nodes_and_registration_[node_index].first;
1993 for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
1994 int input_tensor_index = node.inputs->data[input_index];
1995 TfLiteTensor* input_tensor = tensor(input_tensor_index);
1996 if (!input_tensor) continue;
1997 tensor_to_last_op_index_[input_tensor_index] = node_index;
1998 }
1999 // Also record the node's outputs so that tensors which are never consumed
2000 // as another node's input are still released.
2001 for (int output_index = 0; output_index < node.outputs->size;
2002 ++output_index) {
2003 int output_tensor_index = node.outputs->data[output_index];
2004 TfLiteTensor* output_tensor = tensor(output_tensor_index);
2005 if (!output_tensor) continue;
2006 tensor_to_last_op_index_[output_tensor_index] = node_index;
2007 }
2008 }
2009}
2010
2011void Subgraph::MaybeReleaseDynamicTensors(const TfLiteNode& node,
2012 size_t node_index) {
2013 if (!ShouldReleaseDynamicTensors()) return;
2014
2015 // Release an input tensor if it is neither a graph input/output tensor nor
2016 // used by any later node in the execution plan.
2017 auto tensorIsInput = [&](int index) {
2018 for (int idx : inputs_) {
2019 if (idx == index) return true;
2020 }
2021 return false;
2022 };
2023 auto tensorIsOutput = [&](int index) {
2024 for (int idx : outputs_) {
2025 if (idx == index) return true;
2026 }
2027 return false;
2028 };
2029 for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
2030 int input_tensor_index = node.inputs->data[input_index];
2031 TfLiteTensor* input_tensor = tensor(input_tensor_index);
2032 if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic ||
2033 input_tensor->type == kTfLiteString ||
2034 input_tensor->type == kTfLiteResource ||
2035 tensorIsInput(input_tensor_index) || tensorIsOutput(input_tensor_index))
2036 continue;
2037 auto it = tensor_to_last_op_index_.find(input_tensor_index);
2038 if (it != tensor_to_last_op_index_.end() && it->second == node_index) {
2039 if (input_tensor->data.raw) {
2040 TfLiteTensorDataFree(input_tensor);
2041 }
2042 }
2043 }
2044
2045 // Release an output tensor if it is neither a graph input/output tensor nor
2046 // used by any later node in the execution plan.
2047 for (int output_index = 0; output_index < node.outputs->size;
2048 ++output_index) {
2049 int output_tensor_index = node.outputs->data[output_index];
2050 TfLiteTensor* output_tensor = tensor(output_tensor_index);
2051 if (!output_tensor || output_tensor->allocation_type != kTfLiteDynamic ||
2052 output_tensor->type == kTfLiteString ||
2053 output_tensor->type == kTfLiteResource ||
2054 tensorIsInput(output_tensor_index) ||
2055 tensorIsOutput(output_tensor_index))
2056 continue;
2057 auto it = tensor_to_last_op_index_.find(output_tensor_index);
2058 if (it != tensor_to_last_op_index_.end() && it->second == node_index) {
2059 if (output_tensor->data.raw) {
2060 TfLiteTensorDataFree(output_tensor);
2061 }
2062 }
2063 }
2064}
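// Illustrative sketch (not part of the original file): this release path is
// gated by ShouldReleaseDynamicTensors(), which clients typically enable
// through InterpreterOptions; the exact option name is an assumption.
//
//   tflite::InterpreterOptions options;
//   options.SetEnsureDynamicTensorsAreReleased();
//   interpreter.ApplyOptions(&options);
//   // Intermediate dynamic tensors are then freed as soon as their last
//   // consumer in the execution plan has run.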
2065
2066} // namespace tflite
2067