1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/lite/core/subgraph.h" |
17 | |
18 | #include <stdarg.h> |
19 | #include <stddef.h> |
20 | |
21 | #include <algorithm> |
22 | #include <cstdint> |
23 | #include <cstdlib> |
24 | #include <cstring> |
#include <iterator>
#include <map>
26 | #include <memory> |
27 | #include <string> |
28 | #include <utility> |
29 | #include <vector> |
30 | |
31 | #include "tensorflow/lite/allocation.h" |
32 | #include "tensorflow/lite/builtin_ops.h" |
33 | #include "tensorflow/lite/c/c_api_types.h" |
34 | #include "tensorflow/lite/c/common.h" |
35 | #include "tensorflow/lite/c/common_internal.h" |
36 | #include "tensorflow/lite/context_util.h" |
37 | #include "tensorflow/lite/core/api/error_reporter.h" |
38 | #include "tensorflow/lite/core/api/profiler.h" |
39 | #include "tensorflow/lite/core/api/tensor_utils.h" |
40 | #include "tensorflow/lite/core/macros.h" |
41 | #include "tensorflow/lite/experimental/resource/resource_base.h" |
42 | #include "tensorflow/lite/graph_info.h" |
43 | #include "tensorflow/lite/memory_planner.h" |
44 | #include "tensorflow/lite/minimal_logging.h" |
45 | #include "tensorflow/lite/schema/schema_generated.h" |
46 | #include "tensorflow/lite/util.h" |
47 | #ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER |
48 | #include "tensorflow/lite/simple_planner.h" |
49 | #else |
50 | #include "tensorflow/lite/arena_planner.h" |
51 | #endif |
52 | #ifdef TF_LITE_TENSORFLOW_PROFILER |
53 | #include "tensorflow/lite/tensorflow_profiler_logger.h" |
54 | #endif // TF_LITE_TENSORFLOW_PROFILER |
55 | |
56 | namespace tflite { |
57 | |
58 | namespace { |
59 | |
60 | struct TfLiteQuantizationDeleter { |
61 | void operator()(TfLiteQuantization* q) { |
62 | if (q) TfLiteQuantizationFree(q); |
63 | } |
64 | }; |
65 | |
66 | using ScopedTfLiteQuantization = |
67 | std::unique_ptr<TfLiteQuantization, TfLiteQuantizationDeleter>; |
68 | |
69 | struct TfLiteSparsityDeleter { |
70 | void operator()(TfLiteSparsity* s) { |
71 | if (s) TfLiteSparsityFree(s); |
72 | } |
73 | }; |
74 | |
75 | using ScopedTfLiteSparsity = |
76 | std::unique_ptr<TfLiteSparsity, TfLiteSparsityDeleter>; |
77 | |
78 | TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node, |
79 | const TfLiteRegistration& registration, |
80 | int node_index, const char* message) { |
81 | TF_LITE_KERNEL_LOG(context, "Node number %d (%s) %s." , node_index, |
82 | registration.custom_name |
83 | ? registration.custom_name |
84 | : EnumNameBuiltinOperator(static_cast<BuiltinOperator>( |
85 | registration.builtin_code)), |
86 | message); |
87 | return kTfLiteError; |
88 | } |
89 | |
// Stub function which returns kTfLiteError when the function is forbidden.
// We register this single stub for several different context functions to
// save compiled binary size. Please note the restrictions:
// * The type of the first parameter has to be `TfLiteContext*`.
// * All parameters must be trivially destructible. (E.g. no C++ classes.)
95 | TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) { |
  TF_LITE_KERNEL_LOG(
      context, "This function is forbidden if not called from a delegate.");
98 | return kTfLiteError; |
99 | } |
100 | |
101 | // Set the ForbiddenContextFunction to a compatible function pointer. |
102 | template <typename FunctionType> |
103 | void SetForbiddenContextFunction(FunctionType* func) { |
104 | *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction); |
105 | } |
106 | |
107 | // Returns true if at least one tensor in the given list is kTfLiteDynamic. |
108 | template <typename TensorIntArray> |
109 | bool HasDynamicTensorImpl(const TfLiteContext& context, |
110 | const TensorIntArray& int_array, |
111 | int* dynamic_tensor_index) { |
112 | for (int i : int_array) { |
113 | if (i == kTfLiteOptionalTensor) continue; |
114 | const TfLiteTensor& tensor = context.tensors[i]; |
115 | if (tensor.allocation_type == kTfLiteDynamic) { |
116 | if (dynamic_tensor_index) { |
117 | *dynamic_tensor_index = i; |
118 | } |
119 | return true; |
120 | } |
121 | } |
122 | return false; |
123 | } |
124 | |
125 | bool HasDynamicTensor(const TfLiteContext& context, |
126 | const TfLiteIntArray* int_array, |
127 | int* dynamic_tensor_index) { |
128 | return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array}, |
129 | dynamic_tensor_index); |
130 | } |
131 | |
132 | // Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization. |
133 | TfLiteQuantizationParams GetLegacyQuantization( |
134 | const TfLiteQuantization& quantization) { |
135 | TfLiteQuantizationParams legacy_quantization; |
136 | legacy_quantization.scale = 0; |
137 | legacy_quantization.zero_point = 0; |
138 | |
139 | // If the quantization type isn't affine, return the empty |
140 | // legacy_quantization. |
141 | if (quantization.type != kTfLiteAffineQuantization) { |
142 | return legacy_quantization; |
143 | } |
144 | |
145 | auto* affine_quantization = |
146 | static_cast<TfLiteAffineQuantization*>(quantization.params); |
147 | if (!affine_quantization || !affine_quantization->scale || |
148 | !affine_quantization->zero_point || |
149 | affine_quantization->scale->size != 1 || |
150 | affine_quantization->zero_point->size != 1) { |
151 | return legacy_quantization; |
152 | } |
153 | |
  // We know it's per-layer quantization now.
155 | legacy_quantization.scale = affine_quantization->scale->data[0]; |
156 | legacy_quantization.zero_point = affine_quantization->zero_point->data[0]; |
157 | return legacy_quantization; |
158 | } |
159 | |
static constexpr const char kUnknownCustomOpName[] = "UnknownCustomOp";
161 | const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) { |
162 | if (op_reg.builtin_code == tflite::BuiltinOperator_CUSTOM) { |
163 | const char* const custom_name = op_reg.custom_name; |
164 | return custom_name ? custom_name : kUnknownCustomOpName; |
165 | } |
166 | if (op_reg.builtin_code == tflite::BuiltinOperator_DELEGATE && |
167 | op_reg.custom_name) { |
168 | return op_reg.custom_name; |
169 | } |
170 | return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code]; |
171 | } |
172 | |
173 | // Verifies custom allocation for tensor, if applicable. |
174 | TfLiteStatus VerifyCustomAllocationForTensor( |
175 | TfLiteContext* context, |
176 | const std::map<int, TfLiteCustomAllocation>& tensor_idx_to_alloc, |
177 | const int tensor_idx) { |
178 | auto& tensor = context->tensors[tensor_idx]; |
179 | if (tensor.allocation_type != kTfLiteCustom) return kTfLiteOk; |
180 | const auto idx_and_alloc = tensor_idx_to_alloc.find(tensor_idx); |
181 | TF_LITE_ENSURE(context, idx_and_alloc != tensor_idx_to_alloc.end()); |
182 | if (idx_and_alloc->second.bytes < tensor.bytes) { |
183 | TF_LITE_KERNEL_LOG(context, |
184 | "Custom allocation is too small for tensor idx: %d" , |
185 | tensor_idx); |
186 | return kTfLiteError; |
187 | } |
188 | return kTfLiteOk; |
189 | } |
190 | |
191 | } // namespace |
192 | |
193 | // A trivial implementation of GraphInfo around the Interpreter. |
194 | // NOTE: this interpreter info represents the subset of the |
195 | // graph that is executed according to execution plan. Thus, |
196 | // the indices are execution plan indices rather than raw node |
197 | // indices. |
198 | class InterpreterInfo : public GraphInfo { |
199 | public: |
200 | explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {} |
201 | |
202 | size_t num_tensors() const override { return subgraph_->tensors_size(); } |
203 | TfLiteTensor* tensors() override { return subgraph_->tensors(); } |
204 | TfLiteTensor* tensor(size_t index) override { |
205 | return subgraph_->tensor(index); |
206 | } |
207 | size_t num_execution_nodes() const override { |
208 | return subgraph_->execution_plan().size(); |
209 | } |
210 | size_t num_total_nodes() const override { return subgraph_->nodes_size(); } |
211 | const TfLiteNode& node(size_t index) const override { |
212 | int node_index = subgraph_->execution_plan()[index]; |
213 | return subgraph_->nodes_and_registration()[node_index].first; |
214 | } |
215 | size_t node_index(size_t index) const override { |
216 | return subgraph_->execution_plan()[index]; |
217 | } |
218 | const std::vector<int>& inputs() const override { |
219 | return subgraph_->inputs(); |
220 | } |
221 | const std::vector<int>& outputs() const override { |
222 | return subgraph_->outputs(); |
223 | } |
224 | const std::vector<int>& variables() const override { |
225 | return subgraph_->variables(); |
226 | } |
227 | |
228 | public: |
229 | Subgraph* subgraph_; |
230 | }; |
231 | |
232 | Subgraph::Subgraph(ErrorReporter* error_reporter, |
233 | TfLiteExternalContext** external_contexts, |
234 | std::vector<std::unique_ptr<Subgraph>>* subgraphs, |
235 | resource::ResourceMap* resources, |
236 | resource::ResourceIDMap* resource_ids, |
237 | resource::InitializationStatusMap* initialization_status_map, |
238 | int subgraph_index) |
239 | : external_contexts_(external_contexts), |
240 | error_reporter_(error_reporter), |
241 | next_execution_plan_index_to_prepare_(0), |
242 | next_execution_plan_index_to_plan_allocation_(0), |
243 | subgraphs_(subgraphs), |
244 | subgraph_index_(subgraph_index), |
245 | resources_(resources), |
246 | resource_ids_(resource_ids), |
247 | initialization_status_map_(initialization_status_map), |
248 | options_(nullptr) { |
249 | context_.impl_ = static_cast<void*>(this); |
250 | context_.ResizeTensor = ResizeTensor; |
251 | context_.ReportError = ReportErrorC; |
252 | context_.AddTensors = AddTensors; |
253 | context_.tensors = nullptr; |
254 | context_.tensors_size = 0; |
255 | context_.allow_fp32_relax_to_fp16 = false; |
256 | context_.recommended_num_threads = -1; |
257 | context_.GetExternalContext = GetExternalContext; |
258 | context_.SetExternalContext = SetExternalContext; |
259 | context_.profiler = nullptr; |
260 | context_.GetTensor = nullptr; |
261 | context_.GetEvalTensor = nullptr; |
262 | context_.GetModelMetadata = GetModelMetadata; |
263 | |
264 | // Reserve some space for the tensors to avoid excessive resizing. |
265 | tensors_.reserve(kTensorsReservedCapacity); |
266 | nodes_and_registration_.reserve(kTensorsReservedCapacity); |
  // These context functions are invalid to call except from a TfLiteDelegate,
  // so point them at the forbidden-function stub.
268 | SwitchToKernelContext(); |
269 | } |
270 | |
271 | Subgraph::~Subgraph() { |
272 | for (int node_index = 0; node_index < nodes_and_registration_.size(); |
273 | ++node_index) { |
274 | CleanupNode(node_index); |
275 | } |
276 | |
277 | for (size_t i = 0; i < context_.tensors_size; i++) { |
278 | TfLiteTensor* tensor = &context_.tensors[i]; |
279 | if (tensor->buffer_handle != kTfLiteNullBufferHandle) { |
280 | TfLiteDelegateFreeBufferHandleInternal(&context_, tensor->delegate, |
281 | &tensor->buffer_handle); |
282 | } |
283 | |
284 | TfLiteTensorFree(tensor); |
285 | } |
286 | } |
287 | |
288 | void Subgraph::CleanupNode(int node_index) { |
289 | TfLiteNode& node = nodes_and_registration_[node_index].first; |
290 | const TfLiteRegistration& registration = |
291 | nodes_and_registration_[node_index].second; |
292 | TfLiteIntArrayFree(node.inputs); |
293 | TfLiteIntArrayFree(node.outputs); |
294 | TfLiteIntArrayFree(node.temporaries); |
295 | TfLiteIntArrayFree(node.intermediates); |
296 | if (node.builtin_data) free(node.builtin_data); |
297 | OpFree(registration, node.user_data); |
298 | node.builtin_data = nullptr; |
299 | } |
300 | |
301 | TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels( |
302 | TfLiteContext* context, TfLiteRegistration registration, |
303 | const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) { |
304 | return static_cast<Subgraph*>(context->impl_) |
305 | ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace, |
306 | delegate); |
307 | } |
308 | |
309 | namespace { |
310 | |
311 | // Copy a std::vector<int> to an existing TfLiteIntArray. |
// This is a low-level data manipulation function, and it is the caller's
// responsibility to ensure the TfLiteIntArray has enough capacity.
314 | void CopyVectorToTfLiteIntArray(const std::vector<int>& vec, |
315 | TfLiteIntArray* arr) { |
316 | arr->size = vec.size(); |
317 | memcpy(arr->data, vec.data(), sizeof(int) * arr->size); |
318 | } |
319 | |
// This function allocates a contiguous memory block that contains a
// TfLiteDelegateParams followed by several TfLiteIntArrays.
// Calling `free` on the returned TfLiteDelegateParams* releases all of the
// allocated space at once.
324 | // |
325 | // +-----------------------------------+ |
326 | // | TfLiteDelegateParams | |
327 | // | TfLiteDelegate* delegate; | |
328 | // | TfLiteIntArray* nodes_to_replace; |--\ |
329 | // | TfLiteIntArray* input_tensors; |--+--\ |
330 | // | TfLiteIntArray* output_tensors; |--+--+--\ |
331 | // +-----------------------------------+ | | | |
332 | // | TfLiteIntArray (variable size) |<-/ | | |
333 | // +-----------------------------------+ | | |
334 | // | TfLiteIntArray (variable size) |<----/ | |
335 | // +-----------------------------------+ | |
336 | // | TfLiteIntArray (variable size) |<-------/ |
337 | // +-----------------------------------+ |
338 | TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate, |
339 | const NodeSubset& node_subset) { |
340 | // Step 1: Calculate the allocation size. |
341 | int allocation_size = sizeof(TfLiteDelegateParams); |
342 | |
343 | int nodes_to_replace_size = |
344 | TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size()); |
345 | allocation_size += nodes_to_replace_size; |
346 | |
347 | int input_tensors_size = |
348 | TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size()); |
349 | allocation_size += input_tensors_size; |
350 | |
351 | int output_tensors_size = |
352 | TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size()); |
353 | allocation_size += output_tensors_size; |
354 | |
355 | // Step 2: Allocate the memory. |
  // Use `char*` so that we can conveniently step through the allocated space
  // byte by byte.
357 | char* allocation = static_cast<char*>(malloc(allocation_size)); |
358 | |
359 | // Step 3: Fill all data structures. |
360 | TfLiteDelegateParams* params = |
361 | reinterpret_cast<TfLiteDelegateParams*>(allocation); |
362 | params->delegate = delegate; |
363 | allocation += sizeof(TfLiteDelegateParams); |
364 | |
365 | params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation); |
366 | CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace); |
367 | allocation += nodes_to_replace_size; |
368 | |
369 | params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation); |
370 | CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors); |
371 | allocation += input_tensors_size; |
372 | |
373 | params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation); |
374 | CopyVectorToTfLiteIntArray(node_subset.output_tensors, |
375 | params->output_tensors); |
376 | allocation += output_tensors_size; |
377 | |
378 | return params; |
379 | } |
380 | |
381 | // Assumes that params is not nullptr. |
382 | void PopulatePreviewDelegateParams(const NodeSubset& node_subset, |
383 | TfLiteDelegateParams* params) { |
384 | // Since these params are used for previewing partitioning, params->delegate |
385 | // is not required. |
386 | params->delegate = nullptr; |
387 | |
388 | params->nodes_to_replace = TfLiteIntArrayCreate(node_subset.nodes.size()); |
389 | CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace); |
390 | |
391 | params->input_tensors = |
392 | TfLiteIntArrayCreate(node_subset.input_tensors.size()); |
393 | CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors); |
394 | |
395 | params->output_tensors = |
396 | TfLiteIntArrayCreate(node_subset.output_tensors.size()); |
397 | CopyVectorToTfLiteIntArray(node_subset.output_tensors, |
398 | params->output_tensors); |
399 | } |
400 | |
401 | } // namespace |
402 | |
403 | TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels( |
404 | TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, |
405 | TfLiteDelegate* delegate) { |
406 | // Ignore empty node replacement sets. |
407 | if (!nodes_to_replace->size) { |
408 | return kTfLiteOk; |
409 | } |
410 | |
411 | // Annotate the registration as DELEGATE op. |
412 | registration.builtin_code = BuiltinOperator_DELEGATE; |
413 | |
414 | // Analyze the graph to find all independent node_subsets that are either |
415 | // fully not-this-delegate or this-delegate computation. |
416 | InterpreterInfo info(this); |
417 | std::vector<NodeSubset> node_subsets; |
418 | PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace, |
419 | &node_subsets); |
420 | |
421 | // On Android the log message below is used for diagnosing delegation success |
422 | // also in production builds. Use VERBOSE here so that the logging is turned |
423 | // off in production builds on other platforms. |
424 | TFLITE_LOG_PROD( |
425 | tflite::TFLITE_LOG_VERBOSE, |
426 | "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions." , |
427 | nodes_to_replace->size, |
428 | registration.custom_name ? registration.custom_name : "unknown" , |
429 | node_subsets.size()); |
430 | |
431 | execution_plan_.clear(); |
432 | |
433 | for (auto& node_subset : node_subsets) { |
434 | // Subsets claimed by the delegate should have a "macro" op created, the |
435 | // other node_subsets (kTfNonPartition) just have their nodes added back to |
436 | // the execution plan. |
437 | switch (node_subset.type) { |
438 | case NodeSubset::kTfNonPartition: |
439 | for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end(); |
440 | ++it) { |
441 | execution_plan_.push_back(*it); |
442 | } |
443 | break; |
444 | case NodeSubset::kTfPartition: { |
445 | int node_index; |
446 | |
447 | TfLiteDelegateParams* params = |
448 | CreateDelegateParams(delegate, node_subset); |
449 | TF_LITE_ENSURE_STATUS(AddNodeWithParameters( |
450 | node_subset.input_tensors, node_subset.output_tensors, {}, nullptr, |
451 | 0, params, ®istration, &node_index)); |
452 | |
        // Initialize the output tensors' delegate-related fields.
454 | for (int tensor_index : node_subset.output_tensors) { |
455 | TfLiteTensor* tensor = &tensors_[tensor_index]; |
456 | TF_LITE_ENSURE(&context_, tensor->delegate == nullptr || |
457 | tensor->delegate == delegate); |
458 | tensor->delegate = delegate; |
459 | } |
460 | |
461 | // Associate the node with the delegate. |
462 | TfLiteNode* node = &nodes_and_registration_[node_index].first; |
463 | node->delegate = delegate; |
464 | } break; |
465 | case NodeSubset::kTfUnexplored: |
466 | return kTfLiteError; |
467 | break; |
468 | } |
469 | } |
470 | return kTfLiteOk; |
471 | } |
472 | |
473 | TfLiteExternalContext* Subgraph::GetExternalContext( |
474 | TfLiteExternalContextType type) { |
475 | if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) { |
476 | return external_contexts_[type]; |
477 | } |
478 | return nullptr; |
479 | } |
480 | |
481 | TfLiteExternalContext* Subgraph::GetExternalContext( |
482 | struct TfLiteContext* context, TfLiteExternalContextType type) { |
483 | return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type); |
484 | } |
485 | |
486 | void Subgraph::SetExternalContext(TfLiteExternalContextType type, |
487 | TfLiteExternalContext* ctx) { |
488 | if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) { |
489 | external_contexts_[type] = ctx; |
490 | } |
491 | } |
492 | |
493 | void Subgraph::SetExternalContext(struct TfLiteContext* context, |
494 | TfLiteExternalContextType type, |
495 | TfLiteExternalContext* ctx) { |
496 | return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx); |
497 | } |
498 | |
// Gets a TfLiteIntArray* representing the execution plan. The interpreter owns
// this memory and it is only guaranteed to exist during the invocation of the
// delegate's Prepare.
502 | TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) { |
503 | plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size())); |
504 | *execution_plan = plan_cache_.get(); |
505 | static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]), |
506 | "TfLiteIntArray and execution_plan do not contain same type." ); |
507 | std::memcpy(plan_cache_->data, execution_plan_.data(), |
508 | sizeof(plan_cache_->data[0]) * execution_plan_.size()); |
509 | return kTfLiteOk; |
510 | } |
511 | |
512 | // WARNING: This is an experimental interface that is subject to change. |
513 | // Entry point for C node plugin API to get the execution plan |
514 | TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context, |
515 | TfLiteIntArray** execution_plan) { |
516 | return static_cast<Subgraph*>(context->impl_) |
517 | ->GetExecutionPlan(execution_plan); |
518 | } |
519 | |
520 | void Subgraph::FreeDelegatePartitioningData() { |
521 | for (auto& params : partitioning_preview_cache_) { |
522 | TfLiteIntArrayFree(params.nodes_to_replace); |
523 | TfLiteIntArrayFree(params.input_tensors); |
524 | TfLiteIntArrayFree(params.output_tensors); |
525 | } |
526 | partitioning_preview_cache_.clear(); |
527 | } |
528 | |
529 | TfLiteStatus Subgraph::GetModelMetadata(const char* name, const char** ptr, |
530 | size_t* bytes) { |
531 | TF_LITE_ENSURE(&context_, ptr != nullptr); |
532 | TF_LITE_ENSURE(&context_, bytes != nullptr); |
533 | *ptr = nullptr; |
534 | *bytes = 0; |
535 | if (!metadata_) return kTfLiteError; |
536 | const std::string name_str = name; |
537 | auto itr = metadata_->find(name_str); |
538 | if (itr != metadata_->end()) { |
539 | *ptr = itr->second.c_str(); |
540 | *bytes = itr->second.size(); |
541 | return kTfLiteOk; |
542 | } |
543 | return kTfLiteError; |
544 | } |
545 | |
546 | TfLiteStatus Subgraph::GetModelMetadata(const struct TfLiteContext* context, |
547 | const char* name, const char** ptr, |
548 | size_t* bytes) { |
549 | return static_cast<Subgraph*>(context->impl_) |
550 | ->GetModelMetadata(name, ptr, bytes); |
551 | } |
552 | |
553 | TfLiteStatus Subgraph::PreviewDelegatePartitioning( |
554 | const TfLiteIntArray* nodes_to_replace, |
555 | TfLiteDelegateParams** partition_params_array, int* num_partitions) { |
556 | // Ensure partitioning cache is empty. |
557 | FreeDelegatePartitioningData(); |
558 | // Defaults. |
559 | if (!partition_params_array || !num_partitions) return kTfLiteError; |
560 | *partition_params_array = nullptr; |
561 | *num_partitions = 0; |
562 | if (!nodes_to_replace->size) { |
563 | return kTfLiteOk; |
564 | } |
565 | |
566 | // Partition the execution plan into node subsets. |
567 | InterpreterInfo info(this); |
568 | std::vector<NodeSubset> node_subsets; |
569 | PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace, |
570 | &node_subsets); |
571 | |
572 | // Create one TfLiteDelegateParams per node-subset which would be delegated. |
573 | for (auto& node_subset : node_subsets) { |
574 | if (node_subset.type != NodeSubset::kTfPartition) { |
575 | continue; |
576 | } |
577 | partitioning_preview_cache_.emplace_back(); |
578 | PopulatePreviewDelegateParams(node_subset, |
579 | &partitioning_preview_cache_.back()); |
580 | ++*num_partitions; |
581 | } |
582 | |
583 | *partition_params_array = partitioning_preview_cache_.data(); |
584 | return kTfLiteOk; |
585 | } |
586 | |
587 | TfLiteStatus Subgraph::PreviewDelegatePartitioning( |
588 | struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace, |
589 | TfLiteDelegateParams** partition_params_array, int* num_partitions) { |
590 | return static_cast<Subgraph*>(context->impl_) |
591 | ->PreviewDelegatePartitioning(nodes_to_replace, partition_params_array, |
592 | num_partitions); |
593 | } |
594 | |
595 | TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) { |
596 | TF_LITE_ENSURE_OK(&context_, |
597 | CheckTensorIndices("inputs" , inputs.data(), inputs.size())); |
598 | inputs_ = std::move(inputs); |
599 | return kTfLiteOk; |
600 | } |
601 | |
602 | TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) { |
603 | TF_LITE_ENSURE_OK( |
604 | &context_, CheckTensorIndices("outputs" , outputs.data(), outputs.size())); |
605 | outputs_ = std::move(outputs); |
606 | return kTfLiteOk; |
607 | } |
608 | |
609 | TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) { |
610 | TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables" , variables.data(), |
611 | variables.size())); |
612 | variables_ = std::move(variables); |
613 | return kTfLiteOk; |
614 | } |
615 | |
616 | TfLiteStatus Subgraph::SetMetadata( |
617 | const std::map<std::string, std::string>* metadata) { |
618 | metadata_ = metadata; |
619 | return kTfLiteOk; |
620 | } |
621 | |
622 | void Subgraph::SetCancellationFunction(void* data, |
623 | bool (*check_cancelled_func)(void*)) { |
624 | cancellation_data_ = data; |
625 | check_cancelled_func_ = check_cancelled_func; |
626 | } |
627 | |
628 | TfLiteStatus Subgraph::EnsureTensorDataIsReadable(int tensor_index) { |
629 | TfLiteTensor* t = &tensors_[tensor_index]; |
630 | TF_LITE_ENSURE(&context_, t != nullptr); |
631 | TfLiteStatus status = kTfLiteOk; |
632 | if (t->data_is_stale) { |
633 | TF_LITE_ENSURE(&context_, t->delegate != nullptr); |
634 | TF_LITE_ENSURE(&context_, t->buffer_handle != kTfLiteNullBufferHandle); |
635 | status = TfLiteDelegateCopyFromBufferHandleInternal(&context_, t->delegate, |
636 | t->buffer_handle, t); |
637 | t->data_is_stale = false; |
638 | } |
639 | return status; |
640 | } |
641 | |
642 | bool Subgraph::IsCancelled() { |
643 | return (check_cancelled_func_ != nullptr) && |
644 | (*check_cancelled_func_)(cancellation_data_); |
645 | } |
646 | |
647 | void Subgraph::ReserveNodes(int count) { |
648 | nodes_and_registration_.reserve(count); |
649 | } |
650 | |
651 | TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices, |
652 | int length) { |
  // Make sure kTfLiteOptionalTensor is not redefined to something other than
  // -1.
  static_assert(kTfLiteOptionalTensor == -1,
                "kTfLiteOptionalTensor should be defined as -1");
657 | |
658 | for (int i = 0; i < length; i++) { |
659 | int index = indices[i]; |
    // Continue if index == kTfLiteOptionalTensor before the additional
    // comparisons below; size_t(-1) is always >= context_.tensors_size.
662 | if (index == kTfLiteOptionalTensor) { |
663 | continue; |
664 | } |
665 | if (index < 0 || static_cast<size_t>(index) >= context_.tensors_size) { |
666 | ReportError( |
667 | "Invalid tensor index %d in %s. The subgraph has %d tensors\n" , index, |
668 | label, context_.tensors_size); |
669 | consistent_ = false; |
670 | return kTfLiteError; |
671 | } |
672 | } |
673 | return kTfLiteOk; |
674 | } |
675 | |
// We have two arrays and we need to check that elements from one array don't
// show up in the other. We could sort both arrays and then iterate with two
// pointers from start to finish, always advancing the smaller one, but since
// these arrays are usually short (<25 elements for inputs, usually <3 for
// outputs) this might be slower than the naive O(n*m) approach: if the arrays
// have sizes n and m with n >> m ~ O(1), the sorting approach is O(n log n)
// whereas the naive one is O(n). Sorting the input and output arrays is also
// undesirable as it destroys the ordering of their elements.
//
// If this ever turns out to be a bottleneck, we can switch to the other
// algorithm.
686 | TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices, |
687 | int num_inputs, |
688 | const int* output_indices, |
689 | int num_outputs) { |
690 | for (int i = 0; i < num_inputs; i++) { |
691 | for (int j = 0; j < num_outputs; j++) { |
692 | if (input_indices[i] == output_indices[j]) { |
693 | ReportError("Tensor %d is both input %d and output %d\n" , |
694 | input_indices[i], i, j); |
695 | consistent_ = false; |
696 | return kTfLiteError; |
697 | } |
698 | } |
699 | } |
700 | return kTfLiteOk; |
701 | } |
702 | |
703 | TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims, |
704 | size_t dims_size, size_t* bytes) { |
705 | TF_LITE_ENSURE(&context_, bytes != nullptr); |
706 | // When 'dims_size' is 0, we simply assume it's a scalar. Therefore, we start |
707 | // 'count' as 1. |
708 | size_t count = 1; |
709 | for (int k = 0; k < dims_size; k++) { |
710 | size_t old_count = count; |
711 | TF_LITE_ENSURE_MSG( |
712 | &context_, |
713 | MultiplyAndCheckOverflow(old_count, dims[k], &count) == kTfLiteOk, |
714 | "BytesRequired number of elements overflowed.\n" ); |
715 | } |
716 | size_t type_size = 0; |
717 | TF_LITE_ENSURE_OK(&context_, GetSizeOfType(&context_, type, &type_size)); |
718 | TF_LITE_ENSURE_MSG( |
719 | &context_, MultiplyAndCheckOverflow(type_size, count, bytes) == kTfLiteOk, |
720 | "BytesRequired number of bytes overflowed.\n" ); |
721 | return kTfLiteOk; |
722 | } |
723 | |
724 | TfLiteStatus Subgraph::AllocateTensors() { |
725 | if (!consistent_) { |
726 | ReportError("AllocateTensors() called on inconsistent model." ); |
727 | return kTfLiteError; |
728 | } |
729 | |
730 | // Restore delegation state if applicable. |
731 | TF_LITE_ENSURE_STATUS(RedoAllDelegates()); |
732 | |
733 | // The runtime doesn't need to adjust any allocations if the state is |
734 | // invokable & no inputs are dynamic (which implies memory plan is unchanged). |
735 | const bool no_reallocations_necessary = |
736 | state_ != kStateUninvokable && |
737 | !HasDynamicTensorImpl(context_, inputs(), &dynamic_tensor_index_); |
738 | if (no_reallocations_necessary) { |
739 | // If non-persistent memory was released, re-allocate it. |
740 | if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) { |
741 | memory_planner_->AcquireNonPersistentMemory(); |
742 | } |
743 | // Check custom allocations, which may have been modified since last |
744 | // AllocateTensors() call. |
745 | if (!custom_allocations_.empty()) { |
746 | for (const auto& idx_and_alloc : custom_allocations_) { |
747 | const int idx = idx_and_alloc.first; |
748 | TfLiteTensor* tensor_at_index = tensor(idx); |
749 | TF_LITE_ENSURE_EQ(context(), tensor_at_index->allocation_type, |
750 | kTfLiteCustom); |
751 | TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor( |
752 | context(), custom_allocations_, idx)); |
753 | } |
754 | } |
755 | return kTfLiteOk; |
756 | } |
757 | |
758 | // Profile "AllocateTensors" only when memory planning is needed. |
759 | TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "AllocateTensors" ); |
760 | |
761 | next_execution_plan_index_to_prepare_ = 0; |
762 | next_execution_plan_index_to_plan_allocation_ = 0; |
763 | next_original_execution_plan_index_to_prepare_ = 0; |
764 | if (memory_planner_) { |
765 | TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations()); |
766 | } |
767 | |
768 | TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors()); |
769 | |
770 | state_ = kStateInvokable; |
771 | |
772 | // Reset the variable tensors to zero after (re)allocating the tensors. |
773 | // Developers shouldn't rely on the side effect of this function to reset |
774 | // variable tensors. They should call `ResetVariableTensors` directly |
775 | // instead. |
776 | ResetVariableTensors(); |
777 | |
778 | // Initialize the mapping between tensor index and the last execution plan |
779 | // index that uses the tensor. |
780 | InitializeTensorReleaseMap(); |
781 | |
782 | return kTfLiteOk; |
783 | } |
784 | |
785 | // TODO(b/115961645): Support non-zero default values. |
786 | TfLiteStatus Subgraph::ResetVariableTensors() { |
787 | for (auto& tensor : tensors_) { |
788 | if (!tensor.is_variable) { |
789 | continue; |
790 | } |
791 | |
792 | if (tensor.allocation_type == kTfLiteArenaRwPersistent) { |
793 | // If variable tensors allocation type is `kTfLiteArenaRwPersistent`, then |
794 | // they must be allocated after the initial `PrepareOpsAndTensors()` is |
795 | // called. |
796 | TF_LITE_ENSURE(&context_, tensor.data.raw != nullptr); |
797 | tflite::ResetVariableTensor(&tensor); |
798 | } else { |
799 | // If variable tensors allocation type is not `kTfLiteArenaRwPersistent`, |
800 | // then it can only be `kTfLiteCustom` in which case, we do not reset it. |
801 | TF_LITE_ENSURE_EQ(&context_, tensor.allocation_type, kTfLiteCustom); |
802 | } |
803 | } |
804 | return kTfLiteOk; |
805 | } |
806 | |
807 | TfLiteStatus Subgraph::AddNodeWithParameters( |
808 | const std::vector<int>& inputs, const std::vector<int>& outputs, |
809 | const std::vector<int>& intermediates, const char* init_data, |
810 | size_t init_data_size, void* builtin_data, |
811 | const TfLiteRegistration* registration, int* node_index) { |
812 | std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data, |
813 | free); |
814 | if (state_ == kStateInvokableAndImmutable) { |
815 | ReportError("AddNodeWithParameters is disallowed when graph is immutable." ); |
816 | return kTfLiteError; |
817 | } |
818 | state_ = kStateUninvokable; |
819 | |
820 | TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("node inputs" , inputs.data(), |
821 | inputs.size())); |
822 | TF_LITE_ENSURE_OK( |
823 | &context_, |
824 | CheckTensorIndices("node outputs" , outputs.data(), outputs.size())); |
825 | |
826 | // For builtin ops, inputs and outputs must not overlap. Custom ops must do |
827 | // this check by themselves if they don't support overlapping tensors. This |
828 | // distinction is to allow custom ops to just forward a tensor, reusing it as |
829 | // both input and output. |
830 | if (builtin_data != nullptr) { |
831 | TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap( |
832 | inputs.data(), inputs.size(), |
833 | outputs.data(), outputs.size())); |
834 | } |
835 | |
836 | int new_node_index = nodes_and_registration_.size(); |
837 | if (node_index) *node_index = new_node_index; |
838 | nodes_and_registration_.emplace_back(); |
839 | auto& node_and_reg = nodes_and_registration_.back(); |
840 | TfLiteNode& node = node_and_reg.first; |
841 | |
842 | // NOTE, here we are not using move semantics yet, since our internal |
843 | // representation isn't std::vector, but in the future we would like to avoid |
844 | // copies, so we want the interface to take r-value references now. |
845 | node.inputs = ConvertVectorToTfLiteIntArray(inputs); |
846 | node.outputs = ConvertVectorToTfLiteIntArray(outputs); |
847 | node.intermediates = ConvertVectorToTfLiteIntArray(intermediates); |
848 | node.temporaries = TfLiteIntArrayCreate(0); |
849 | if (init_data) { |
850 | node.user_data = OpInit(*registration, init_data, init_data_size); |
851 | } else { |
852 | node.user_data = OpInit( |
853 | *registration, static_cast<const char*>(builtin_data_deleter.get()), 0); |
854 | } |
855 | |
856 | node.builtin_data = builtin_data_deleter.release(); |
857 | |
858 | if (registration->builtin_code == BuiltinOperator_CUSTOM) { |
859 | // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer |
860 | // `Operator` table is passed in. |
861 | node.custom_initial_data = init_data; |
862 | node.custom_initial_data_size = init_data_size; |
863 | } else { |
864 | node.custom_initial_data = nullptr; |
865 | node.custom_initial_data_size = 0; |
866 | } |
867 | node.might_have_side_effect = OpMightHaveSideEffect(&node, registration); |
868 | |
869 | node.delegate = nullptr; |
870 | // Copying of registration is required to support unresolved custom ops. |
871 | node_and_reg.second = *registration; |
872 | execution_plan_.push_back(new_node_index); |
873 | return kTfLiteOk; |
874 | } |
875 | |
876 | namespace { |
877 | // Returns true if any tensor identified by indexes in 'tensor_indexes' is |
878 | // of type 'kTfLiteResource'. False otherwise. |
879 | bool AnyTensorOfTypeResource(const std::vector<TfLiteTensor>& tensors, |
880 | const TfLiteIntArray* tensor_indexes) { |
881 | for (int i = 0; i < tensor_indexes->size; ++i) { |
882 | int tensor_index = tensor_indexes->data[i]; |
883 | if (tensor_index >= 0 && tensor_index < tensors.size() && |
884 | tensors[tensor_index].type == kTfLiteResource) |
885 | return true; |
886 | } |
887 | return false; |
888 | } |
889 | |
890 | } // namespace |
891 | |
892 | bool Subgraph::OpMightHaveSideEffect( |
893 | const TfLiteNode* node, const TfLiteRegistration* registration) const { |
894 | // Check if any of the input tensors are of type resource. |
895 | if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true; |
896 | // Check if any of the output tensors are of type resource. |
897 | if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true; |
  // Treat control flow ops as having side effects, since some ops inside the
  // control flow subgraphs can have side effects.
900 | if (registration->builtin_code == kTfLiteBuiltinIf || |
901 | registration->builtin_code == kTfLiteBuiltinWhile || |
902 | registration->builtin_code == kTfLiteBuiltinCallOnce) |
903 | return true; |
904 | return false; |
905 | } |
906 | |
907 | TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index, |
908 | const std::vector<int>& dims) { |
909 | const bool delegates_applied = !pre_delegation_execution_plan_.empty(); |
910 | const bool graph_is_immutable = state_ == kStateInvokableAndImmutable; |
911 | if (graph_is_immutable && !delegates_applied) { |
912 | ReportError("ResizeInputTensor is disallowed when graph is immutable." ); |
913 | return kTfLiteError; |
914 | } |
915 | |
916 | TF_LITE_ENSURE(&context_, |
917 | tensor_index < context_.tensors_size && tensor_index >= 0); |
918 | TfLiteTensor* tensor = &context_.tensors[tensor_index]; |
919 | |
920 | // Short-circuit the state change if the dimensions don't change, avoiding |
921 | // unnecessary (re)allocations. |
922 | // |
923 | // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise |
924 | // the subgraph won't allocate memory for a dynamic tensor when its size |
925 | // is equal to the original tensor size. |
926 | if (tensor->data.raw != nullptr && |
927 | EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) { |
928 | return kTfLiteOk; |
929 | } |
930 | |
931 | if (graph_is_immutable) { |
932 | // Undo delegation if it resulted in the graph being immutable. |
933 | TF_LITE_ENSURE_STATUS(UndoAllDelegates()); |
934 | } |
935 | state_ = kStateUninvokable; |
936 | return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims)); |
937 | } |
938 | |
939 | TfLiteStatus Subgraph::ResizeInputTensorStrict(int tensor_index, |
940 | const std::vector<int>& dims) { |
941 | TF_LITE_ENSURE(&context_, |
942 | tensor_index < context_.tensors_size && tensor_index >= 0); |
943 | TfLiteTensor* tensor = &context_.tensors[tensor_index]; |
944 | |
945 | // Ensure that only unknown dimensions can be resized. |
946 | TF_LITE_ENSURE_EQ(&context_, tensor->dims->size, dims.size()); |
947 | for (size_t idx = 0; idx < dims.size(); idx++) { |
948 | // `dims_signature` is not defined when no unknown dimensions are present. |
949 | int dim_signature; |
950 | if (tensor->dims_signature && tensor->dims_signature->size) { |
951 | dim_signature = tensor->dims_signature->data[idx]; |
952 | } else { |
953 | dim_signature = tensor->dims->data[idx]; |
954 | } |
955 | |
956 | if (dim_signature != -1 && dim_signature != dims[idx]) { |
957 | ReportError( |
958 | "Attempting to resize dimension %d of tensor %d with value %d to %d. " |
959 | "ResizeInputTensorStrict only allows mutating unknown dimensions " |
960 | "identified by -1." , |
961 | idx, tensor_index, dim_signature, dims[idx]); |
962 | return kTfLiteError; |
963 | } |
964 | } |
965 | |
966 | return ResizeInputTensor(tensor_index, dims); |
967 | } |
968 | |
969 | TfLiteStatus Subgraph::ReleaseNonPersistentMemory() { |
970 | state_ = kStateUninvokable; |
971 | if (memory_planner_) { |
972 | TF_LITE_ENSURE_STATUS(memory_planner_->ReleaseNonPersistentMemory()); |
973 | } |
974 | return kTfLiteOk; |
975 | } |
976 | |
977 | TfLiteStatus Subgraph::ReleaseMemory() { |
978 | state_ = kStateUninvokable; |
979 | ReleaseNonPersistentMemory(); |
980 | |
981 | // Free dynamic input tensors. |
982 | for (const int input_tensor_idx : inputs_) { |
983 | if (input_tensor_idx == kTfLiteOptionalTensor) continue; |
984 | TfLiteTensor* input_tensor = tensor(input_tensor_idx); |
985 | if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic) |
986 | continue; |
987 | if (input_tensor->data.raw) { |
988 | TfLiteTensorDataFree(input_tensor); |
989 | } |
990 | } |
991 | // Free dynamic output tensors. |
992 | for (const int output_tensor_idx : outputs_) { |
993 | if (output_tensor_idx == kTfLiteOptionalTensor) continue; |
994 | TfLiteTensor* output_tensor = tensor(output_tensor_idx); |
995 | if (!output_tensor || output_tensor->allocation_type != kTfLiteDynamic) |
996 | continue; |
997 | if (output_tensor->data.raw) { |
998 | TfLiteTensorDataFree(output_tensor); |
999 | } |
1000 | } |
1001 | |
1002 | return kTfLiteOk; |
1003 | } |
1004 | |
1005 | // Give 'op_reg' a chance to initialize itself using the contents of |
1006 | // 'buffer'. If registration_external is valid, use the 'init' callback from |
1007 | // that. |
1008 | void* Subgraph::OpInit(const TfLiteRegistration& op_reg, const char* buffer, |
1009 | size_t length) { |
1010 | if (op_reg.registration_external && op_reg.registration_external->init) { |
1011 | return op_reg.registration_external->init( |
1012 | op_reg.registration_external->init_data, |
1013 | reinterpret_cast<TfLiteOpaqueContext*>(&context_), buffer, length); |
1014 | } |
1015 | if (op_reg.init == nullptr) return nullptr; |
1016 | return op_reg.init(&context_, buffer, length); |
1017 | } |
1018 | |
1019 | TfLiteStatus Subgraph::OpPrepare(const TfLiteRegistration& op_reg, |
1020 | TfLiteNode* node) { |
1021 | if (op_reg.registration_external && op_reg.registration_external->prepare) { |
1022 | // The 'data' field required by the 'prepare' function pointer must be |
1023 | // retrieved from the 'registration_external' object itself. |
1024 | return op_reg.registration_external->prepare( |
1025 | op_reg.registration_external->prepare_data, |
1026 | reinterpret_cast<TfLiteOpaqueContext*>(&context_), |
1027 | reinterpret_cast<TfLiteOpaqueNode*>(node)); |
1028 | } |
1029 | if (op_reg.prepare == nullptr) { |
1030 | // Check if it's an unresolved custom op. |
1031 | if (IsUnresolvedCustomOp(op_reg)) { |
1032 | if (IsFlexOp(op_reg.custom_name)) { |
1033 | ReportError( |
1034 | "Select TensorFlow op(s), included in the given model, is(are) not " |
1035 | "supported by this interpreter. Make sure you apply/link the Flex " |
1036 | "delegate before inference. For the Android, it can be resolved by " |
1037 | "adding \"org.tensorflow:tensorflow-lite-select-tf-ops\" " |
1038 | "dependency. See instructions: " |
1039 | "https://www.tensorflow.org/lite/guide/ops_select" ); |
1040 | } else { |
1041 | ReportError( |
1042 | "Encountered unresolved custom op: %s.\nSee instructions: " |
1043 | "https://www.tensorflow.org/lite/guide/ops_custom " , |
1044 | op_reg.custom_name ? op_reg.custom_name : "UnknownOp" ); |
1045 | } |
1046 | return kTfLiteUnresolvedOps; |
1047 | } |
1048 | // Resolved ops can have a null Prepare function. |
1049 | return kTfLiteOk; |
1050 | } |
1051 | return op_reg.prepare(&context_, node); |
1052 | } |
1053 | |
1054 | // Invoke the operator represented by 'node'. |
1055 | TfLiteStatus Subgraph::OpInvoke(const TfLiteRegistration& op_reg, |
1056 | TfLiteNode* node) { |
1057 | if (op_reg.registration_external && op_reg.registration_external->invoke) { |
1058 | return op_reg.registration_external->invoke( |
1059 | op_reg.registration_external->invoke_data, |
1060 | reinterpret_cast<TfLiteOpaqueContext*>(&context_), |
1061 | reinterpret_cast<TfLiteOpaqueNode*>(node)); |
1062 | } |
1063 | if (op_reg.invoke == nullptr) return kTfLiteError; |
1064 | return op_reg.invoke(&context_, node); |
1065 | } |
1066 | |
1067 | // Let 'op_reg' release any memory it might have allocated via 'OpInit'. |
1068 | // If registration_external is valid, use the 'free' callback from that. |
1069 | void Subgraph::OpFree(const TfLiteRegistration& op_reg, void* buffer) { |
1070 | if (op_reg.registration_external && op_reg.registration_external->free && |
1071 | buffer) { |
1072 | return op_reg.registration_external->free( |
1073 | op_reg.registration_external->free_data, |
1074 | reinterpret_cast<TfLiteOpaqueContext*>(&context_), buffer); |
1075 | } |
1076 | if (op_reg.free == nullptr) return; |
1077 | if (buffer) { |
1078 | op_reg.free(&context_, buffer); |
1079 | } |
1080 | } |
1081 | |
1082 | TfLiteStatus Subgraph::MayAllocateOpOutput(TfLiteNode* node) { |
1083 | if (ShouldOptimizeMemoryForLargeTensors()) { |
1084 | for (int i = 0; i < node->outputs->size; ++i) { |
1085 | int tensor_index = node->outputs->data[i]; |
1086 | TfLiteTensor* tensor = &context_.tensors[tensor_index]; |
1087 | if (tensor->data.raw == nullptr && |
1088 | tensor->allocation_type == kTfLiteDynamic) { |
1089 | TfLiteTensorRealloc(tensor->bytes, tensor); |
1090 | } |
1091 | } |
1092 | } |
1093 | return kTfLiteOk; |
1094 | } |
1095 | |
1096 | TfLiteStatus Subgraph::PrepareOpsStartingAt( |
1097 | int first_execution_plan_index, const std::vector<int>& execution_plan, |
1098 | int* last_execution_plan_index_prepared) { |
1099 | if (first_execution_plan_index == 0) { |
    // Inputs that are forwarded to outputs without modification are never
    // evaluated by any operator, so the subgraph's output tensors also need to
    // be checked for dynamic tensors at the beginning.
1103 | has_dynamic_tensors_ = |
1104 | HasDynamicTensorImpl(context_, outputs(), &dynamic_tensor_index_); |
1105 | } |
1106 | for (int execution_plan_index = first_execution_plan_index; |
1107 | execution_plan_index < execution_plan.size(); execution_plan_index++) { |
1108 | int node_index = execution_plan[execution_plan_index]; |
1109 | TfLiteNode& node = nodes_and_registration_[node_index].first; |
1110 | const TfLiteRegistration& registration = |
1111 | nodes_and_registration_[node_index].second; |
1112 | EnsureTensorsVectorCapacity(); |
1113 | #ifdef TF_LITE_TENSORFLOW_PROFILER |
1114 | tflite::OnTfLiteOpPrepare(GetTFLiteOpName(registration), subgraph_index_, |
1115 | node_index); |
1116 | #endif // TF_LITE_TENSORFLOW_PROFILER |
1117 | const TfLiteStatus op_prepare_status = OpPrepare(registration, &node); |
1118 | if (op_prepare_status != kTfLiteOk) { |
1119 | ReportOpError(&context_, node, registration, node_index, |
1120 | "failed to prepare" ); |
1121 | return op_prepare_status; |
1122 | } |
1123 | |
1124 | *last_execution_plan_index_prepared = execution_plan_index; |
1125 | |
1126 | // Discontinue if the node has dynamic outputs. Note that we don't |
1127 | // stop for dynamic temporary tensors since they won't affect the |
1128 | // sizes of other tensors in the graph. |
1129 | if (HasDynamicTensor(context_, node.outputs, &dynamic_tensor_index_)) { |
1130 | has_dynamic_tensors_ = true; |
1131 | return kTfLiteOk; |
1132 | } |
1133 | } |
1134 | return kTfLiteOk; |
1135 | } |
1136 | |
1137 | TfLiteStatus Subgraph::PrepareOpsAndTensors() { |
1138 | if (!memory_planner_) { |
1139 | #ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER |
1140 | memory_planner_.reset(new SimplePlanner(&context_, CreateGraphInfo())); |
1141 | #else |
1142 | memory_planner_ = std::make_unique<ArenaPlanner>( |
1143 | &context_, CreateGraphInfo(), ShouldPreserveAllTensors(), |
1144 | kDefaultTensorAlignment, subgraph_index_); |
1145 | #endif |
1146 | memory_planner_->PlanAllocations(); |
1147 | } |
1148 | |
1149 | // Prepare original execution plan if any applied delegate wants it. |
1150 | // If any of the delegates is immutable, this won't be triggered |
1151 | // post-delegation (since we undo/redo delegation). For all other cases, other |
1152 | // delegates that do shape propagation themselves would still be able to. |
1153 | bool prepare_original_plan = false; |
1154 | if (!pre_delegation_execution_plan_.empty()) { |
1155 | for (int i = 0; i < delegates_applied_.size(); ++i) { |
1156 | if ((TfLiteDelegateGetFlagsInternal(delegates_applied_[i]) & |
1157 | kTfLiteDelegateFlagsRequirePropagatedShapes)) { |
1158 | prepare_original_plan = true; |
1159 | break; |
1160 | } |
1161 | } |
1162 | } |
1163 | if (prepare_original_plan) { |
1164 | int last_original_exec_plan_index_prepared = 0; |
1165 | TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt( |
1166 | next_execution_plan_index_to_prepare_, pre_delegation_execution_plan_, |
1167 | &last_original_exec_plan_index_prepared)); |
1168 | next_original_execution_plan_index_to_prepare_ = |
1169 | last_original_exec_plan_index_prepared + 1; |
1170 | } |
1171 | |
1172 | int last_exec_plan_index_prepared = 0; |
1173 | TF_LITE_ENSURE_STATUS( |
1174 | PrepareOpsStartingAt(next_execution_plan_index_to_prepare_, |
1175 | execution_plan_, &last_exec_plan_index_prepared)); |
1176 | next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1; |
1177 | |
1178 | // Execute arena allocations. |
1179 | TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations( |
1180 | next_execution_plan_index_to_plan_allocation_, |
1181 | last_exec_plan_index_prepared)); |
1182 | |
1183 | if (!custom_allocations_.empty()) { |
1184 | // Verify custom allocations for output tensors from the ops that have just |
1185 | // been prepared. Other output tensors might be resized later. |
1186 | if (!nodes_and_registration_.empty()) { |
1187 | for (int node_idx = next_execution_plan_index_to_plan_allocation_; |
1188 | node_idx <= last_exec_plan_index_prepared; ++node_idx) { |
1189 | TfLiteNode& node = nodes_and_registration_[node_idx].first; |
1190 | for (int i = 0; i < node.outputs->size; ++i) { |
1191 | const int output_tensor_idx = node.outputs->data[i]; |
1192 | if (output_tensor_idx == kTfLiteOptionalTensor) continue; |
1193 | TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor( |
1194 | context(), custom_allocations_, output_tensor_idx)); |
1195 | } |
1196 | } |
1197 | } |
1198 | // Check input custom allocs only if we just prepared nodes from the idx 0. |
1199 | if (next_execution_plan_index_to_plan_allocation_ == 0) { |
1200 | for (const int input_tensor_idx : inputs_) { |
1201 | if (input_tensor_idx == kTfLiteOptionalTensor) continue; |
1202 | TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor( |
1203 | context(), custom_allocations_, input_tensor_idx)); |
1204 | } |
1205 | } |
1206 | } |
1207 | |
1208 | next_execution_plan_index_to_plan_allocation_ = |
1209 | last_exec_plan_index_prepared + 1; |
1210 | |
1211 | return kTfLiteOk; |
1212 | } |
1213 | |
1214 | TfLiteStatus Subgraph::RemoveUnusedInputs() { |
1215 | auto graph_info = CreateGraphInfo(); |
1216 | std::vector<int> refcounts(graph_info->num_tensors(), 0); |
1217 | |
1218 | for (int tensor_index : graph_info->variables()) { |
1219 | refcounts[tensor_index]++; |
1220 | } |
1221 | // Count references to node input tensors. |
1222 | for (size_t i = 0; i < graph_info->num_execution_nodes(); ++i) { |
1223 | const TfLiteNode& node = graph_info->node(i); |
1224 | TfLiteIntArray* node_inputs = node.inputs; |
1225 | for (int j = 0; j < node_inputs->size; ++j) { |
1226 | int tensor_index = node_inputs->data[j]; |
1227 | if (tensor_index != kTfLiteOptionalTensor) { |
1228 | refcounts[tensor_index]++; |
1229 | } |
1230 | } |
1231 | } |
1232 | // Count references to SubGraph output tensors. |
1233 | for (auto iter = outputs_.begin(); iter != outputs_.end(); iter++) { |
1234 | if (*iter == kTfLiteOptionalTensor) continue; |
1235 | refcounts[*iter]++; |
1236 | } |
1237 | |
1238 | // Mark unused inputs as kTfLiteOptionalTensor. |
1239 | for (auto iter = inputs_.begin(); iter != inputs_.end(); iter++) { |
1240 | if (*iter == kTfLiteOptionalTensor) continue; |
1241 | if (refcounts[*iter] == 0) { |
1242 | tensor(*iter)->bytes = 0; // To make it clearer for memory analysis. |
1243 | *iter = kTfLiteOptionalTensor; |
1244 | } |
1245 | } |
1246 | return kTfLiteOk; |
1247 | } |
1248 | |
1249 | TfLiteStatus Subgraph::Invoke() { |
1250 | if (!consistent_) { |
1251 | ReportError("Invoke called on model that is not consistent." ); |
1252 | return kTfLiteError; |
1253 | } |
1254 | |
1255 | TfLiteStatus status = kTfLiteOk; |
1256 | if (state_ == kStateUninvokable) { |
1257 | ReportError("Invoke called on model that is not ready." ); |
1258 | return kTfLiteError; |
1259 | } else if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) { |
1260 | ReportError("Non-persistent memory is not available." ); |
1261 | return kTfLiteError; |
1262 | } |
1263 | TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "Invoke" ); |
1264 | #ifdef TF_LITE_TENSORFLOW_PROFILER |
1265 | tensorflow::profiler::TraceMe* trace_subgraph = |
1266 | tflite::OnTfLiteSubgraphInvoke(name_.c_str(), subgraph_index_); |
1267 | #endif // TF_LITE_TENSORFLOW_PROFILER |
1268 | |
1269 | // Invocations are always done in node order. |
1270 | // Note that calling Invoke repeatedly will cause the original memory plan to |
1271 | // be reused, unless either ResizeInputTensor() or AllocateTensors() has been |
1272 | // called. |
1273 | for (int execution_plan_index = 0; |
1274 | execution_plan_index < execution_plan_.size(); execution_plan_index++) { |
1275 | if (execution_plan_index == next_execution_plan_index_to_prepare_) { |
1276 | TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors()); |
1277 | TF_LITE_ENSURE(&context_, next_execution_plan_index_to_prepare_ >= |
1278 | execution_plan_index); |
1279 | } |
1280 | int node_index = execution_plan_[execution_plan_index]; |
1281 | TfLiteNode& node = nodes_and_registration_[node_index].first; |
1282 | const TfLiteRegistration& registration = |
1283 | nodes_and_registration_[node_index].second; |
1284 | |
1285 | const char* op_name = nullptr; |
1286 | if (profiler_) op_name = GetTFLiteOpName(registration); |
1287 | #ifdef TF_LITE_TENSORFLOW_PROFILER |
1288 | if (!op_name) { |
1289 | op_name = GetTFLiteOpName(registration); |
1290 | } |
1291 | tensorflow::profiler::TraceMe* trace_op = |
1292 | tflite::OnTfLiteOpInvoke(op_name, subgraph_index_, node_index); |
1293 | #endif // TF_LITE_TENSORFLOW_PROFILER |
1294 | TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler_.get(), op_name, node_index); |
1295 | |
1296 | for (int i = 0; i < node.inputs->size; ++i) { |
1297 | int tensor_index = node.inputs->data[i]; |
1298 | if (tensor_index == kTfLiteOptionalTensor) { |
1299 | continue; |
1300 | } |
1301 | TfLiteTensor* tensor = &tensors_[tensor_index]; |
1302 | if (tensor->delegate && tensor->delegate != node.delegate && |
1303 | tensor->data_is_stale) { |
1304 | TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index)); |
1305 | } |
1306 | if (tensor->data.raw == nullptr && tensor->bytes > 0) { |
1307 | if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 && |
1308 | tensor->dims->size != 1) { |
1309 | // In general, having a tensor here with no buffer will be an error. |
1310 | // However, for the reshape operator, the second input tensor is |
1311 | // sometimes only used for the shape, not for the data. Thus, null |
1312 | // buffer is ok in this situation. |
1313 | // The situation where null buffer is not ok for reshape operator is |
1314 | // only when there are 2 inputs given to the node and the one |
1315 | // corresponding to the shape (i == 1) is a vector that contains all |
1316 | // dimensions. See `GetOutputShape()` function in |
1317 | // `tensorflow/lite/kernels/reshape.cc` |
1318 | continue; |
1319 | } else { |
1320 | // In all other cases, we need to return an error as otherwise we will |
1321 | // trigger a null pointer dereference (likely). |
1322 | ReportError("Input tensor %d lacks data" , tensor_index); |
1323 | return kTfLiteError; |
1324 | } |
1325 | } |
1326 | } |
    // Allocate memory for dynamic output tensors that must be allocated before
    // executing the node.
1329 | MayAllocateOpOutput(&node); |
1330 | |
1331 | if (check_cancelled_func_ != nullptr && |
1332 | check_cancelled_func_(cancellation_data_)) { |
1333 | ReportError("Client requested cancel during Invoke()" ); |
1334 | return kTfLiteError; |
1335 | } |
1336 | |
1337 | EnsureTensorsVectorCapacity(); |
1338 | tensor_resized_since_op_invoke_ = false; |
1339 | if (OpInvoke(registration, &node) != kTfLiteOk) { |
1340 | return ReportOpError(&context_, node, registration, node_index, |
1341 | "failed to invoke" ); |
1342 | } |
1343 | |
1344 | // Force execution prep for downstream ops if the latest op triggered the |
1345 | // resize of a dynamic tensor. |
1346 | if (tensor_resized_since_op_invoke_ && |
1347 | HasDynamicTensor(context_, node.outputs, nullptr)) { |
1348 | next_execution_plan_index_to_prepare_ = execution_plan_index + 1; |
1349 | |
1350 | // This happens when an intermediate dynamic tensor is resized. |
1351 | // We don't have to prepare all the ops, but we need to recompute |
1352 | // the allocation plan. |
1353 | if (next_execution_plan_index_to_plan_allocation_ > |
1354 | next_execution_plan_index_to_prepare_) { |
1355 | next_execution_plan_index_to_plan_allocation_ = |
1356 | next_execution_plan_index_to_prepare_; |
1357 | if (memory_planner_) { |
1358 | TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocationsAfter( |
1359 | next_execution_plan_index_to_plan_allocation_ - 1)); |
1360 | } |
1361 | } |
1362 | } |
1363 | // Release dynamic tensor memory if configured by the user. |
1364 | MaybeReleaseDynamicTensors(node, node_index); |
1365 | |
1366 | #ifdef TF_LITE_TENSORFLOW_PROFILER |
1367 | tflite::OnTfLiteOpInvokeEnd(trace_op); |
1368 | #endif // TF_LITE_TENSORFLOW_PROFILER |
1369 | } |
1370 | #ifdef TF_LITE_TENSORFLOW_PROFILER |
1371 | tflite::OnTfLiteSubgraphInvokeEnd(trace_subgraph); |
1372 | #endif // TF_LITE_TENSORFLOW_PROFILER |
1373 | return status; |
1374 | } |
1375 | |
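// Static entry point installed as `TfLiteContext::ResizeTensor`. If the
// requested shape matches the tensor's current shape and a buffer is already
// allocated, only the dims array is swapped in; otherwise the call is
// forwarded to ResizeTensorImpl(), which takes ownership of `new_size`.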
1376 | TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context, |
1377 | TfLiteTensor* tensor, |
1378 | TfLiteIntArray* new_size) { |
  // If the dimensions don't change, avoid unnecessary (re)allocations.
1381 | // |
1382 | // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise |
1383 | // the subgraph won't allocate memory for a dynamic tensor when its size |
1384 | // is equal to the original tensor size. |
1385 | if (tensor->data.raw != nullptr && |
1386 | EqualArrayAndTfLiteIntArray(tensor->dims, new_size->size, |
1387 | new_size->data)) { |
1388 | // A number of clients assume |new_size| remains valid upon success, so |
1389 | // swap it in as the new (but logically identical) tensor dims. |
1390 | TfLiteIntArrayFree(tensor->dims); |
1391 | tensor->dims = new_size; |
1392 | return kTfLiteOk; |
1393 | } |
1394 | |
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ResizeTensorImpl
  // (this function is static).
1398 | return static_cast<Subgraph*>(context->impl_) |
1399 | ->ResizeTensorImpl(tensor, new_size); |
1400 | } |
1401 | |
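// Forwards a formatted error message to the subgraph's error reporter.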
1402 | void Subgraph::ReportErrorImpl(const char* format, va_list args) { |
1403 | error_reporter_->Report(format, args); |
1404 | } |
1405 | |
1406 | void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) { |
1407 | va_list args; |
1408 | va_start(args, format); |
1409 | auto* f = static_cast<Subgraph*>(context->impl_); |
1410 | // Note here that context->impl_ is recovering the this pointer for an |
1411 | // instance of Subgraph to call into the member function ReportErrorImpl |
1412 | // (this function is static). |
1413 | f->ReportErrorImpl(format, args); |
1414 | va_end(args); |
1415 | } |
1416 | |
1417 | // Entry point for C node plugin API to report an error. |
1418 | void Subgraph::ReportError(const char* format, ...) { |
1419 | va_list args; |
1420 | va_start(args, format); |
1421 | auto* f = static_cast<Subgraph*>(context_.impl_); |
  // Note here that context_.impl_ is recovering the this pointer for this
  // Subgraph instance in order to call into the member function
  // ReportErrorImpl.
1425 | f->ReportErrorImpl(format, args); |
1426 | va_end(args); |
1427 | } |
1428 | |
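// Appends `tensors_to_add` zero-initialized tensors (with null buffer handles)
// to the subgraph and refreshes context_.tensors / context_.tensors_size. On
// return, *first_new_tensor_index (if non-null) holds the index of the first
// newly added tensor.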
1429 | TfLiteStatus Subgraph::AddTensors(int tensors_to_add, |
1430 | int* first_new_tensor_index) { |
1431 | const size_t base_index = tensors_.size(); |
1432 | if (first_new_tensor_index) *first_new_tensor_index = base_index; |
1433 | tensors_.resize(tensors_.size() + tensors_to_add); |
1434 | for (size_t i = base_index; i < tensors_.size(); i++) { |
1435 | memset(&tensors_[i], 0, sizeof(tensors_[i])); |
1436 | tensors_[i].buffer_handle = kTfLiteNullBufferHandle; |
1437 | } |
1438 | context_.tensors = tensors_.data(); |
1439 | context_.tensors_size = tensors_.size(); |
1440 | return kTfLiteOk; |
1441 | } |
1442 | |
1443 | TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add, |
1444 | int* first_new_tensor_index) { |
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function AddTensors
  // (this function is static).
1448 | return static_cast<Subgraph*>(context->impl_) |
1449 | ->AddTensors(tensors_to_add, first_new_tensor_index); |
1450 | } |
1451 | |
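// Returns pointers to the node and registration stored at `node_index`, after
// bounds-checking the index and null-checking the output arguments.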
1452 | TfLiteStatus Subgraph::GetNodeAndRegistration( |
1453 | int node_index, TfLiteNode** node, TfLiteRegistration** registration) { |
1454 | TF_LITE_ENSURE(&context_, node_index >= 0); |
1455 | auto nodes_size = nodes_and_registration_.size(); |
1456 | TF_LITE_ENSURE(&context_, static_cast<size_t>(node_index) < nodes_size); |
1457 | TF_LITE_ENSURE(&context_, node != nullptr && registration != nullptr); |
1458 | auto& node_and_reg = nodes_and_registration_[node_index]; |
1459 | *node = &node_and_reg.first; |
1460 | *registration = &node_and_reg.second; |
1461 | return kTfLiteOk; |
1462 | } |
1463 | |
1464 | TfLiteStatus Subgraph::GetNodeAndRegistration( |
1465 | struct TfLiteContext* context, int node_index, TfLiteNode** node, |
1466 | TfLiteRegistration** registration) { |
1467 | return static_cast<Subgraph*>(context->impl_) |
1468 | ->GetNodeAndRegistration(node_index, node, registration); |
1469 | } |
1470 | |
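// Configures the tensor at `tensor_index` as a read-only (kTfLiteMmapRo)
// tensor backed by `buffer`. For dense, non-string tensors the provided byte
// count is validated against the dimensions. When the type and shape are
// unchanged the existing tensor is reused, which keeps the graph invokable.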
1471 | TfLiteStatus Subgraph::SetTensorParametersReadOnly( |
1472 | int tensor_index, TfLiteType type, const char* name, const size_t ndims, |
1473 | const int* dims, TfLiteQuantization quantization, const char* buffer, |
1474 | size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity) { |
1475 | // Ensure quantization cleanup on failure. |
1476 | ScopedTfLiteQuantization scoped_quantization(&quantization); |
1477 | ScopedTfLiteSparsity scoped_sparsity(sparsity); |
1478 | if (state_ == kStateInvokableAndImmutable) { |
1479 | ReportError( |
1480 | "SetTensorParametersReadOnly is disallowed when graph is immutable." ); |
1481 | return kTfLiteError; |
1482 | } |
1483 | |
1484 | TF_LITE_ENSURE(&context_, |
1485 | tensor_index < context_.tensors_size && tensor_index >= 0); |
1486 | |
1487 | // For most tensors we know exactly how much memory is necessary so we can |
1488 | // ensure the buffer is large enough. However, we need to skip string tensors |
1489 | // and sparse tensors because their sizes change with the contents. |
1490 | // TODO(b/145615516): Extend BytesRequired to check sparse tensors. |
1491 | if (type != kTfLiteString && type != kTfLiteResource && |
1492 | type != kTfLiteVariant && sparsity == nullptr) { |
1493 | size_t required_bytes; |
1494 | TF_LITE_ENSURE_OK(&context_, |
1495 | BytesRequired(type, dims, ndims, &required_bytes)); |
1496 | TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); |
1497 | } |
1498 | |
1499 | TfLiteTensor& tensor = context_.tensors[tensor_index]; |
1500 | if (type == tensor.type && |
1501 | EqualArrayAndTfLiteIntArray(tensor.dims, ndims, dims)) { |
1502 | // Fast path which does not invalidate the invokable property. |
1503 | TfLiteTensorDataFree(&tensor); |
1504 | TfLiteQuantizationFree(&tensor.quantization); |
1505 | tensor.data.raw = const_cast<char*>(buffer); |
1506 | if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(ndims, dims); |
1507 | tensor.params = GetLegacyQuantization(quantization); |
1508 | tensor.quantization = *scoped_quantization.release(); |
1509 | tensor.sparsity = scoped_sparsity.release(); |
1510 | tensor.allocation_type = kTfLiteMmapRo; |
1511 | tensor.allocation = allocation; |
1512 | } else { |
1513 | state_ = kStateUninvokable; |
1514 | TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(ndims, dims), |
1515 | GetLegacyQuantization(quantization), |
1516 | const_cast<char*>(buffer), bytes, kTfLiteMmapRo, |
1517 | allocation, false, &tensor); |
1518 | tensor.quantization = *scoped_quantization.release(); |
1519 | tensor.sparsity = scoped_sparsity.release(); |
1520 | } |
1521 | return kTfLiteOk; |
1522 | } |
1523 | |
// Set the parameters of the read-write tensor at `tensor_index`. No buffer is
// supplied here: non-string tensors are sized from `dims` and allocated in the
// memory arena (persistently if `is_variable` is true), while
// string/resource/variant tensors are allocated dynamically at runtime.
1528 | TfLiteStatus Subgraph::SetTensorParametersReadWrite( |
1529 | int tensor_index, TfLiteType type, const char* name, const size_t ndims, |
1530 | const int* dims, TfLiteQuantization quantization, bool is_variable, |
1531 | const size_t ndims_signature, const int* dims_signature) { |
1532 | // Ensure quantization cleanup on failure. |
1533 | ScopedTfLiteQuantization scoped_quantization(&quantization); |
1534 | if (state_ == kStateInvokableAndImmutable) { |
1535 | ReportError( |
1536 | "SetTensorParametersReadWrite is disallowed when graph is immutable." ); |
1537 | return kTfLiteError; |
1538 | } |
1539 | TF_LITE_ENSURE(&context_, |
1540 | tensor_index < context_.tensors_size && tensor_index >= 0); |
1541 | size_t required_bytes = 0; |
1542 | if (type != kTfLiteString && type != kTfLiteResource && |
1543 | type != kTfLiteVariant) { |
1544 | // These types will be allocated in our arena so we need to record how |
1545 | // many bytes we will need based on the dimensions. String tensors are |
1546 | // allocated dynamically and we can't know ahead of time how much space |
1547 | // they will require. |
1548 | TF_LITE_ENSURE_OK(&context_, |
1549 | BytesRequired(type, dims, ndims, &required_bytes)); |
1550 | } |
1551 | |
1552 | TfLiteAllocationType allocation_type = kTfLiteArenaRw; |
1553 | if (type == kTfLiteString || type == kTfLiteResource || |
1554 | type == kTfLiteVariant) { |
1555 | if (is_variable) { |
      // We don't have a real use case for string variable tensors.
      ReportError("String variable tensor isn't supported.");
1558 | return kTfLiteError; |
1559 | } |
1560 | allocation_type = kTfLiteDynamic; |
1561 | } else if (is_variable) { |
1562 | allocation_type = kTfLiteArenaRwPersistent; |
1563 | } |
1564 | |
1565 | TfLiteTensor& tensor = context_.tensors[tensor_index]; |
1566 | |
1567 | TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(ndims, dims), |
1568 | GetLegacyQuantization(quantization), |
1569 | /*buffer=*/nullptr, required_bytes, allocation_type, |
1570 | nullptr, is_variable, &tensor); |
1571 | tensor.quantization = *scoped_quantization.release(); |
1572 | tensor.dims_signature = |
1573 | ConvertArrayToTfLiteIntArray(ndims_signature, dims_signature); |
1574 | return kTfLiteOk; |
1575 | } |
1576 | |
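// Replaces the execution plan with `new_plan` after checking that every entry
// refers to an existing node.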
1577 | TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) { |
1578 | for (int node_index : new_plan) { |
1579 | TF_LITE_ENSURE(&context_, node_index >= 0 && |
1580 | node_index < nodes_and_registration_.size()); |
1581 | } |
1582 | execution_plan_ = new_plan; |
1583 | return kTfLiteOk; |
1584 | } |
1585 | |
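// Resizes `tensor` to `new_size`, taking ownership of `new_size`. Dynamic
// (heap-backed) tensors are reallocated here; arena-backed tensors only have
// their dims/bytes updated and their buffer reset so the memory planner can
// reallocate them later. kTfLiteMmapRo tensors cannot be resized.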
1586 | TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor, |
1587 | TfLiteIntArray* new_size) { |
  // Tensors with the following allocation types can be resized; kTfLiteMmapRo
  // tensors cannot (see the else branch below).
1589 | if (tensor->allocation_type == kTfLiteArenaRw || |
1590 | tensor->allocation_type == kTfLiteDynamic || |
1591 | tensor->allocation_type == kTfLiteArenaRwPersistent || |
1592 | tensor->allocation_type == kTfLitePersistentRo || |
1593 | tensor->allocation_type == kTfLiteCustom) { |
1594 | tensor_resized_since_op_invoke_ |= |
1595 | TfLiteIntArrayEqual(tensor->dims, new_size) == 0; |
1596 | if (tensor->type != kTfLiteString && tensor->type != kTfLiteResource && |
1597 | tensor->type != kTfLiteVariant) { |
1598 | size_t bytesRequired; |
1599 | TfLiteStatus status = BytesRequired(tensor->type, new_size->data, |
1600 | new_size->size, &bytesRequired); |
1601 | if (status != kTfLiteOk) { |
1602 | TfLiteIntArrayFree(new_size); |
1603 | return kTfLiteError; |
1604 | } |
1605 | |
1606 | // Realloc space for heap-allocated tensors. |
1607 | TfLiteTensorResizeMaybeCopy(bytesRequired, tensor, false); |
1608 | tensor->bytes = bytesRequired; |
1609 | } |
1610 | if (tensor->dims) TfLiteIntArrayFree(tensor->dims); |
1611 | tensor->dims = new_size; |
1612 | |
1613 | // Reset arena-allocated tensors; they will be allocated later. |
1614 | if (tensor->allocation_type == kTfLiteArenaRw || |
1615 | tensor->allocation_type == kTfLiteArenaRwPersistent) { |
1616 | tensor->data.raw = nullptr; |
1617 | } |
1618 | } else { |
1619 | // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore |
1620 | // of fixed size. |
1621 | TfLiteIntArrayFree(new_size); |
1622 | ReportError("Attempting to resize a fixed-size tensor." ); |
1623 | return kTfLiteError; |
1624 | } |
1625 | return kTfLiteOk; |
1626 | } |
1627 | |
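// Converts arena-allocated, non-input tensors whose size meets the given
// threshold to dynamic allocation, so they are allocated on demand instead of
// being reserved in the memory arena. Must be called before AllocateTensors().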
1628 | void Subgraph::OptimizeMemoryForLargeTensors( |
1629 | int large_tensors_thresholds_in_bytes) { |
1630 | for (size_t tensor_index = 0; tensor_index < context_.tensors_size; |
1631 | tensor_index++) { |
1632 | TfLiteTensor* tensor = &context_.tensors[tensor_index]; |
1633 | if (tensor->bytes >= large_tensors_thresholds_in_bytes && |
1634 | tensor->allocation_type == kTfLiteArenaRw && |
1635 | // Skip input tensors since they are handled by ResizeInputTensor(). |
1636 | std::find(inputs_.begin(), inputs_.end(), tensor_index) == |
1637 | inputs_.end()) { |
      // Change large tensors' allocation_type to dynamic and drop their
      // data.raw. This method must be called before AllocateTensors() so that
      // the ArenaPlanner does not handle these tensors.
1640 | tensor->allocation_type = kTfLiteDynamic; |
1641 | tensor->data.raw = nullptr; |
1642 | } |
1643 | } |
1644 | } |
1645 | |
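// Exposes the delegate-only TfLiteContext hooks (node lookup, execution plan
// access, partition preview, and kernel replacement) for the duration of
// delegate preparation.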
1646 | void Subgraph::SwitchToDelegateContext() { |
1647 | context_.GetNodeAndRegistration = GetNodeAndRegistration; |
1648 | context_.ReplaceNodeSubsetsWithDelegateKernels = |
1649 | ReplaceNodeSubsetsWithDelegateKernels; |
1650 | context_.GetExecutionPlan = GetExecutionPlan; |
1651 | context_.PreviewDelegatePartitioning = PreviewDelegatePartitioning; |
1652 | } |
1653 | |
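// Restores the kernel-facing TfLiteContext by replacing the delegate-only
// hooks with stubs that report an error if called, and frees any data
// allocated by PreviewDelegatePartitioning.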
1654 | void Subgraph::SwitchToKernelContext() { |
1655 | context_.GetNodeAndRegistration = [](struct TfLiteContext* context, |
1656 | int node_index, TfLiteNode** node, |
1657 | TfLiteRegistration** registration) { |
1658 | return ForbiddenContextFunction(context); |
1659 | }; |
1660 | context_.ReplaceNodeSubsetsWithDelegateKernels = |
1661 | [](TfLiteContext* context, TfLiteRegistration registration, |
1662 | const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) { |
1663 | return ForbiddenContextFunction(context); |
1664 | }; |
1665 | context_.GetExecutionPlan = [](struct TfLiteContext* context, |
1666 | TfLiteIntArray**) { |
1667 | return ForbiddenContextFunction(context); |
1668 | }; |
1669 | context_.PreviewDelegatePartitioning = |
1670 | [](struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace, |
1671 | TfLiteDelegateParams** partition_params_array, |
1672 | int* num_partitions) { return ForbiddenContextFunction(context); }; |
1673 | // Free any memory that might have been allocated by |
1674 | // PreviewDelegatePartitioning. |
1675 | FreeDelegatePartitioningData(); |
1676 | } |
1677 | |
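// Removes all delegate kernels and restores the pre-delegation execution plan,
// including undoing the fp16->fp32 input remapping performed by FP16-capable
// delegates. Leaves the graph mutable but uninvokable.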
1678 | TfLiteStatus Subgraph::UndoAllDelegates() { |
1679 | // Return early if there is nothing to reset to. |
1680 | if (pre_delegation_execution_plan_.empty()) return kTfLiteOk; |
1681 | |
1682 | // First free all delegate nodes. |
1683 | for (int execution_plan_index = 0; |
1684 | execution_plan_index < execution_plan_.size(); ++execution_plan_index) { |
1685 | int node_index = execution_plan_[execution_plan_index]; |
1686 | TfLiteNode& node = nodes_and_registration_[node_index].first; |
1687 | if (node.delegate == nullptr) { |
1688 | continue; |
1689 | } |
1690 | CleanupNode(node_index); |
1691 | } |
1692 | |
1693 | // Reset execution plan. |
1694 | execution_plan_ = pre_delegation_execution_plan_; |
1695 | pre_delegation_execution_plan_.clear(); |
1696 | |
  // Handle FP16 delegation (if applicable).
  //
  // First pass through the execution plan to remember the mapping of FP16
  // dequantizations in the graph.
  // This is required because delegates that support FP16 acceleration may
  // remap the inputs of supported nodes to point to their fp16 versions. This
  // remapping is performed in FP16GraphPartitionHelper in delegates/utils. We
  // need to undo it to ensure CPU kernels work.
1705 | std::vector<int> fp16_to_fp32(tensors_size(), -1); |
1706 | for (int execution_plan_index = 0; |
1707 | execution_plan_index < execution_plan_.size(); ++execution_plan_index) { |
1708 | int node_index = execution_plan_[execution_plan_index]; |
1709 | auto& node_and_reg = nodes_and_registration_[node_index]; |
1710 | const TfLiteNode& node = node_and_reg.first; |
1711 | const TfLiteRegistration& reg = node_and_reg.second; |
1712 | if (reg.builtin_code == kTfLiteBuiltinDequantize && |
1713 | node.inputs->size == 1 && node.outputs->size == 1) { |
1714 | const int input_idx = node.inputs->data[0]; |
1715 | if (tensors_[input_idx].type == kTfLiteFloat16) { |
1716 | fp16_to_fp32[input_idx] = node.outputs->data[0]; |
1717 | } |
1718 | } |
1719 | } |
1720 | // Second pass through the execution plan to remap applicable nodes' fp16 |
1721 | // inputs to their original fp32 versions. Note that if a CPU kernel does |
1722 | // support fp16, the model will not contain a DEQUANTIZE for its constant |
1723 | // input. |
1724 | for (int execution_plan_index = 0; |
1725 | execution_plan_index < execution_plan_.size(); ++execution_plan_index) { |
1726 | int node_index = execution_plan_[execution_plan_index]; |
1727 | auto& node_and_reg = nodes_and_registration_[node_index]; |
1728 | const TfLiteNode& node = node_and_reg.first; |
1729 | const TfLiteRegistration& reg = node_and_reg.second; |
1730 | if (reg.builtin_code == kTfLiteBuiltinDequantize) continue; |
1731 | for (int i = 0; i < node.inputs->size; ++i) { |
1732 | const int original_input_idx = node.inputs->data[i]; |
1733 | if (original_input_idx == kTfLiteOptionalTensor) continue; |
1734 | if (tensors_[original_input_idx].type == kTfLiteFloat16) { |
1735 | node.inputs->data[i] = fp16_to_fp32[original_input_idx]; |
1736 | } |
1737 | } |
1738 | } |
1739 | |
  // Delegate nodes are appended to nodes_and_registration_. Therefore, trim
  // nodes_and_registration_ so that it only contains the nodes referenced by
  // pre_delegation_execution_plan_.
1743 | int max_retained_node_index = 0; |
1744 | for (int execution_plan_index = 0; |
1745 | execution_plan_index < execution_plan_.size(); ++execution_plan_index) { |
1746 | max_retained_node_index = std::max(max_retained_node_index, |
1747 | execution_plan_[execution_plan_index]); |
1748 | } |
1749 | nodes_and_registration_.resize(max_retained_node_index + 1); |
1750 | // After undoing delegates, the graph is uninvokable, but mutable. |
1751 | state_ = kStateUninvokable; |
1752 | |
1753 | delegates_undone_ = true; |
1754 | return kTfLiteOk; |
1755 | } |
1756 | |
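// Re-applies, in their original order, the delegates that were undone by
// UndoAllDelegates().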
1757 | TfLiteStatus Subgraph::RedoAllDelegates() { |
1758 | if (!delegates_undone_) return kTfLiteOk; |
1759 | |
1760 | delegates_undone_ = false; |
1761 | std::vector<TfLiteDelegate*> delegates_to_apply; |
1762 | delegates_applied_.swap(delegates_to_apply); |
1763 | for (auto* delegate : delegates_to_apply) { |
1764 | TF_LITE_ENSURE_STATUS(ModifyGraphWithDelegate(delegate)); |
1765 | } |
1766 | return kTfLiteOk; |
1767 | } |
1768 | |
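// Permanently removes all delegates, then re-plans and re-allocates memory so
// the subgraph is invokable again with its original execution plan.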
1769 | TfLiteStatus Subgraph::RemoveAllDelegates() { |
1770 | TF_LITE_ENSURE_STATUS(UndoAllDelegates()); |
1771 | delegates_applied_.clear(); |
1772 | delegates_undone_ = false; |
1773 | TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations()); |
1774 | return kTfLiteOk; |
1775 | } |
1776 | |
1777 | bool Subgraph::HasDelegates() { return !delegates_applied_.empty(); } |
1778 | |
1779 | bool Subgraph::IsFullyDelegated() const { |
1780 | for (const int nid : execution_plan_) { |
1781 | const TfLiteNode& node = nodes_and_registration_[nid].first; |
1782 | if (node.delegate == nullptr) return false; |
1783 | } |
1784 | return true; |
1785 | } |
1786 | |
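// Reserves kTensorsCapacityHeadroom extra slots in tensors_ beyond the current
// size so that a small number of tensors can be added without reallocating the
// vector (which would invalidate TfLiteTensor pointers), and refreshes
// context_.tensors whenever the capacity grows.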
1787 | void Subgraph::EnsureTensorsVectorCapacity() { |
1788 | const size_t required_capacity = tensors_.size() + kTensorsCapacityHeadroom; |
1789 | if (required_capacity > tensors_.capacity()) { |
    // Whenever the capacity has to grow, grow it to at least twice its
    // current size. This is consistent with the default growth behavior of
    // GCC STL's `std::vector::resize()` and avoids frequently reallocating
    // and copying the underlying buffer.
1794 | size_t reserved_capacity = |
1795 | std::max(required_capacity, tensors_.capacity() * 2); |
1796 | tensors_.reserve(reserved_capacity); |
1797 | context_.tensors = tensors_.data(); |
1798 | } |
1799 | } |
1800 | |
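// Re-plans allocations (when a memory planner exists) and runs
// AllocateTensors(), verifying that the subgraph ends up invokable.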
1801 | TfLiteStatus Subgraph::EnsureMemoryAllocations() { |
1802 | if (memory_planner_) { |
1803 | state_ = kStateUninvokable; |
1804 | TF_LITE_ENSURE_OK(&context_, memory_planner_->PlanAllocations()); |
1805 | } |
1806 | TF_LITE_ENSURE_OK(&context_, AllocateTensors()); |
1807 | TF_LITE_ENSURE_EQ(&context_, state_, kStateInvokable); |
1808 | return kTfLiteOk; |
1809 | } |
1810 | |
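// Applies `delegate` to this subgraph: checks compatibility with dynamic
// tensors, lets the delegate replace supported node subsets with delegate
// kernels, and then restores a consistent allocation state. On failure the
// original execution plan is restored and kTfLiteDelegateError or
// kTfLiteApplicationError is returned.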
1811 | TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { |
1812 | TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), |
1813 | "ModifyGraphWithDelegate" ); |
1814 | |
1815 | if (delegate == nullptr) { |
1816 | ReportError("Null delegate." ); |
1817 | return kTfLiteDelegateError; |
1818 | } |
1819 | |
1820 | // Resets delegation & leaves graph in consistent state if delegate status is |
1821 | // not okay. |
1822 | auto reset_delegation_if_not_ok = [this](TfLiteStatus status) { |
1823 | if (status != kTfLiteOk) { |
1824 | TF_LITE_ENSURE_STATUS(RemoveAllDelegates()); |
1825 | ReportError( |
1826 | "Restored original execution plan after delegate application " |
1827 | "failure." ); |
1828 | return kTfLiteDelegateError; |
1829 | } |
1830 | return kTfLiteOk; |
1831 | }; |
1832 | |
1833 | // STEP 1: Verify & prepare graph for delegation. |
1834 | // ============================================== |
1835 | |
1836 | // Restore delegation state if applicable. |
1837 | TF_LITE_ENSURE_STATUS(RedoAllDelegates()); |
1838 | |
1839 | const bool delegate_supports_dynamic_shapes = |
1840 | TfLiteDelegateGetFlagsInternal(delegate) & |
1841 | kTfLiteDelegateFlagsAllowDynamicTensors; |
1842 | const auto pre_delegation_state = state_; |
1843 | |
1844 | if (state_ == kStateInvokableAndImmutable) { |
1845 | // A delegate that doesn't support dynamic shapes was already applied, so |
1846 | // we can assume tensor shapes have been propagated & there are no dynamic |
1847 | // tensors. |
1848 | // Reset the state to force tensor/op reallocation. |
1849 | state_ = kStateUninvokable; |
1850 | } else if (!delegate_supports_dynamic_shapes) { |
1851 | // Check if graph has dynamic tensors by preparing ops. |
1852 | int last_execution_plan_index_prepared; |
1853 | TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt( |
1854 | 0, execution_plan_, &last_execution_plan_index_prepared)); |
1855 | if (has_dynamic_tensors_) { |
1856 | TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations()); |
1857 | TFLITE_LOG( |
1858 | tflite::TFLITE_LOG_WARNING, |
1859 | "Attempting to use a delegate that only supports static-sized " |
1860 | "tensors with a graph that has dynamic-sized tensors (tensor#%d is a " |
1861 | "dynamic-sized tensor)." , |
1862 | dynamic_tensor_index_); |
1863 | return kTfLiteApplicationError; |
1864 | } |
1865 | } |
1866 | |
1867 | if (delegates_applied_.empty()) { |
1868 | // This is the first delegate being applied, so remember original execution |
1869 | // plan. |
1870 | pre_delegation_execution_plan_ = execution_plan_; |
1871 | } |
1872 | |
1873 | // STEP 2: Delegate replaces applicable nodes with delegate kernels. |
1874 | // ================================================================= |
1875 | |
1876 | // Setup additional context interface. |
1877 | SwitchToDelegateContext(); |
1878 | TfLiteStatus status = TfLiteDelegatePrepareInternal(&context_, delegate); |
1879 | // Remove additional context info. |
1880 | SwitchToKernelContext(); |
1881 | TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(status)); |
1882 | |
1883 | // STEP 3: Leave graph in consistent state based on delegate & previous state. |
1884 | // =========================================================================== |
1885 | |
1886 | if (!delegate_supports_dynamic_shapes) { |
1887 | // CASE 1: Current delegate does not support dynamic shapes. |
1888 | // Reset the state to force tensor/op reallocation. |
1889 | state_ = kStateUninvokable; |
1890 | TF_LITE_ENSURE_STATUS( |
1891 | reset_delegation_if_not_ok(EnsureMemoryAllocations())); |
1892 | // After using a delegate which doesn't support dynamic tensors, make the |
1893 | // entire graph immutable. |
1894 | state_ = kStateInvokableAndImmutable; |
1895 | } else if (pre_delegation_state == kStateInvokableAndImmutable) { |
1896 | // CASE 2: Current delegate supports dynamic shapes, but a previous one |
1897 | // does not. |
1898 | // Make sure new delegate didn't mark a tensor as dynamic. |
1899 | int last_execution_plan_index_prepared; |
1900 | TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(PrepareOpsStartingAt( |
1901 | 0, execution_plan_, &last_execution_plan_index_prepared))); |
1902 | if (has_dynamic_tensors_) { |
1903 | TF_LITE_ENSURE_STATUS(RemoveAllDelegates()); |
1904 | ReportError( |
1905 | "Cannot allow dynamic tensors due to previous delegation, resetting " |
1906 | "to original execution plan." ); |
1907 | return kTfLiteApplicationError; |
1908 | } |
1909 | // Redo memory allocations & ensure state is set back to original value. |
1910 | TF_LITE_ENSURE_STATUS( |
1911 | reset_delegation_if_not_ok(EnsureMemoryAllocations())); |
1912 | state_ = kStateInvokableAndImmutable; |
1913 | } else if (pre_delegation_state == kStateInvokable) { |
1914 | // CASE 3: Current delegate supports dynamic shapes, and the graph was |
1915 | // previously invokable. |
1916 | // Flush allocation now to leave it in a consistent state. |
1917 | TF_LITE_ENSURE_STATUS( |
1918 | reset_delegation_if_not_ok(EnsureMemoryAllocations())); |
1919 | } |
1920 | delegates_applied_.push_back(delegate); |
1921 | |
1922 | return status; |
1923 | } |
1924 | |
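// Points the tensor at `tensor_index` to caller-owned memory described by
// `allocation`. Unless kTfLiteCustomAllocationFlagsSkipAlignCheck is set, the
// buffer must be kDefaultTensorAlignment-aligned; its size is validated later,
// once all ops have been prepared. Usage sketch (assuming a suitably aligned,
// caller-managed buffer `buf` of `buf_bytes` bytes and a valid index `idx`):
//   TfLiteCustomAllocation alloc = {/*data=*/buf, /*bytes=*/buf_bytes};
//   subgraph->SetCustomAllocationForTensor(idx, alloc,
//                                          kTfLiteCustomAllocationFlagsNone);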
1925 | TfLiteStatus Subgraph::SetCustomAllocationForTensor( |
1926 | int tensor_index, const TfLiteCustomAllocation& allocation, int64_t flags) { |
1927 | TfLiteTensor* tensor = &context_.tensors[tensor_index]; |
1928 | TF_LITE_ENSURE(context(), |
1929 | (tensor->allocation_type == kTfLiteArenaRw || |
1930 | tensor->allocation_type == kTfLiteArenaRwPersistent || |
1931 | tensor->allocation_type == kTfLiteCustom)); |
  // Don't check allocation.bytes here; we do that after all ops are prepared,
  // to allow tensor shape propagation.
1934 | TF_LITE_ENSURE(context(), allocation.data != nullptr); |
1935 | if (!(flags & kTfLiteCustomAllocationFlagsSkipAlignCheck)) { |
1936 | const intptr_t data_ptr_value = reinterpret_cast<intptr_t>(allocation.data); |
1937 | TF_LITE_ENSURE(context(), data_ptr_value % kDefaultTensorAlignment == 0); |
1938 | } |
1939 | |
1940 | const auto iter_and_success = |
1941 | custom_allocations_.insert({tensor_index, allocation}); |
1942 | if (!iter_and_success.second) { |
1943 | iter_and_success.first->second = allocation; |
1944 | } |
1945 | |
1946 | tensor->allocation_type = kTfLiteCustom; |
1947 | tensor->data.data = allocation.data; |
1948 | |
1949 | return kTfLiteOk; |
1950 | } |
1951 | |
1952 | void Subgraph::SetName(const char* name) { |
1953 | if (name) { |
1954 | name_ = name; |
1955 | } else { |
1956 | name_ = "" ; |
1957 | } |
1958 | } |
1959 | |
1960 | const std::string& Subgraph::GetName() const { return name_; } |
1961 | |
1962 | void Subgraph::DumpMemoryPlannerDebugInfo() const { |
1963 | if (memory_planner_ == nullptr) return; |
1964 | memory_planner_->DumpDebugInfo(execution_plan()); |
1965 | } |
1966 | |
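// Fills `alloc_info` with the arena sizes reported by the memory planner, the
// total bytes held by allocated dynamic tensors, and, for the primary
// subgraph, the memory used by resources.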
1967 | void Subgraph::GetMemoryAllocInfo(SubgraphAllocInfo* alloc_info) const { |
1968 | memset(alloc_info, 0, sizeof(SubgraphAllocInfo)); |
1969 | if (memory_planner_ == nullptr) return; |
1970 | memory_planner_->GetAllocInfo(&alloc_info->arena_size, |
1971 | &alloc_info->arena_persist_size); |
1972 | for (const auto& tensor : tensors_) { |
1973 | if (tensor.allocation_type == kTfLiteDynamic && |
1974 | tensor.data.raw != nullptr) { |
1975 | alloc_info->dynamic_size += tensor.bytes; |
1976 | } |
1977 | } |
1978 | if (GetSubgraphIndex() == 0) { |
1979 | for (const auto& res : *resources_) { |
1980 | alloc_info->resource_size += res.second->GetMemoryUsage(); |
1981 | } |
1982 | } |
1983 | } |
1984 | |
1985 | std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() { |
1986 | return std::unique_ptr<GraphInfo>(new InterpreterInfo(this)); |
1987 | } |
1988 | |
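// Records, for every tensor referenced by the execution plan, the index of the
// last node that reads or writes it. MaybeReleaseDynamicTensors() consults
// this map to free dynamic tensors as soon as they are no longer needed.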
1989 | void Subgraph::InitializeTensorReleaseMap() { |
1990 | for (int i = 0; i < execution_plan_.size(); ++i) { |
1991 | int node_index = execution_plan_[i]; |
1992 | const TfLiteNode& node = nodes_and_registration_[node_index].first; |
1993 | for (int input_index = 0; input_index < node.inputs->size; ++input_index) { |
1994 | int input_tensor_index = node.inputs->data[input_index]; |
1995 | TfLiteTensor* input_tensor = tensor(input_tensor_index); |
1996 | if (!input_tensor) continue; |
1997 | tensor_to_last_op_index_[input_tensor_index] = node_index; |
1998 | } |
    // Also check the outputs of the node so that tensors which are never used
    // as an input to another node still get released.
2001 | for (int output_index = 0; output_index < node.outputs->size; |
2002 | ++output_index) { |
2003 | int output_tensor_index = node.outputs->data[output_index]; |
2004 | TfLiteTensor* output_tensor = tensor(output_tensor_index); |
2005 | if (!output_tensor) continue; |
2006 | tensor_to_last_op_index_[output_tensor_index] = node_index; |
2007 | } |
2008 | } |
2009 | } |
2010 | |
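// After `node` (at `node_index`) has run, frees the buffers of dynamic,
// non-string, non-resource tensors it uses whenever this node is the last
// recorded consumer and the tensor is neither a graph input nor a graph
// output. Only active when dynamic tensor release has been enabled.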
2011 | void Subgraph::MaybeReleaseDynamicTensors(const TfLiteNode& node, |
2012 | size_t node_index) { |
2013 | if (!ShouldReleaseDynamicTensors()) return; |
2014 | |
  // Release an input tensor if it is neither a graph input nor a graph output
  // and is no longer needed by the remaining graph execution.
2017 | auto tensorIsInput = [&](int index) { |
2018 | for (int idx : inputs_) { |
2019 | if (idx == index) return true; |
2020 | } |
2021 | return false; |
2022 | }; |
2023 | auto tensorIsOutput = [&](int index) { |
2024 | for (int idx : outputs_) { |
2025 | if (idx == index) return true; |
2026 | } |
2027 | return false; |
2028 | }; |
2029 | for (int input_index = 0; input_index < node.inputs->size; ++input_index) { |
2030 | int input_tensor_index = node.inputs->data[input_index]; |
2031 | TfLiteTensor* input_tensor = tensor(input_tensor_index); |
2032 | if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic || |
2033 | input_tensor->type == kTfLiteString || |
2034 | input_tensor->type == kTfLiteResource || |
2035 | tensorIsInput(input_tensor_index) || tensorIsOutput(input_tensor_index)) |
2036 | continue; |
2037 | auto it = tensor_to_last_op_index_.find(input_tensor_index); |
2038 | if (it != tensor_to_last_op_index_.end() && it->second == node_index) { |
2039 | if (input_tensor->data.raw) { |
2040 | TfLiteTensorDataFree(input_tensor); |
2041 | } |
2042 | } |
2043 | } |
2044 | |
  // Release an output tensor if it is neither a graph input nor a graph
  // output and is no longer needed by the remaining graph execution.
2047 | for (int output_index = 0; output_index < node.outputs->size; |
2048 | ++output_index) { |
2049 | int output_tensor_index = node.outputs->data[output_index]; |
2050 | TfLiteTensor* output_tensor = tensor(output_tensor_index); |
2051 | if (!output_tensor || output_tensor->allocation_type != kTfLiteDynamic || |
2052 | output_tensor->type == kTfLiteString || |
2053 | output_tensor->type == kTfLiteResource || |
2054 | tensorIsInput(output_tensor_index) || |
2055 | tensorIsOutput(output_tensor_index)) |
2056 | continue; |
2057 | auto it = tensor_to_last_op_index_.find(output_tensor_index); |
2058 | if (it != tensor_to_last_op_index_.end() && it->second == node_index) { |
2059 | if (output_tensor->data.raw) { |
2060 | TfLiteTensorDataFree(output_tensor); |
2061 | } |
2062 | } |
2063 | } |
2064 | } |
2065 | |
2066 | } // namespace tflite |
2067 | |