/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_SUBGRAPH_H_
#define TENSORFLOW_LITE_CORE_SUBGRAPH_H_

#include <stdarg.h>
#include <stddef.h>

#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/macros.h"
#include "tensorflow/lite/experimental/resource/initialization_status.h"
#include "tensorflow/lite/experimental/resource/resource_base.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/interpreter_options.h"
#include "tensorflow/lite/memory_planner.h"
#include "tensorflow/lite/util.h"

namespace tflite {

class SingleOpModel;  // Class for friend declarations.

namespace delegates {
namespace test_utils {
class TestDelegate;  // Class for friend declarations.
}  // namespace test_utils
}  // namespace delegates

class Subgraph {
 public:
  friend class Interpreter;
  friend class SingleOpModel;

  Subgraph(ErrorReporter* error_reporter,
           TfLiteExternalContext** external_contexts,
           std::vector<std::unique_ptr<Subgraph>>* subgraphs,
           resource::ResourceMap* resources,
           resource::ResourceIDMap* resource_ids,
           resource::InitializationStatusMap* initialization_status_map,
           int subgraph_index = kInvalidSubgraphIndex);

  Subgraph(const Subgraph&) = delete;

  // Subgraphs should be movable but not copyable.
  Subgraph(Subgraph&&) = default;
  Subgraph& operator=(const Subgraph&) = delete;
  virtual ~Subgraph();

  // Provide a list of tensor indexes that are inputs to the model.
  // Each index is bounds checked; out-of-bounds indices set the interpreter's
  // consistent_ flag to false.
  TfLiteStatus SetInputs(std::vector<int> inputs);

  // Provide a list of tensor indexes that are outputs of the model.
  // Each index is bounds checked; out-of-bounds indices set the interpreter's
  // consistent_ flag to false.
  TfLiteStatus SetOutputs(std::vector<int> outputs);

  // Provide a list of tensor indexes that are variable tensors.
  // Each index is bounds checked; out-of-bounds indices set the interpreter's
  // consistent_ flag to false.
  TfLiteStatus SetVariables(std::vector<int> variables);

  // Adds a node with the given parameters and returns the index of the new
  // node in `node_index` (optionally). The interpreter takes ownership of
  // `builtin_data` and will destroy it with `free`. Ownership of `init_data`
  // remains with the caller.
  TfLiteStatus AddNodeWithParameters(const std::vector<int>& inputs,
                                     const std::vector<int>& outputs,
                                     const std::vector<int>& intermediates,
                                     const char* init_data,
                                     size_t init_data_size, void* builtin_data,
                                     const TfLiteRegistration* registration,
                                     int* node_index = nullptr);
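
  // Example (an illustrative sketch, not part of the original header):
  // registering a single builtin ADD node. `subgraph`, `add_registration` and
  // the tensor indices are assumptions; a real caller would obtain the
  // registration from an op resolver and include builtin_op_data.h for
  // TfLiteAddParams.
  //
  //   auto* add_params =
  //       static_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
  //   add_params->activation = kTfLiteActNone;
  //   int node_index = -1;
  //   subgraph->AddNodeWithParameters(
  //       /*inputs=*/{0, 1}, /*outputs=*/{2}, /*intermediates=*/{},
  //       /*init_data=*/nullptr, /*init_data_size=*/0,
  //       /*builtin_data=*/add_params, &add_registration, &node_index);
  //   // The subgraph now owns `add_params` and will free() it.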

  // Adds `tensors_to_add` tensors, preserving pre-existing Tensor entries.
  // The value pointed to by `first_new_tensor_index` will be set to the
  // index of the first new tensor if `first_new_tensor_index` is non-null.
  TfLiteStatus AddTensors(int tensors_to_add,
                          int* first_new_tensor_index = nullptr);
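
  // Example (an illustrative sketch): growing the tensor pool before
  // configuring tensors. `subgraph` is an assumed Subgraph* obtained
  // elsewhere.
  //
  //   int first_new = 0;
  //   if (subgraph->AddTensors(/*tensors_to_add=*/3, &first_new) != kTfLiteOk) {
  //     // Handle the error.
  //   }
  //   // Tensors first_new .. first_new + 2 now exist as blank entries and can
  //   // be configured with the SetTensorParameters* methods below.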

  // Sets the type, name, dimensions and quantization of tensor `tensor_index`,
  // backed by a read-only external buffer of `bytes` bytes. The buffer must
  // outlive the Interpreter. `quantization` ownership is passed to the
  // subgraph.
  inline TfLiteStatus SetTensorParametersReadOnly(
      int tensor_index, TfLiteType type, const char* name,
      const std::vector<int>& dims, TfLiteQuantization quantization,
      const char* buffer, size_t bytes, const Allocation* allocation = nullptr,
      TfLiteSparsity* sparsity = nullptr) {
    return SetTensorParametersReadOnly(tensor_index, type, name, dims.size(),
                                       dims.data(), quantization, buffer, bytes,
                                       allocation, sparsity);
  }
  TfLiteStatus SetTensorParametersReadOnly(
      int tensor_index, TfLiteType type, const char* name, const size_t ndims,
      const int* dims, TfLiteQuantization quantization, const char* buffer,
      size_t bytes, const Allocation* allocation = nullptr,
      TfLiteSparsity* sparsity = nullptr);

  // Sets the type, name, dimensions and quantization of tensor `tensor_index`,
  // whose memory is managed by the runtime. Entries of -1 in `dims_signature`
  // mark dimensions whose size is not fixed. `quantization` ownership is
  // passed to the subgraph.
  inline TfLiteStatus SetTensorParametersReadWrite(
      int tensor_index, TfLiteType type, const char* name,
      const std::vector<int>& dims, TfLiteQuantization quantization,
      bool is_variable = false, const std::vector<int>& dims_signature = {}) {
    if (dims_signature.empty()) {
      return SetTensorParametersReadWrite(tensor_index, type, name,
                                          dims.size(), dims.data(),
                                          quantization, is_variable);
    }
    return SetTensorParametersReadWrite(
        tensor_index, type, name, dims.size(), dims.data(), quantization,
        is_variable, dims_signature.size(), dims_signature.data());
  }
  TfLiteStatus SetTensorParametersReadWrite(
      int tensor_index, TfLiteType type, const char* name, const size_t ndims,
      const int* dims, TfLiteQuantization quantization,
      bool is_variable = false, const size_t ndims_signature = 0,
      const int* dims_signature = nullptr);
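
  // Example (an illustrative sketch): configuring a float input tensor with a
  // resizable batch dimension. The concrete shape and name are assumptions for
  // illustration only.
  //
  //   std::vector<int> dims = {1, 224, 224, 3};
  //   std::vector<int> dims_signature = {-1, 224, 224, 3};  // -1: resizable.
  //   TfLiteQuantization quant = {};
  //   quant.type = kTfLiteNoQuantization;
  //   subgraph->SetTensorParametersReadWrite(
  //       /*tensor_index=*/0, kTfLiteFloat32, "input", dims, quant,
  //       /*is_variable=*/false, dims_signature);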

  // Get all tensors in the subgraph.
  TfLiteTensor* tensors() { return context_.tensors; }

  // Get a mutable tensor data structure.
  TfLiteTensor* tensor(int tensor_index) {
    if (tensor_index < 0 ||
        static_cast<size_t>(tensor_index) >= context_.tensors_size) {
      return nullptr;
    }
    return &context_.tensors[tensor_index];
  }

  // Get an immutable tensor data structure.
  const TfLiteTensor* tensor(int tensor_index) const {
    if (tensor_index < 0 ||
        static_cast<size_t>(tensor_index) >= context_.tensors_size) {
      return nullptr;
    }
    return &context_.tensors[tensor_index];
  }
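
  // Example (an illustrative sketch): reading the raw buffer of a float tensor
  // that has already been allocated. `tensor_index` is an assumption.
  //
  //   TfLiteTensor* t = subgraph->tensor(tensor_index);
  //   if (t != nullptr && t->type == kTfLiteFloat32 && t->data.f != nullptr) {
  //     float first_value = t->data.f[0];
  //   }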

  // Mutable access to the list of input tensor indices.
  std::vector<int>& inputs() { return inputs_; }

  // Read-only access to the list of input tensor indices.
  const std::vector<int>& inputs() const { return inputs_; }

  // Mutable access to the list of output tensor indices.
  std::vector<int>& outputs() { return outputs_; }

  // Read-only access to the list of output tensor indices.
  const std::vector<int>& outputs() const { return outputs_; }

  // Mutable access to the list of variable tensor indices.
  std::vector<int>& variables() { return variables_; }

  // Read-only access to the list of variable tensor indices.
  const std::vector<int>& variables() const { return variables_; }

  // WARNING: Experimental interface, subject to change.
  // TODO(ycling): Move this function to an external context interface.
  resource::ResourceMap& resources() { return *resources_; }

  // WARNING: Experimental interface, subject to change.
  // TODO(b/149099381): Move this function to an external context interface.
  resource::ResourceIDMap& resource_ids() { return *resource_ids_; }

  // WARNING: Experimental interface, subject to change.
  // TODO(b/149099381): Move this function to an external context interface.
  resource::InitializationStatusMap& initialization_status_map() {
    return *initialization_status_map_;
  }

  size_t tensors_size() const { return tensors_.size(); }

  // Return the number of ops in the model.
  size_t nodes_size() const { return nodes_and_registration_.size(); }

  // Return vector of node indices in the order of execution.
  std::vector<int>& execution_plan() { return execution_plan_; }

  // Return read-only vector of node indices in the order of execution.
  const std::vector<int>& execution_plan() const { return execution_plan_; }

  const std::vector<std::pair<TfLiteNode, TfLiteRegistration>>&
  nodes_and_registration() const {
    return nodes_and_registration_;
  }

  // Get a pointer to an operation and registration data structure if
  // `node_index` is in bounds.
  const std::pair<TfLiteNode, TfLiteRegistration>* node_and_registration(
      int node_index) const {
    if (node_index < 0 || static_cast<size_t>(node_index) >= nodes_size())
      return nullptr;
    return &nodes_and_registration_[node_index];
  }

  // Change the dimensionality of a given tensor. Note that this is only
  // acceptable for tensors that are graph inputs.
  // Returns status of failure or success.
  TfLiteStatus ResizeInputTensor(int tensor_index,
                                 const std::vector<int>& dims);

  // WARNING: Experimental interface, subject to change
  // Change the dimensionality of a given tensor. This is only acceptable for
  // tensor indices that are inputs or variables. Only unknown dimensions can
  // be resized with this function. Unknown dimensions are indicated as `-1` in
  // the `dims_signature` attribute of a `TfLiteTensor`. Returns status of
  // failure or success.
  TfLiteStatus ResizeInputTensorStrict(int tensor_index,
                                       const std::vector<int>& dims);

  // This releases memory held by non-persistent tensors. It does NOT
  // re-perform memory planning.
  // AllocateTensors needs to be called before the next invocation.
  TfLiteStatus ReleaseNonPersistentMemory();

  // WARNING: Experimental interface, subject to change
  // This API releases memory held by the given subgraph. This method is
  // designed to release memory of control flow subgraphs.
  // AllocateTensors needs to be called before the next invocation.
  TfLiteStatus ReleaseMemory();

  // Update allocations for all tensors. This will redim dependent tensors
  // using the input tensor dimensionality as given. This is relatively
  // expensive. If you know that your sizes are not changing, you need not call
  // this. Returns status of success or failure.
  TfLiteStatus AllocateTensors();

  // Invoke the subgraph (run the whole graph in dependency order).
  //
  // NOTE: It is possible that the interpreter is not in a ready state
  // to evaluate (i.e. if a ResizeTensor() has been performed without an
  // AllocateTensors()).
  // Returns status of success or failure.
  TfLiteStatus Invoke();
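
  // Example (an illustrative sketch): a typical resize / allocate / fill /
  // invoke cycle. The shapes and fill value are assumptions for illustration.
  //
  //   subgraph->ResizeInputTensor(subgraph->inputs()[0], {1, 8});
  //   if (subgraph->AllocateTensors() != kTfLiteOk) { /* handle error */ }
  //   TfLiteTensor* input = subgraph->tensor(subgraph->inputs()[0]);
  //   std::fill(input->data.f, input->data.f + 8, 0.0f);
  //   if (subgraph->Invoke() != kTfLiteOk) { /* handle error */ }
  //   const TfLiteTensor* output = subgraph->tensor(subgraph->outputs()[0]);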

  // Entry point for C node plugin API to report an error.
  void ReportError(const char* format, ...);

  // Return the subgraph specific context.
  TfLiteContext* context() { return &context_; }
  const TfLiteContext* context() const { return &context_; }

  // Set the value of an external context.
  void SetExternalContext(TfLiteExternalContextType type,
                          TfLiteExternalContext* ctx);

  // Get the half precision flag.
  // WARNING: This is an experimental API and subject to change.
  bool GetAllowFp16PrecisionForFp32() const {
    return context_.allow_fp32_relax_to_fp16;
  }

  // Sets the cancellation function pointer in order to cancel a request in the
  // middle of a call to Invoke(). The interpreter queries this function during
  // inference, between op invocations; when it returns true, the interpreter
  // will abort execution and return `kTfLiteError`. The `data` parameter
  // contains any data used by the cancellation function, and if non-null,
  // remains owned by the caller.
  // WARNING: This is an experimental API and subject to change.
  void SetCancellationFunction(void* data, bool (*check_cancelled_func)(void*));
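
  // Example (an illustrative sketch): wiring a caller-owned cancellation flag
  // through the callback. `CancellationFlag` is a hypothetical helper type.
  //
  //   struct CancellationFlag { std::atomic<bool> cancelled{false}; };
  //   CancellationFlag flag;  // Must outlive its use by the subgraph.
  //   subgraph->SetCancellationFunction(&flag, [](void* data) {
  //     return static_cast<CancellationFlag*>(data)->cancelled.load();
  //   });
  //   // Another thread can later set `flag.cancelled = true;` so that the
  //   // next op boundary inside Invoke() aborts with kTfLiteError.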

  // Ensure the data in `tensor.data` is readable. If a delegate has been
  // applied, this may require copying the data from the delegate's buffer to
  // CPU memory.
  // WARNING: This is an experimental API and subject to change.
  TfLiteStatus EnsureTensorDataIsReadable(int tensor_index);

  // The default capacity of the `tensors_` vector.
  static constexpr int kTensorsReservedCapacity = 128;
  // The capacity headroom of the `tensors_` vector before calling ops'
  // `prepare` and `invoke` functions. In these functions, it's guaranteed that
  // allocating up to `kTensorsCapacityHeadroom` more tensors won't invalidate
  // pointers to existing tensors.
  static constexpr int kTensorsCapacityHeadroom = 16;

  // Reset all variable tensors to the default value.
  // If a variable tensor doesn't have a buffer, reset it to zero.
  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
  // to the value of the buffer.
  // WARNING: This is an experimental API and subject to change.
  TfLiteStatus ResetVariableTensors();

  void SetProfiler(Profiler* profiler, int associated_subgraph_idx) {
    if (!profiler) {
      profiler_.reset(nullptr);
      context_.profiler = nullptr;
    } else {
      profiler_ = std::make_unique<SubgraphAwareProfiler>(
          profiler, associated_subgraph_idx);
      context_.profiler = profiler_.get();
    }
  }

  Profiler* GetProfiler() { return profiler_.get(); }

  // Returns a pointer to the vector of subgraphs.
  // WARNING: This is an experimental API and subject to change.
  std::vector<std::unique_ptr<Subgraph>>* GetSubgraphs() { return subgraphs_; }

  // Returns the location of this object within subgraphs_, or
  // kInvalidSubgraphIndex if subgraphs_ is nullptr or *this is not
  // represented in *subgraphs_.
  // WARNING: This is an experimental API and subject to change.
  static constexpr int kInvalidSubgraphIndex = -1;
  int GetSubgraphIndex() const { return subgraph_index_; }

  // Returns true if the graph still contains tensors whose size is dynamic
  // (not fully known) after `AllocateTensors` has been called.
  // Before `AllocateTensors` is called, this always returns true.
  bool HasDynamicTensors() { return has_dynamic_tensors_; }

  // Assigns (or reassigns) a custom memory allocation for the given tensor.
  // `flags` is a bitmask, see TfLiteCustomAllocationFlags.
  // The runtime does NOT take ownership of the underlying memory.
  //
  // NOTE: User needs to call AllocateTensors() after this.
  // Invalid/insufficient buffers will cause an error during AllocateTensors or
  // Invoke (in case of dynamic shapes in the graph).
  //
  // Parameters should satisfy the following conditions:
  // 1. tensor->allocation_type == kTfLiteArenaRw or kTfLiteArenaRwPersistent
  //    In general, this is true for I/O tensors & variable tensors.
  // 2. allocation->data has the appropriate permissions for runtime access
  //    (Read-only for inputs, Read-Write for others), and outlives the
  //    Interpreter.
  // 3. allocation->bytes >= tensor->bytes.
  //    This condition is checked again if any tensors are resized.
  // 4. allocation->data should be aligned to kDefaultTensorAlignment
  //    defined in lite/util.h. (Currently 64 bytes)
  //    This check is skipped if kTfLiteCustomAllocationFlagsSkipAlignCheck is
  //    set through `flags`.
  // TODO(b/182215910): Expand on this documentation in a g3doc.
  //
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteStatus SetCustomAllocationForTensor(
      int tensor_index, const TfLiteCustomAllocation& allocation,
      int64_t flags = kTfLiteCustomAllocationFlagsNone);
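
  // Example (an illustrative sketch): pointing an input tensor at
  // caller-owned, 64-byte-aligned memory. Size rounding and buffer lifetime
  // management are the caller's responsibility and are simplified here.
  //
  //   TfLiteTensor* t = subgraph->tensor(input_index);
  //   void* buffer = std::aligned_alloc(/*alignment=*/64, /*size=*/t->bytes);
  //   TfLiteCustomAllocation alloc = {buffer, t->bytes};
  //   subgraph->SetCustomAllocationForTensor(input_index, alloc);
  //   subgraph->AllocateTensors();  // Required before the next Invoke().
  //   ...
  //   std::free(buffer);  // Only after the subgraph no longer uses it.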

  void SetName(const char* name);
  const std::string& GetName() const;

  // WARNING: This is an experimental API and subject to change.
  // Dumps debugging info via the underlying memory planner.
  // Note: to keep the binary size increase of this debug dump minimal and to
  // let users plug in their own memory planner debugger, weak symbols are
  // used. By default, no debugging info is dumped. However, if the
  // TfLite-provided lite:simple_memory_arena_debug_dump target (i.e. the one
  // containing the strong definition) is linked into the program, calling this
  // function will output memory usage information about tensors and ops.
  void DumpMemoryPlannerDebugInfo() const;

  typedef struct SubgraphAllocInfo {
    size_t arena_size;
    size_t arena_persist_size;
    size_t dynamic_size;
    size_t resource_size;
  } SubgraphAllocInfo;

  // WARNING: This is an experimental API and subject to change.
  // Returns memory allocation status.
  void GetMemoryAllocInfo(SubgraphAllocInfo* alloc_info) const;

  // WARNING: This is an experimental API and subject to change.
  // Set the given `InterpreterOptions` object.
  void SetOptions(InterpreterOptions* options) { options_ = options; }

  // WARNING: This is an experimental API and subject to change.
  // True if all intermediate tensors should be preserved for debugging.
  bool ShouldPreserveAllTensors() const {
    return (options_ && options_->GetPreserveAllTensors());
  }

  // WARNING: This is an experimental API and subject to change.
  // True if all intermediate dynamic tensors should be released once they are
  // no longer used by the model.
  bool ShouldReleaseDynamicTensors() const {
    return (options_ && options_->GetEnsureDynamicTensorsAreReleased());
  }

  /// WARNING: This is an experimental API and subject to change.
  /// Use dynamic tensor allocation and deallocation for large tensors instead
  /// of the static memory planner. Dynamic tensors are allocated just before
  /// they're needed and released once they're no longer needed. This reduces
  /// peak memory usage but may add some latency. The parameter
  /// `large_tensors_thresholds_in_bytes` determines which tensors count as
  /// large. This API must be called before `AllocateTensors`.
  void OptimizeMemoryForLargeTensors(int large_tensors_thresholds_in_bytes);
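
  // Example (an illustrative sketch): treat tensors of 1 MiB or more as
  // "large" and allocate them dynamically; the threshold is an arbitrary
  // assumption.
  //
  //   subgraph->OptimizeMemoryForLargeTensors(1 << 20);
  //   subgraph->AllocateTensors();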

  // WARNING: This is an experimental API and subject to change.
  // True if dynamic tensor allocation / deallocation has been enabled by
  // the `OptimizeMemoryForLargeTensors` API.
  bool ShouldOptimizeMemoryForLargeTensors() {
    return (options_ && (options_->GetDynamicAllocationForLargeTensors() > 0));
  }

  // WARNING: This is an experimental API and subject to change.
  // Remove unused inputs of the subgraph. This checks the usage of inputs and
  // marks an input as kTfLiteOptionalTensor if it is not used in graph
  // execution. Currently, it's used to remove unused inputs of WHILE cond
  // subgraphs.
  TfLiteStatus RemoveUnusedInputs();

 private:
  friend class InterpreterBuilder;
  friend class TestDelegate;
  // SubgraphAwareProfiler wraps an actual TFLite profiler, such as a
  // BufferedProfiler instance, and takes care of event profiling/tracing in a
  // certain subgraph.
  class SubgraphAwareProfiler : public Profiler {
   public:
    // The constructor should be called with a non-null `profiler` argument.
    SubgraphAwareProfiler(Profiler* profiler, int64_t subgraph_index)
        : profiler_(profiler), subgraph_index_(subgraph_index) {}
    ~SubgraphAwareProfiler() override {}

    uint32_t BeginEvent(const char* tag, EventType event_type,
                        int64_t event_metadata1,
                        int64_t event_metadata2) override {
      if (!profiler_) return 0;
      return profiler_->BeginEvent(tag, event_type, event_metadata1,
                                   subgraph_index_);
    }

    void EndEvent(uint32_t event_handle) override {
      if (!profiler_) return;
      profiler_->EndEvent(event_handle);
    }

    void EndEvent(uint32_t event_handle, int64_t event_metadata1,
                  int64_t event_metadata2) override {
      if (!profiler_) return;
      profiler_->EndEvent(event_handle, event_metadata1, event_metadata2);
    }

    void AddEvent(const char* tag, EventType event_type, uint64_t elapsed_time,
                  int64_t event_metadata1, int64_t event_metadata2) override {
      if (!profiler_) return;
      profiler_->AddEvent(tag, event_type, elapsed_time, event_metadata1,
                          subgraph_index_);
    }

   private:
    // Does not own the memory.
    Profiler* const profiler_;
    const int64_t subgraph_index_;
  };

  // Ensure the internal node storage memory allocates at least `count`
  // slots for nodes. NOTE: this doesn't actually add operators. This is an
  // efficiency optimization that is subject to change.
  // Note: Only used during initialization.
  void ReserveNodes(int count);

  // Overrides the execution plan. This bounds checks the indices sent in.
  // Note: Only used during initialization.
  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);

  // Prevent 'context_' from accessing functions that are only available to
  // delegated kernels.
  void SwitchToKernelContext();

  // Add delegate-only functions to 'context_'.
  void SwitchToDelegateContext();

  // Give 'op_reg' a chance to initialize itself using the contents of
  // 'buffer'. If registration_external is valid, use the 'init' callback from
  // that.
  void* OpInit(const TfLiteRegistration& op_reg, const char* buffer,
               size_t length);

  // Let 'op_reg' release any memory it might have allocated via 'OpInit'.
  // If registration_external is valid, use the 'free' callback from that.
  void OpFree(const TfLiteRegistration& op_reg, void* buffer);

  // Prepare the given 'node' for execution.
  TfLiteStatus OpPrepare(const TfLiteRegistration& op_reg, TfLiteNode* node);

  // Invoke the operator represented by 'node'.
  TfLiteStatus OpInvoke(const TfLiteRegistration& op_reg, TfLiteNode* node);

  // Call OpPrepare() for as many ops as possible, allocating memory for their
  // tensors. If an op containing dynamic tensors is found, preparation will be
  // postponed until this function is called again. This allows the interpreter
  // to wait until Invoke() to resolve the sizes of dynamic tensors.
  TfLiteStatus PrepareOpsAndTensors();

  // Call OpPrepare() for all ops starting at 'first_execution_plan_index'.
  // Stop when a dynamic tensor is found or all ops have been prepared. Fill
  // 'last_execution_plan_index_prepared' with the index of the op containing
  // dynamic tensors, or of the last op in the graph.
  TfLiteStatus PrepareOpsStartingAt(int first_execution_plan_index,
                                    const std::vector<int>& execution_plan,
                                    int* last_execution_plan_index_prepared);

  // Tensors needed by the interpreter. Use `AddTensors` to add more blank
  // tensor entries. Note, `tensors_.data()` needs to be synchronized to the
  // `context_` whenever this std::vector is reallocated. Currently this
  // only happens in `AddTensors()`.
  std::vector<TfLiteTensor> tensors_;

  // Check if an array of tensor indices is valid with respect to the Tensor
  // array.
  // NOTE: this changes consistent_ to be false if indices are out of bounds.
  TfLiteStatus CheckTensorIndices(const char* label, const int* indices,
                                  int length);

  // Check that the input indices and the output indices don't overlap.
  // This is needed because the same tensor must not be used both as input and
  // output for an operator.
  // NOTE: this changes consistent_ to be false if indices are out of bounds.
  TfLiteStatus CheckInputAndOutputForOverlap(const int* input_indices,
                                             int num_inputs,
                                             const int* output_indices,
                                             int num_outputs);

  // Compute the number of bytes required to represent a tensor with dimensions
  // specified by the array dims (of length dims_size). Returns the status code
  // and bytes.
  TfLiteStatus BytesRequired(TfLiteType type, const int* dims, size_t dims_size,
                             size_t* bytes);

  // Implementation of a request to resize a tensor. If the given tensor is of
  // type kTfLiteDynamic it will also be allocated new memory.
  TfLiteStatus ResizeTensorImpl(TfLiteTensor* tensor, TfLiteIntArray* new_size);

  // Report a detailed error string (will be printed to stderr).
  void ReportErrorImpl(const char* format, va_list args);

  // Entry point for C node plugin API to request that a tensor be resized.
  static TfLiteStatus ResizeTensor(TfLiteContext* context, TfLiteTensor* tensor,
                                   TfLiteIntArray* new_size);
  // Entry point for C node plugin API to report an error.
  static void ReportErrorC(TfLiteContext* context, const char* format, ...);

  // Entry point for C node plugin API to add new tensors.
  static TfLiteStatus AddTensors(TfLiteContext* context, int tensors_to_add,
                                 int* first_new_tensor_index);

  // WARNING: This is an experimental API and subject to change.
  // Entry point for the C API ReplaceNodeSubsetsWithDelegateKernels.
  static TfLiteStatus ReplaceNodeSubsetsWithDelegateKernels(
      TfLiteContext* context, TfLiteRegistration registration,
      const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate);

  // Update the execution graph to replace some of the nodes with stub
  // nodes. Specifically, any node whose index appears in `nodes_to_replace`
  // will be slated for replacement with a delegate kernel specified by
  // `registration`. Ownership of 'nodes_to_replace' and 'delegate' remains
  // with the caller.
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteStatus ReplaceNodeSubsetsWithDelegateKernels(
      TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
      TfLiteDelegate* delegate);

  // WARNING: This is an experimental interface that is subject to change.
  // Gets the internal pointer to a TensorFlow Lite node by node_index.
  TfLiteStatus GetNodeAndRegistration(int node_index, TfLiteNode** node,
                                      TfLiteRegistration** registration);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for C node plugin API to get a node by index.
  static TfLiteStatus GetNodeAndRegistration(struct TfLiteContext*,
                                             int node_index, TfLiteNode** node,
                                             TfLiteRegistration** registration);

  // WARNING: This is an experimental interface that is subject to change.
  // Gets a TfLiteIntArray* representing the execution plan. The interpreter
  // owns this memory and it is only guaranteed to exist during the invocation
  // of the delegate prepare.
  TfLiteStatus GetExecutionPlan(TfLiteIntArray** execution_plan);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for C node plugin API to get the execution plan.
  static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context,
                                       TfLiteIntArray** execution_plan);

  // WARNING: This is an experimental interface that is subject to change.
  // Provides a preview of post-delegation partitioning. Each
  // TfLiteDelegateParams in the referenced array corresponds to one instance
  // of the delegate kernel.
  // nodes_to_replace should point to a valid array. partition_params_array &
  // num_partitions should be non-null.
  // Memory allocated by this method is automatically released with another
  // call to PreviewDelegatePartitioning, or after TfLiteDelegate::Prepare is
  // done.
  TfLiteStatus PreviewDelegatePartitioning(
      const TfLiteIntArray* nodes_to_replace,
      TfLiteDelegateParams** partition_params_array, int* num_partitions);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for C node plugin API to preview delegation partitioning.
  static TfLiteStatus PreviewDelegatePartitioning(
      struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
      TfLiteDelegateParams** partition_params_array, int* num_partitions);
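
  // Example (an illustrative sketch): how a delegate's Prepare callback
  // typically reaches these entry points through the TfLiteContext function
  // pointers. `kernel_registration` and the surrounding delegate set-up are
  // assumptions for illustration.
  //
  //   TfLiteStatus DelegatePrepare(TfLiteContext* context,
  //                                TfLiteDelegate* delegate) {
  //     TfLiteIntArray* plan = nullptr;
  //     TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
  //     TfLiteDelegateParams* partitions = nullptr;
  //     int num_partitions = 0;
  //     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
  //         context, plan, &partitions, &num_partitions));
  //     // Inspect `partitions`, then hand the supported nodes back:
  //     return context->ReplaceNodeSubsetsWithDelegateKernels(
  //         context, kernel_registration, plan, delegate);
  //   }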

  // Retrieves named metadata from the TFLite model. Returns kTfLiteOk if
  // metadata is successfully obtained.
  // See the Metadata table in the TFLite schema.
  TfLiteStatus GetModelMetadata(const char* name, const char** ptr,
                                size_t* bytes);

  // Entry point for C node plugin API to get model metadata based on name.
  static TfLiteStatus GetModelMetadata(const struct TfLiteContext* context,
                                       const char* name, const char** ptr,
                                       size_t* bytes);

  // Used to clear partitioning_preview_cache_, in case
  // PreviewDelegatePartitioning was called.
  void FreeDelegatePartitioningData();

  // Retrieve an existing external context by type.
  TfLiteExternalContext* GetExternalContext(TfLiteExternalContextType type);
  static TfLiteExternalContext* GetExternalContext(
      struct TfLiteContext* context, TfLiteExternalContextType type);

  // Set the value of an external context.
  static void SetExternalContext(struct TfLiteContext* context,
                                 TfLiteExternalContextType type,
                                 TfLiteExternalContext* ctx);

  // WARNING: This is an experimental API and subject to change.
  // Allow a delegate to look at the graph and modify the graph to handle
  // parts of the graph itself. After this is called, the graph may
  // contain new nodes that replace one or more of the original nodes.
  // NOTE: If tensors were allocated prior to delegate application, they will
  // be reallocated if the graph was modified (i.e., the caller does *not* need
  // to explicitly call |AllocateTensors()| again). If tensors were unallocated,
  // they will remain unallocated after delegate application.
  // Returns one of the following status codes:
  // 1. kTfLiteOk: Delegation succeeded.
  // 2. kTfLiteDelegateError: Delegation failed due to an error *in the
  //    delegate*, or the delegate parameter was null. The Subgraph has been
  //    restored to its pre-delegation state.
  //    NOTE: This reverts all delegates previously applied to the Subgraph.
  // 3. kTfLiteApplicationError: Delegation failed to be applied due to an
  //    incompatibility with the TF Lite runtime, e.g., the model graph is
  //    already immutable when applying the delegate. However, the Subgraph is
  //    still in an invokable state.
  // 4. kTfLiteUnresolvedOps: Delegation failed because the model has an
  //    operator that cannot be resolved. This can happen when the op is not
  //    registered or built with the TF Lite framework.
  // 5. kTfLiteError: Unexpected/runtime failure.
  TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate);

  // This un-applies all delegates that have been applied so far, but retains
  // pointers to them.
  // The old execution plan and nodes are restored.
  TfLiteStatus UndoAllDelegates();

  // This re-applies all delegates that were undone.
  // Does nothing if UndoAllDelegates wasn't previously called.
  TfLiteStatus RedoAllDelegates();

  // This removes all delegates.
  // The old execution plan and nodes are restored. The graph is invokable
  // afterwards.
  TfLiteStatus RemoveAllDelegates();

  // Returns true if the subgraph has delegates applied.
  bool HasDelegates();

  // Returns true if the subgraph has been fully delegated.
  bool IsFullyDelegated() const;

  // Cleans up data reserved for the given node. Does not remove the {node,
  // registration} pair from nodes_and_registration_.
  void CleanupNode(int node_index);

  // Ensures that `tensors_` has at least `kTensorsCapacityHeadroom` extra
  // capacity. Calling this function may invalidate existing pointers to
  // tensors. After calling this function, adding `kTensorsCapacityHeadroom`
  // more tensors won't invalidate the pointers to existing tensors.
  void EnsureTensorsVectorCapacity();

  // Ensures the memory required is planned and allocated.
  TfLiteStatus EnsureMemoryAllocations();

  // Returns true if the cancellation function returns true.
  bool IsCancelled();

  // Returns true if 'node' could have side effects (e.g. stateful op).
  // Note that any node that might update tensors other than the op's outputs
  // is considered to have side effects.
  // Therefore control flow ops like 'If' and 'While' are considered to have
  // side effects because their condition and body subgraphs can contain ops
  // that have side effects.
  bool OpMightHaveSideEffect(const TfLiteNode* node,
                             const TfLiteRegistration* registration) const;

  // Returns a new GraphInfo object based on the current Subgraph.
  std::unique_ptr<GraphInfo> CreateGraphInfo();

  // Store a ptr to the model metadata owned by the Interpreter.
  // Since the lifetime of the Interpreter exceeds the Subgraph, metadata
  // remains valid for the latter's lifetime.
  // Also sets relevant fields on context_ based on known metadata.
  TfLiteStatus SetMetadata(const std::map<std::string, std::string>* metadata);

  // Initializes the mapping from tensor index to the index of the
  // last operation that uses the tensor as input.
  void InitializeTensorReleaseMap();

  // May allocate dynamic tensor memory for node outputs. Used when the
  // `EnsureDynamicTensorsAreReleased` or `UseDynamicAllocationForLargeTensors`
  // API is used.
  TfLiteStatus MayAllocateOpOutput(TfLiteNode* node);

  // Checks the options for releasing dynamic tensors and releases dynamic
  // tensors if configured.
  void MaybeReleaseDynamicTensors(const TfLiteNode& node, size_t node_index);

  // The state of the Subgraph.
  enum State {
    // The Subgraph isn't ready to be invoked.
    // `AllocateTensors` needs to be called to enter an invokable state.
    kStateUninvokable = 0,
    // The Subgraph is ready to be invoked.
    kStateInvokable,
    // The Subgraph is ready to be invoked, and the graph can't be further
    // modified. The Subgraph enters this state when `ModifyGraphWithDelegate`
    // is called and the delegate doesn't support dynamic tensors.
    kStateInvokableAndImmutable,
  };
  State state_ = kStateUninvokable;

  // A pure C data structure used to communicate with the pure C plugin
  // interface. To avoid copying tensor metadata, this is also the definitive
  // structure to store tensors.
  TfLiteContext context_ = {};

  // A pointer to the external contexts (kTfLiteMaxExternalContexts) array that
  // sits inside the associated TFLite interpreter instance.
  TfLiteExternalContext** external_contexts_;

  // Node inputs/outputs are stored in TfLiteNode, and TfLiteRegistration stores
  // function pointers to the actual implementation.
  // Nodes should appear in the order in which they are instantiated at runtime.
  // Delegated nodes are appended after all the original ones.
  std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
      nodes_and_registration_;

  // Whether the model is consistent, that is to say, whether the inputs and
  // outputs of every node and the global inputs and outputs are valid indices
  // into the tensor array.
  bool consistent_ = true;

  // Array of indices representing the tensors that are inputs to the
  // interpreter.
  std::vector<int> inputs_;

  // Array of indices representing the tensors that are outputs to the
  // interpreter.
  std::vector<int> outputs_;

  // Array of indices representing the tensors that are variable tensors.
  std::vector<int> variables_;

  // The error reporter that TFLite forwards errors to.
  ErrorReporter* error_reporter_;

  // Index of the next node to prepare.
  // During Invoke(), the Interpreter will allocate input tensors first, which
  // are known to be fixed size. Then it will allocate outputs from nodes for
  // as many nodes as possible. When there is a node that produces a
  // dynamically sized tensor, the Interpreter will stop allocating tensors,
  // set the index of the next node to allocate, and execute the node to
  // generate the output tensor before continuing to allocate successors. This
  // process repeats until all nodes are executed.
  // NOTE: this relies on the nodes being in topological order.
  int next_execution_plan_index_to_prepare_;

  // Only used in cases where a delegate supporting dynamic tensors is applied.
  // This helps prepare the original execution before the post-delegation one,
  // so that tensor shapes propagate.
  int next_original_execution_plan_index_to_prepare_;

  // This is similar to `next_execution_plan_index_to_prepare_`, but it tracks
  // which nodes' allocation is planned with the arena planner.
  //
  // This is a workaround for b/127354079. It shouldn't be necessary if
  // ArenaPlanner can "rewind" to a specific point.
  // TODO(b/127354079): Improve ArenaPlanner and remove this mechanism.
  int next_execution_plan_index_to_plan_allocation_;

  // WARNING: This is an experimental interface that is subject to change.
  // This is a list of node indices (to index into nodes_and_registration).
  // This represents a valid topological sort (dependency ordered) execution
  // plan. In particular, it is valid for this ordering to contain only a
  // subset of the node indices.
  std::vector<int> execution_plan_;

  // This is a copy of the first execution_plan_ before any delegates were
  // applied. It is empty if no delegates were applied to this Subgraph.
  std::vector<int> pre_delegation_execution_plan_;

  // Contains a list of delegates applied by the user so far, in order.
  std::vector<TfLiteDelegate*> delegates_applied_;

  // Set to true if UndoAllDelegates was called, and to false during
  // RedoAllDelegates.
  bool delegates_undone_ = false;

  // In the future, we'd like a TfLiteIntArray compatible representation.
  // TODO(aselle): replace execution_plan_ with this.
  std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> plan_cache_;

  // Used by PreviewDelegatePartitioning.
  std::vector<TfLiteDelegateParams> partitioning_preview_cache_;

  std::unique_ptr<MemoryPlanner> memory_planner_;

  // Maps tensor index to custom allocation for all applicable tensors.
  std::map<int, TfLiteCustomAllocation> custom_allocations_;

  // Tracking bit for whether a tensor was resized in the course of an op
  // invocation. This is a useful hint to ensure that dynamic tensor outputs
  // trigger downstream reallocation after op invocation.
  bool tensor_resized_since_op_invoke_ = false;

  // Profiler for this interpreter instance.
  std::unique_ptr<SubgraphAwareProfiler> profiler_;

  // A pointer to the vector of subgraphs. The vector is owned by the
  // interpreter.
  std::vector<std::unique_ptr<Subgraph>>* subgraphs_ = nullptr;

  // Location of the pointer to *this in *subgraphs_, or kInvalidSubgraphIndex.
  const int subgraph_index_;

  // True if not all tensors in the graph have static size after calling
  // `PrepareOpsStartingAt` (which is called by the public `AllocateTensors`
  // function).
  // The value is invalid before `PrepareOpsStartingAt` is called.
  bool has_dynamic_tensors_ = true;

  // WARNING: This is an experimental interface that is subject to change.
  // This is the index of the dynamic tensor which was detected in
  // PrepareOpsStartingAt() when `has_dynamic_tensors_` was set. This
  // information is kept only for user error messages.
  int dynamic_tensor_index_ = -1;

  // Reference to a cancellation function that can cancel a request in the
  // middle of a call to Invoke(). When this function returns true, a
  // kTfLiteError is returned by Invoke().
  bool (*check_cancelled_func_)(void*) = nullptr;

  // Reference to data used by the cancellation function in
  // `check_cancelled_func_`.
  void* cancellation_data_ = nullptr;

  // A map of resources. Owned by the interpreter and shared by multiple
  // subgraphs.
  resource::ResourceMap* resources_ = nullptr;

  // A map of resource IDs. Owned by the interpreter and shared by multiple
  // subgraphs.
  resource::ResourceIDMap* resource_ids_ = nullptr;

  // A map of initialization statuses that indicate whether the initialization
  // subgraph invocation is done or not.
  resource::InitializationStatusMap* initialization_status_map_;

  // Name of the subgraph (analogous to a function name).
  std::string name_;

  // Model-metadata owned by the Interpreter.
  const std::map<std::string, std::string>* metadata_ = nullptr;

  // Mapping from tensor index to the last index of the execution plan that
  // uses this tensor.
  std::map<int, int> tensor_to_last_op_index_;

  // `InterpreterOptions` object which is being used and owned by the
  // Interpreter.
  InterpreterOptions* options_;
};

}  // namespace tflite
#endif  // TENSORFLOW_LITE_CORE_SUBGRAPH_H_