1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #ifndef TENSORFLOW_LITE_CORE_SUBGRAPH_H_ |
16 | #define TENSORFLOW_LITE_CORE_SUBGRAPH_H_ |
17 | |
18 | #include <stdarg.h> |
19 | #include <stddef.h> |
20 | |
21 | #include <cstdint> |
22 | #include <cstdlib> |
23 | #include <map> |
24 | #include <memory> |
25 | #include <set> |
26 | #include <string> |
27 | #include <utility> |
28 | #include <vector> |
29 | |
30 | #include "tensorflow/lite/allocation.h" |
31 | #include "tensorflow/lite/c/common.h" |
32 | #include "tensorflow/lite/core/api/error_reporter.h" |
33 | #include "tensorflow/lite/core/api/profiler.h" |
34 | #include "tensorflow/lite/core/macros.h" |
35 | #include "tensorflow/lite/experimental/resource/initialization_status.h" |
36 | #include "tensorflow/lite/experimental/resource/resource_base.h" |
37 | #include "tensorflow/lite/graph_info.h" |
38 | #include "tensorflow/lite/interpreter_options.h" |
39 | #include "tensorflow/lite/memory_planner.h" |
40 | #include "tensorflow/lite/util.h" |
41 | |
42 | namespace tflite { |
43 | |
44 | class SingleOpModel; // Class for friend declarations. |
45 | |
46 | namespace delegates { |
47 | namespace test_utils { |
48 | class TestDelegate; // Class for friend declarations. |
49 | } // namespace test_utils |
50 | } // namespace delegates |
51 | |
52 | class Subgraph { |
53 | public: |
54 | friend class Interpreter; |
55 | friend class SingleOpModel; |
56 | |
57 | Subgraph(ErrorReporter* error_reporter, |
58 | TfLiteExternalContext** external_contexts, |
59 | std::vector<std::unique_ptr<Subgraph>>* subgraphs, |
60 | resource::ResourceMap* resources, |
61 | resource::ResourceIDMap* resource_ids, |
62 | resource::InitializationStatusMap* initialization_status_map, |
63 | int subgraph_index = kInvalidSubgraphIndex); |
64 | |
65 | Subgraph(const Subgraph&) = delete; |
66 | |
67 | // Subgraphs should be movable but not copyable. |
68 | Subgraph(Subgraph&&) = default; |
69 | Subgraph& operator=(const Subgraph&) = delete; |
70 | virtual ~Subgraph(); |
71 | |
72 | // Provide a list of tensor indexes that are inputs to the model. |
  // Each index is bounds-checked, and this modifies the consistent_ flag of
  // the interpreter.
75 | TfLiteStatus SetInputs(std::vector<int> inputs); |
76 | |
77 | // Provide a list of tensor indexes that are outputs to the model |
  // Each index is bounds-checked, and this modifies the consistent_ flag of
  // the interpreter.
80 | TfLiteStatus SetOutputs(std::vector<int> outputs); |
81 | |
82 | // Provide a list of tensor indexes that are variable tensors. |
  // Each index is bounds-checked, and this modifies the consistent_ flag of
  // the interpreter.
85 | TfLiteStatus SetVariables(std::vector<int> variables); |
86 | |
87 | // Adds a node with the given parameters and returns the index of the new |
88 | // node in `node_index` (optionally). Interpreter will take ownership of |
89 | // `builtin_data` and destroy it with `free`. Ownership of 'init_data' |
90 | // remains with the caller. |
91 | TfLiteStatus AddNodeWithParameters(const std::vector<int>& inputs, |
92 | const std::vector<int>& outputs, |
93 | const std::vector<int>& intermediates, |
94 | const char* init_data, |
95 | size_t init_data_size, void* builtin_data, |
96 | const TfLiteRegistration* registration, |
97 | int* node_index = nullptr); |
98 | |
99 | // Adds `tensors_to_add` tensors, preserving pre-existing Tensor entries. |
100 | // The value pointed to by `first_new_tensor_index` will be set to the |
101 | // index of the first new tensor if `first_new_tensor_index` is non-null. |
102 | TfLiteStatus AddTensors(int tensors_to_add, |
103 | int* first_new_tensor_index = nullptr); |
104 | |
105 | // Set description of inputs/outputs/data/fptrs for node `node_index`. |
106 | // This variant assumes an external buffer has been allocated of size |
107 | // bytes. The lifetime of buffer must be ensured to be greater or equal |
108 | // to Interpreter. `quantization` ownership is passed to the subgraph. |
109 | inline TfLiteStatus SetTensorParametersReadOnly( |
110 | int tensor_index, TfLiteType type, const char* name, |
111 | const std::vector<int>& dims, TfLiteQuantization quantization, |
112 | const char* buffer, size_t bytes, const Allocation* allocation = nullptr, |
113 | TfLiteSparsity* sparsity = nullptr) { |
114 | return SetTensorParametersReadOnly(tensor_index, type, name, dims.size(), |
115 | dims.data(), quantization, buffer, bytes, |
116 | allocation, sparsity); |
117 | } |
118 | TfLiteStatus SetTensorParametersReadOnly( |
119 | int tensor_index, TfLiteType type, const char* name, const size_t ndims, |
120 | const int* dims, TfLiteQuantization quantization, const char* buffer, |
121 | size_t bytes, const Allocation* allocation = nullptr, |
122 | TfLiteSparsity* sparsity = nullptr); |
123 | |
124 | // Set description of inputs/outputs/data/fptrs for node `node_index`. |
125 | // This variant assumes an external buffer has been allocated of size |
126 | // bytes. The lifetime of buffer must be ensured to be greater or equal |
127 | // to Interpreter. `quantization` ownership is passed to the subgraph. |
128 | inline TfLiteStatus SetTensorParametersReadWrite( |
129 | int tensor_index, TfLiteType type, const char* name, |
130 | const std::vector<int>& dims, TfLiteQuantization quantization, |
131 | bool is_variable = false, const std::vector<int>& dims_signature = {}) { |
132 | if (dims_signature.empty()) { |
133 | return SetTensorParametersReadWrite(tensor_index, type, name, dims.size(), |
134 | dims.data(), quantization, |
135 | is_variable); |
136 | } |
137 | return SetTensorParametersReadWrite( |
138 | tensor_index, type, name, dims.size(), dims.data(), quantization, |
139 | is_variable, dims_signature.size(), dims_signature.data()); |
140 | } |
141 | TfLiteStatus SetTensorParametersReadWrite( |
142 | int tensor_index, TfLiteType type, const char* name, const size_t ndims, |
143 | const int* dims, TfLiteQuantization quantization, |
144 | bool is_variable = false, const size_t ndims_signature = 0, |
145 | const int* dims_signature = nullptr); |
146 | |
147 | // Get all tensors in the subgraph. |
148 | TfLiteTensor* tensors() { return context_.tensors; } |
149 | |
150 | // Get a mutable tensor data structure. |
151 | TfLiteTensor* tensor(int tensor_index) { |
152 | if (tensor_index < 0 || |
153 | static_cast<size_t>(tensor_index) >= context_.tensors_size) { |
154 | return nullptr; |
155 | } |
156 | return &context_.tensors[tensor_index]; |
157 | } |
158 | |
159 | // Get an immutable tensor data structure. |
160 | const TfLiteTensor* tensor(int tensor_index) const { |
161 | if (tensor_index < 0 || |
162 | static_cast<size_t>(tensor_index) >= context_.tensors_size) { |
163 | return nullptr; |
164 | } |
165 | return &context_.tensors[tensor_index]; |
166 | } |
167 | |
  // Mutable access to the list of input tensor indices.
169 | std::vector<int>& inputs() { return inputs_; } |
170 | |
171 | // Read only access to list of inputs. |
172 | const std::vector<int>& inputs() const { return inputs_; } |
173 | |
  // Mutable access to the list of output tensor indices.
175 | std::vector<int>& outputs() { return outputs_; } |
176 | |
177 | // Read only access to list of outputs. |
178 | const std::vector<int>& outputs() const { return outputs_; } |
179 | |
  // Mutable access to the list of variable tensor indices.
181 | std::vector<int>& variables() { return variables_; } |
182 | |
183 | // Read only access to list of variable tensors. |
184 | const std::vector<int>& variables() const { return variables_; } |
185 | |
186 | // WARNING: Experimental interface, subject to change. |
187 | // TODO(ycling): Move this function to an external context interface. |
188 | resource::ResourceMap& resources() { return *resources_; } |
189 | |
190 | // WARNING: Experimental interface, subject to change. |
191 | // TODO(b/149099381): Move this function to an external context interface. |
192 | resource::ResourceIDMap& resource_ids() { return *resource_ids_; } |
193 | |
194 | // WARNING: Experimental interface, subject to change. |
195 | // TODO(b/149099381): Move this function to an external context interface. |
196 | resource::InitializationStatusMap& initialization_status_map() { |
197 | return *initialization_status_map_; |
198 | } |
199 | |
200 | size_t tensors_size() const { return tensors_.size(); } |
201 | |
202 | // Return the number of ops in the model. |
203 | size_t nodes_size() const { return nodes_and_registration_.size(); } |
204 | |
205 | // Return vector of node indices in the order of execution. |
206 | std::vector<int>& execution_plan() { return execution_plan_; } |
207 | |
208 | // Return read-only vector of node indices in the order of execution. |
209 | const std::vector<int>& execution_plan() const { return execution_plan_; } |
210 | |
211 | const std::vector<std::pair<TfLiteNode, TfLiteRegistration>>& |
212 | nodes_and_registration() const { |
213 | return nodes_and_registration_; |
214 | } |
215 | |
216 | // Get a pointer to an operation and registration data structure if in bounds. |
217 | const std::pair<TfLiteNode, TfLiteRegistration>* node_and_registration( |
218 | int node_index) const { |
219 | if (node_index < 0 || static_cast<size_t>(node_index) >= nodes_size()) |
220 | return nullptr; |
221 | return &nodes_and_registration_[node_index]; |
222 | } |
223 | |
224 | // Change the dimensionality of a given tensor. Note, this is only acceptable |
225 | // for tensor indices that are inputs. |
226 | // Returns status of failure or success. |
227 | TfLiteStatus ResizeInputTensor(int tensor_index, |
228 | const std::vector<int>& dims); |
229 | |
230 | // WARNING: Experimental interface, subject to change |
231 | // Change the dimensionality of a given tensor. This is only acceptable for |
232 | // tensor indices that are inputs or variables. Only unknown dimensions can be |
233 | // resized with this function. Unknown dimensions are indicated as `-1` in the |
234 | // `dims_signature` attribute of a `TfLiteTensor`. Returns status of failure |
235 | // or success. |
236 | TfLiteStatus ResizeInputTensorStrict(int tensor_index, |
237 | const std::vector<int>& dims); |
238 | |
239 | // This releases memory held by non-persistent tensors. It does NOT re-perform |
240 | // memory planning. |
241 | // AllocateTensors needs to be called before next invocation. |
242 | TfLiteStatus ReleaseNonPersistentMemory(); |
243 | |
244 | // WARNING: Experimental interface, subject to change |
245 | // This API releases memory held by the given subgraph. This method is |
246 | // designed to release memory of control flow subgraphs. |
247 | // AllocateTensors needs to be called before next invocation. |
248 | TfLiteStatus ReleaseMemory(); |
249 | |
250 | // Update allocations for all tensors. This will redim dependent tensors using |
251 | // the input tensor dimensionality as given. This is relatively expensive. |
252 | // If you know that your sizes are not changing, you need not call this. |
253 | // Returns status of success or failure. |
254 | TfLiteStatus AllocateTensors(); |
255 | |
256 | // Invoke the subgraph (run the whole graph in dependency order). |
257 | // |
258 | // NOTE: It is possible that the interpreter is not in a ready state |
259 | // to evaluate (i.e. if a ResizeTensor() has been performed without an |
260 | // AllocateTensors(). |
261 | // Returns status of success or failure. |
262 | TfLiteStatus Invoke(); |
263 | |
264 | // Entry point for C node plugin API to report an error. |
265 | void ReportError(const char* format, ...); |
266 | |
267 | // Return the subgraph specific context. |
268 | TfLiteContext* context() { return &context_; } |
269 | const TfLiteContext* context() const { return &context_; } |
270 | |
271 | // Set the value of an external context. |
272 | void SetExternalContext(TfLiteExternalContextType type, |
273 | TfLiteExternalContext* ctx); |
274 | // Get the half precision flag. |
275 | // WARNING: This is an experimental API and subject to change. |
276 | bool GetAllowFp16PrecisionForFp32() const { |
277 | return context_.allow_fp32_relax_to_fp16; |
278 | } |
279 | |
280 | // Sets the cancellation function pointer in order to cancel a request in the |
281 | // middle of a call to Invoke(). The interpreter queries this function during |
282 | // inference, between op invocations; when it returns true, the interpreter |
283 | // will abort execution and return `kTfLiteError`. The `data` parameter |
284 | // contains any data used by the cancellation function, and if non-null, |
285 | // remains owned by the caller. |
286 | // WARNING: This is an experimental API and subject to change. |
287 | void SetCancellationFunction(void* data, bool (*check_cancelled_func)(void*)); |
288 | |
289 | // Ensure the data in `tensor.data` is readable. In case delegate is used, |
290 | // it might require to copy the data from delegate buffer to raw memory. |
291 | // WARNING: This is an experimental API and subject to change. |
292 | TfLiteStatus EnsureTensorDataIsReadable(int tensor_index); |
293 | |
294 | // The default capacity of `tensors_` vector. |
295 | static constexpr int kTensorsReservedCapacity = 128; |
296 | // The capacity headroom of `tensors_` vector before calling ops' |
297 | // `prepare` and `invoke` function. In these functions, it's guaranteed |
298 | // allocating up to `kTensorsCapacityHeadroom` more tensors won't invalidate |
299 | // pointers to existing tensors. |
300 | static constexpr int kTensorsCapacityHeadroom = 16; |
301 | |
302 | // Reset all variable tensors to the default value. |
303 | // If a variable tensor doesn't have a buffer, reset it to zero. |
304 | // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it |
305 | // to the value of the buffer. |
306 | // WARNING: This is an experimental API and subject to change. |
307 | TfLiteStatus ResetVariableTensors(); |
308 | |
309 | void SetProfiler(Profiler* profiler, int associated_subgraph_idx) { |
310 | if (!profiler) { |
311 | profiler_.reset(nullptr); |
312 | context_.profiler = nullptr; |
313 | } else { |
314 | profiler_ = std::make_unique<SubgraphAwareProfiler>( |
315 | profiler, associated_subgraph_idx); |
316 | context_.profiler = profiler_.get(); |
317 | } |
318 | } |
319 | |
320 | Profiler* GetProfiler() { return profiler_.get(); } |
321 | |
322 | // Returns a pointer to vector of subgraphs. |
323 | // WARNING: This is an experimental API and subject to change. |
324 | std::vector<std::unique_ptr<Subgraph>>* GetSubgraphs() { return subgraphs_; } |
325 | |
  // Returns the location of this object within subgraphs_, or
  // kInvalidSubgraphIndex if subgraphs_ is nullptr or *this is not
  // represented in *subgraphs_.
329 | // WARNING: This is an experimental API and subject to |
330 | // change. |
331 | static constexpr int kInvalidSubgraphIndex = -1; |
332 | int GetSubgraphIndex() const { return subgraph_index_; } |
333 | |
  // True if the graph contains tensors whose sizes remain dynamic after
  // `AllocateTensors` has been called (see `has_dynamic_tensors_`).
  // NOTE(review): the previous wording ("true if all tensors ... static")
  // contradicted the method name; confirm the intended polarity.
337 | bool HasDynamicTensors() { return has_dynamic_tensors_; } |
338 | |
339 | // Assigns (or reassigns) a custom memory allocation for the given tensor. |
340 | // `flags` is a bitmask, see TfLiteCustomAllocationFlags. |
341 | // The runtime does NOT take ownership of the underlying memory. |
342 | // |
343 | // NOTE: User needs to call AllocateTensors() after this. |
344 | // Invalid/insufficient buffers will cause an error during AllocateTensors or |
345 | // Invoke (in case of dynamic shapes in the graph). |
346 | // |
347 | // Parameters should satisfy the following conditions: |
348 | // 1. tensor->allocation_type == kTfLiteArenaRw or kTfLiteArenaRwPersistent |
349 | // In general, this is true for I/O tensors & variable tensors. |
350 | // 2. allocation->data has the appropriate permissions for runtime access |
351 | // (Read-only for inputs, Read-Write for others), and outlives Interpreter. |
352 | // 3. allocation->bytes >= tensor->bytes. |
353 | // This condition is checked again if any tensors are resized. |
354 | // 4. allocation->data should be aligned to kDefaultTensorAlignment |
355 | // defined in lite/util.h. (Currently 64 bytes) |
356 | // This check is skipped if kTfLiteCustomAllocationFlagsSkipAlignCheck is |
357 | // set through `flags`. |
358 | // TODO(b/182215910): Expand on this documentation in a g3doc. |
359 | // |
360 | // WARNING: This is an experimental interface that is subject to change. |
361 | TfLiteStatus SetCustomAllocationForTensor( |
362 | int tensor_index, const TfLiteCustomAllocation& allocation, |
363 | int64_t flags = kTfLiteCustomAllocationFlagsNone); |
364 | |
365 | void SetName(const char* name); |
366 | const std::string& GetName() const; |
367 | |
368 | // WARNING: This is an experimental API and subject to change. |
369 | // Dumps debugging info by the underlying memory planner. |
370 | // Note: to have minimal binary increase caused by this debug info dump for |
371 | // the TfLite library and allow users to plug-in their own memory planner |
372 | // debugger, we have utilized weak symbols to meet these two requirements. By |
  // default, there is no debugging info dumped. However, if the TfLite-provided
  // lite:simple_memory_arena_debug_dump (i.e. containing the strong definition)
  // is linked to the program, calling this function will output memory usage
  // information about tensors and ops.
377 | void DumpMemoryPlannerDebugInfo() const; |
378 | |
379 | typedef struct SubgraphAllocInfo { |
380 | size_t arena_size; |
381 | size_t arena_persist_size; |
382 | size_t dynamic_size; |
383 | size_t resource_size; |
384 | } SubgraphAllocInfo; |
385 | |
386 | // WARNING: This is an experimental API and subject to change. |
387 | // Returns memory allocation status. |
388 | void GetMemoryAllocInfo(SubgraphAllocInfo* alloc_info) const; |
389 | |
390 | // WARNING: This is an experimental API and subject to change. |
391 | // Set the given `InterpreterOptions` object. |
392 | void SetOptions(InterpreterOptions* options) { options_ = options; } |
393 | |
394 | // WARNING: This is an experimental API and subject to change. |
395 | // True if all intermediates tensors should be preserved for debugging. |
396 | bool ShouldPreserveAllTensors() const { |
397 | return (options_ && options_->GetPreserveAllTensors()); |
398 | } |
399 | |
400 | // WARNING: This is an experimental API and subject to change. |
401 | // True if all intermediate dynamic tensors should be released once they are |
402 | // not used by the model. |
403 | bool ShouldReleaseDynamicTensors() const { |
404 | return (options_ && options_->GetEnsureDynamicTensorsAreReleased()); |
405 | } |
406 | |
407 | /// WARNING: This is an experimental API and subject to change. |
408 | /// Use dynamic tensor allocation and deallocation method for large tensors |
409 | /// instead of static memory planner. Dynamic tensors are allocated just |
410 | /// before when they're needed and released when they're not needed anymore. |
411 | /// It improves peak memory usage but there could be some latency impact. The |
412 | /// parameter `large_tensors_thresholds_in_bytes` is used to determine large |
413 | /// tensors. This API must be called before `AllocateTensors`. |
414 | void OptimizeMemoryForLargeTensors(int large_tensors_thresholds_in_bytes); |
415 | |
416 | // WARNING: This is an experimental API and subject to change. |
417 | // True if dynamic tensor allocation / deallocation method is enabled by |
418 | // `OptimizeMemoryForLargeTensors` API. |
419 | bool ShouldOptimizeMemoryForLargeTensors() { |
420 | return (options_ && (options_->GetDynamicAllocationForLargeTensors() > 0)); |
421 | } |
422 | |
423 | // WARNING: This is an experimental API and subject to change. |
  // Removes unused inputs of the subgraph: any input that is not used during
  // graph execution is marked as kTfLiteOptionalTensor.
426 | // Currently, it's used to remove unused inputs of WHILE cond subgraphs. |
427 | TfLiteStatus RemoveUnusedInputs(); |
428 | |
429 | private: |
430 | friend class InterpreterBuilder; |
431 | friend class TestDelegate; |
432 | // SubgraphAwareProfiler wraps an actual TFLite profiler, such as a |
433 | // BufferedProfiler instance, and takes care of event profiling/tracing in a |
434 | // certain subgraph. |
  class SubgraphAwareProfiler : public Profiler {
   public:
    // Constructor should be called with the non-nullptr profiler argument.
    SubgraphAwareProfiler(Profiler* profiler, int64_t subgraph_index)
        : profiler_(profiler), subgraph_index_(subgraph_index) {}
    ~SubgraphAwareProfiler() override {}

    // Begins an event on the wrapped profiler. Note: `event_metadata2` is
    // deliberately discarded and replaced with `subgraph_index_`, so the
    // backend can attribute the event to this subgraph. This is not a bug.
    uint32_t BeginEvent(const char* tag, EventType event_type,
                        int64_t event_metadata1,
                        int64_t event_metadata2) override {
      if (!profiler_) return 0;
      return profiler_->BeginEvent(tag, event_type, event_metadata1,
                                   subgraph_index_);
    }

    // Ends an event previously started with BeginEvent.
    void EndEvent(uint32_t event_handle) override {
      if (!profiler_) return;
      profiler_->EndEvent(event_handle);
    }

    // Ends an event, forwarding both metadata values unchanged.
    void EndEvent(uint32_t event_handle, int64_t event_metadata1,
                  int64_t event_metadata2) override {
      if (!profiler_) return;
      profiler_->EndEvent(event_handle, event_metadata1, event_metadata2);
    }

    // Records a complete event. As in BeginEvent, `event_metadata2` is
    // deliberately replaced with `subgraph_index_`.
    void AddEvent(const char* tag, EventType event_type, uint64_t elapsed_time,
                  int64_t event_metadata1, int64_t event_metadata2) override {
      if (!profiler_) return;
      profiler_->AddEvent(tag, event_type, elapsed_time, event_metadata1,
                          subgraph_index_);
    }

   private:
    // Does not own the wrapped profiler.
    Profiler* const profiler_;
    const int64_t subgraph_index_;
  };
473 | |
474 | // Ensure the internal node storage memory allocates at least `count` |
475 | // spots for node. NOTE, this doesn't actually add operators. This is an |
476 | // efficiency optimization that is subject to change. |
477 | // Note: Only used during initialization. |
478 | void ReserveNodes(int count); |
479 | |
480 | // Overrides execution plan. This bounds checks indices sent in. |
481 | // Note: Only used during initialization. |
482 | TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan); |
483 | |
484 | // Prevent 'context_' from accessing functions that are only available to |
485 | // delegated kernels. |
486 | void SwitchToKernelContext(); |
487 | |
488 | // Add delegate-only functions to 'context_'. |
489 | void SwitchToDelegateContext(); |
490 | |
491 | // Give 'op_reg' a chance to initialize itself using the contents of |
492 | // 'buffer'. If registration_external is valid, use the 'init' callback from |
493 | // that. |
494 | void* OpInit(const TfLiteRegistration& op_reg, const char* buffer, |
495 | size_t length); |
496 | |
497 | // Let 'op_reg' release any memory it might have allocated via 'OpInit'. |
498 | // If registration_external is valid, use the 'free' callback from that. |
499 | void OpFree(const TfLiteRegistration& op_reg, void* buffer); |
500 | |
501 | // Prepare the given 'node' for execution. |
502 | TfLiteStatus OpPrepare(const TfLiteRegistration& op_reg, TfLiteNode* node); |
503 | |
504 | // Invoke the operator represented by 'node'. |
505 | TfLiteStatus OpInvoke(const TfLiteRegistration& op_reg, TfLiteNode* node); |
506 | |
507 | // Call OpPrepare() for as many ops as possible, allocating memory for their |
508 | // tensors. If an op containing dynamic tensors is found, preparation will be |
509 | // postponed until this function is called again. This allows the interpreter |
510 | // to wait until Invoke() to resolve the sizes of dynamic tensors. |
511 | TfLiteStatus PrepareOpsAndTensors(); |
512 | |
513 | // Call OpPrepare() for all ops starting at 'first_node'. Stop when a |
514 | // dynamic tensors is found or all ops have been prepared. Fill |
515 | // 'last_node_prepared' with the id of the op containing dynamic tensors, or |
516 | // the last in the graph. |
517 | TfLiteStatus PrepareOpsStartingAt(int first_execution_plan_index, |
518 | const std::vector<int>& execution_plan, |
519 | int* last_execution_plan_index_prepared); |
520 | |
521 | // Tensors needed by the interpreter. Use `AddTensors` to add more blank |
522 | // tensor entries. Note, `tensors_.data()` needs to be synchronized to the |
523 | // `context_` whenever this std::vector is reallocated. Currently this |
524 | // only happens in `AddTensors()`. |
525 | std::vector<TfLiteTensor> tensors_; |
526 | |
527 | // Check if an array of tensor indices are valid with respect to the Tensor |
528 | // array. |
529 | // NOTE: this changes consistent_ to be false if indices are out of bounds. |
530 | TfLiteStatus CheckTensorIndices(const char* label, const int* indices, |
531 | int length); |
532 | |
533 | // Check that the input indices and the output indices don't overlap. |
534 | // This is needed because same tensor must not be used both as input and |
535 | // output for an operator. |
536 | // NOTE: this changes consistent_ to be false if indices are out of bounds. |
537 | TfLiteStatus CheckInputAndOutputForOverlap(const int* input_indices, |
538 | int num_inputs, |
539 | const int* output_indices, |
540 | int num_outputs); |
541 | |
542 | // Compute the number of bytes required to represent a tensor with dimensions |
543 | // specified by the array dims (of length dims_size). Returns the status code |
544 | // and bytes. |
545 | TfLiteStatus BytesRequired(TfLiteType type, const int* dims, size_t dims_size, |
546 | size_t* bytes); |
547 | |
  // Implementation of a tensor-resize request. If the given tensor is of
549 | // type kTfLiteDynamic it will also be allocated new memory. |
550 | TfLiteStatus ResizeTensorImpl(TfLiteTensor* tensor, TfLiteIntArray* new_size); |
551 | |
552 | // Report a detailed error string (will be printed to stderr). |
553 | void ReportErrorImpl(const char* format, va_list args); |
554 | |
  // Entry point for C node plugin API to request that a tensor be resized.
556 | static TfLiteStatus ResizeTensor(TfLiteContext* context, TfLiteTensor* tensor, |
557 | TfLiteIntArray* new_size); |
558 | // Entry point for C node plugin API to report an error. |
559 | static void ReportErrorC(TfLiteContext* context, const char* format, ...); |
560 | |
561 | // Entry point for C node plugin API to add new tensors. |
562 | static TfLiteStatus AddTensors(TfLiteContext* context, int tensors_to_add, |
563 | int* first_new_tensor_index); |
564 | |
565 | // WARNING: This is an experimental API and subject to change. |
566 | // Entry point for C API ReplaceNodeSubsetsWithDelegateKernels |
567 | static TfLiteStatus ReplaceNodeSubsetsWithDelegateKernels( |
568 | TfLiteContext* context, TfLiteRegistration registration, |
569 | const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); |
570 | |
571 | // Update the execution graph to replace some of the nodes with stub |
572 | // nodes. Specifically any node index that has `nodes[index]==1` will be |
573 | // slated for replacement with a delegate kernel specified by registration. |
574 | // Ownership of 'nodes_to_replace' and 'delegate' remains with the caller. |
575 | // WARNING: This is an experimental interface that is subject to change. |
576 | TfLiteStatus ReplaceNodeSubsetsWithDelegateKernels( |
577 | TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, |
578 | TfLiteDelegate* delegate); |
579 | |
580 | // WARNING: This is an experimental interface that is subject to change. |
581 | // Gets the internal pointer to a TensorFlow lite node by node_index. |
582 | TfLiteStatus GetNodeAndRegistration(int node_index, TfLiteNode** node, |
583 | TfLiteRegistration** registration); |
584 | |
585 | // WARNING: This is an experimental interface that is subject to change. |
586 | // Entry point for C node plugin API to get a node by index. |
587 | static TfLiteStatus GetNodeAndRegistration(struct TfLiteContext*, |
588 | int node_index, TfLiteNode** node, |
589 | TfLiteRegistration** registration); |
590 | |
591 | // WARNING: This is an experimental interface that is subject to change. |
  // Gets a TfLiteIntArray* representing the execution plan. The interpreter
593 | // owns this memory and it is only guaranteed to exist during the invocation |
594 | // of the delegate prepare. |
595 | TfLiteStatus GetExecutionPlan(TfLiteIntArray** execution_plan); |
596 | |
597 | // WARNING: This is an experimental interface that is subject to change. |
598 | // Entry point for C node plugin API to get the execution plan. |
599 | static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context, |
600 | TfLiteIntArray** execution_plan); |
601 | |
602 | // WARNING: This is an experimental interface that is subject to change. |
603 | // Provides a preview of post-delegation partitioning. Each |
604 | // TfLiteDelegateParams in the referenced array corresponds to one instance of |
605 | // the delegate kernel. |
606 | // nodes_to_replace should point to a valid array. partition_params_array & |
607 | // num_partitions should be non-null. |
  // Memory allocated by this method is automatically released with another call
  // to PreviewDelegatePartitioning, or after TfLiteDelegate::Prepare is done.
610 | TfLiteStatus PreviewDelegatePartitioning( |
611 | const TfLiteIntArray* nodes_to_replace, |
612 | TfLiteDelegateParams** partition_params_array, int* num_partitions); |
613 | |
614 | // WARNING: This is an experimental interface that is subject to change. |
615 | // Entry point for C node plugin API to preview delegation partitioning. |
616 | static TfLiteStatus PreviewDelegatePartitioning( |
617 | struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace, |
618 | TfLiteDelegateParams** partition_params_array, int* num_partitions); |
619 | |
620 | // Retrieves named metadata from the TFLite model. Returns kTfLiteOk if |
621 | // metadata is successfully obtained. |
622 | // See the Metadata table in TFLite schema. |
623 | TfLiteStatus GetModelMetadata(const char* name, const char** ptr, |
624 | size_t* bytes); |
625 | |
626 | // Entry point for C node plugin API to get model metadata based on name. |
627 | static TfLiteStatus GetModelMetadata(const struct TfLiteContext* context, |
628 | const char* name, const char** ptr, |
629 | size_t* bytes); |
630 | |
631 | // Used to clear partitioning_preview_cache_, in case |
632 | // PreviewDelegatePartitioning was called. |
633 | void FreeDelegatePartitioningData(); |
634 | |
635 | // Retrieve an existing external context by type. |
636 | TfLiteExternalContext* GetExternalContext(TfLiteExternalContextType type); |
637 | static TfLiteExternalContext* GetExternalContext( |
638 | struct TfLiteContext* context, TfLiteExternalContextType type); |
639 | |
640 | // Set the value of an external context. |
641 | static void SetExternalContext(struct TfLiteContext* context, |
642 | TfLiteExternalContextType type, |
643 | TfLiteExternalContext* ctx); |
644 | |
  // WARNING: This is an experimental API and subject to change.
  // Allow a delegate to look at the graph and modify the graph to handle
  // parts of the graph themselves. After this is called, the graph may
  // contain new nodes that replace 1 or more nodes.
  // NOTE: If tensors were allocated prior to delegate application, they will
  // be reallocated if the graph was modified (i.e., the caller does *not* need
  // to explicitly call |AllocateTensors()| again). If tensors were unallocated,
  // they will remain unallocated after delegate application.
  // Returns one of the following status codes:
  // 1. kTfLiteOk: Delegation succeeded
  // 2. kTfLiteDelegateError: Delegation failed due to an error *in the
  // delegate*, or the delegate parameter was null. The Subgraph has been
  // restored to its pre-delegation state.
  // NOTE: This reverts all delegates previously applied to the Subgraph.
  // 3. kTfLiteApplicationError : Delegation failed to be applied due to the
  // incompatibility with the TF Lite runtime, e.g., the model graph is already
  // immutable when applying the delegate. However, the Subgraph is still in an
  // invokable state.
  // 4. kTfLiteUnresolvedOps: Delegation failed because the model has an
  // operator that cannot be resolved. This can happen when the op is not
  // registered or built with the TF Lite framework.
  // 5. kTfLiteError: Unexpected/runtime failure.
  TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate);
668 | |
  // This un-applies all delegates that have been applied till now, but retains
  // pointers to them.
  // The old execution plan and nodes are restored.
  TfLiteStatus UndoAllDelegates();

  // This re-applies all delegates that were undone.
  // Does nothing if UndoAllDelegates wasn't previously called.
  TfLiteStatus RedoAllDelegates();

  // This removes all delegates.
  // The old execution plan and nodes are restored. The graph is invokable
  // afterwards.
  TfLiteStatus RemoveAllDelegates();

  // Returns true if the subgraph has delegates applied.
  bool HasDelegates();

  // Returns true if the subgraph has been fully delegated.
  bool IsFullyDelegated() const;

  // Cleans up data reserved for the given node. Does not remove the {node,
  // registration} pair from nodes_and_registration_.
  void CleanupNode(int node_index);
692 | |
  // Ensures that `tensors_` has at least `kTensorsCapacityHeadroom` extra
  // capacity. Calling this function may invalidate existing pointers to
  // tensors. After calling this function, adding `kTensorsCapacityHeadroom`
  // more tensors won't invalidate the pointer to existing tensors.
  void EnsureTensorsVectorCapacity();

  // Ensures the memory required is planned and allocated.
  TfLiteStatus EnsureMemoryAllocations();

  // Returns true if the cancellation function (`check_cancelled_func_`)
  // returns true.
  bool IsCancelled();

  // Returns true if 'node' could have side effect (e.g. stateful op).
  // Note that any node that might update other tensors besides the op's output
  // is considered to have side effect.
  // So control flow ops like 'If' and 'While' are considered to have
  // side effect because they can have ops that have side effect in the
  // condition and body subgraphs.
  bool OpMightHaveSideEffect(const TfLiteNode* node,
                             const TfLiteRegistration* registration) const;

  // Returns new GraphInfo object based on the current Subgraph.
  std::unique_ptr<GraphInfo> CreateGraphInfo();
716 | |
  // Store a ptr to the model metadata owned by the Interpreter.
  // Since the lifetime of the Interpreter exceeds the Subgraph, metadata
  // remains valid for the latter's lifetime.
  // Also sets relevant fields on context_ based on known metadata.
  TfLiteStatus SetMetadata(const std::map<std::string, std::string>* metadata);

  // Initializes the mapping between tensor index to the index of the
  // last operation that uses the tensor as input.
  void InitializeTensorReleaseMap();

  // May allocate dynamic tensor memory of node outputs. It's used when the
  // `EnsureDynamicTensorsAreReleased` or `UseDynamicAllocationForLargeTensors`
  // API is used.
  TfLiteStatus MayAllocateOpOutput(TfLiteNode* node);

  // Checks the options for releasing dynamic tensors and releases dynamic
  // tensors if configured.
  void MaybeReleaseDynamicTensors(const TfLiteNode& node, size_t node_index);
735 | |
  // The state of the Subgraph.
  enum State {
    // The Subgraph isn't ready to be invoked.
    // `AllocateTensors` needs to be called to enter an invokable state.
    kStateUninvokable = 0,
    // The Subgraph is ready to be invoked.
    kStateInvokable,
    // The Subgraph is ready to be invoked, and the graph can't be further
    // modified. The Subgraph will enter this state when calling
    // `ModifyGraphWithDelegate` and the delegate doesn't support dynamic
    // tensors.
    kStateInvokableAndImmutable,
  };
  State state_ = kStateUninvokable;

  // A pure C data structure used to communicate with the pure C plugin
  // interface. To avoid copying tensor metadata, this is also the definitive
  // structure to store tensors.
  TfLiteContext context_ = {};
755 | |
  // A pointer to the external contexts (kTfLiteMaxExternalContexts) array that
  // sits inside the associated TFLite interpreter instance.
  TfLiteExternalContext** external_contexts_;

  // Node inputs/outputs are stored in TfLiteNode and TfLiteRegistration stores
  // function pointers to actual implementation.
  // Nodes should appear in the order in which they are instantiated at runtime.
  // Delegated nodes are appended after all the original ones.
  std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
      nodes_and_registration_;

  // Whether the model is consistent. That is to say if the inputs and outputs
  // of every node and the global inputs and outputs are valid indexes into
  // the tensor array.
  bool consistent_ = true;

  // Array of indices representing the tensors that are inputs to the
  // interpreter.
  std::vector<int> inputs_;

  // Array of indices representing the tensors that are outputs to the
  // interpreter.
  std::vector<int> outputs_;

  // Array of indices representing the tensors that are variable tensors.
  std::vector<int> variables_;

  // The error reporter delegate that tflite will forward errors to.
  ErrorReporter* error_reporter_;
785 | |
  // Index of the next node to prepare.
  // During Invoke(), Interpreter will allocate input tensors first, which are
  // known to be fixed size. Then it will allocate outputs from nodes as many
  // as possible. When there is a node that produces a dynamic sized tensor,
  // Interpreter will stop allocating tensors, set the value of the next
  // allocate node id, and execute the node to generate the output tensor
  // before continuing to allocate successors. This process repeats until all
  // nodes are executed.
  // NOTE: this relies on the order of nodes that is in topological order.
  int next_execution_plan_index_to_prepare_;

  // Only used in cases where a delegate supporting dynamic tensors is applied.
  // This helps prepare the original execution before the post-delegation one,
  // so that tensor shapes propagate.
  int next_original_execution_plan_index_to_prepare_;

  // This is similar to `next_execution_plan_index_to_prepare_`, but it tracks
  // which nodes' allocation is planned with the arena planner.
  //
  // This is a workaround for b/127354079. It shouldn't be necessary if
  // ArenaPlanner can "rewind" to a specific point.
  // TODO(b/127354079): Improve ArenaPlanner and remove this mechanism.
  int next_execution_plan_index_to_plan_allocation_;

  // WARNING: This is an experimental interface that is subject to change.
  // This is a list of node indices (to index into nodes_and_registration).
  // This represents a valid topological sort (dependency ordered) execution
  // plan. In particular, it is valid for this ordering to contain only a
  // subset of the node indices.
  std::vector<int> execution_plan_;
815 | |
  // This is a copy of the first execution_plan_ before any delegates were
  // applied. It is empty if no delegates were applied to this Subgraph.
  std::vector<int> pre_delegation_execution_plan_;

  // Contains a list of delegates applied by the user so far, in order.
  std::vector<TfLiteDelegate*> delegates_applied_;

  // Set to true if UndoAllDelegates was called, and to false during
  // RedoAllDelegates.
  bool delegates_undone_ = false;

  // In the future, we'd like a TfLiteIntArray compatible representation.
  // TODO(aselle): replace execution_plan_ with this.
  std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> plan_cache_;

  // Used by PreviewDelegatePartitioning.
  std::vector<TfLiteDelegateParams> partitioning_preview_cache_;

  std::unique_ptr<MemoryPlanner> memory_planner_;
835 | |
  // Maps tensor index to custom allocation for all applicable tensors.
  std::map<int, TfLiteCustomAllocation> custom_allocations_;

  // Tracking bit for whether a tensor was resized in the course of an op
  // invocation. This is a useful hint to ensure that dynamic tensor outputs
  // trigger downstream reallocation after op invocation.
  bool tensor_resized_since_op_invoke_ = false;

  // Profiler for this interpreter instance.
  std::unique_ptr<SubgraphAwareProfiler> profiler_;

  // A pointer to vector of subgraphs. The vector is owned by the interpreter.
  std::vector<std::unique_ptr<Subgraph>>* subgraphs_ = nullptr;

  // Location of the pointer to *this in *subgraphs_, or kInvalidSubgraphIndex.
  const int subgraph_index_;

  // True if not all tensors in the graph have static sizes after calling
  // `PrepareOpsStartingAt` function (which is called by the `AllocateTensors`
  // public function).
  // The value is invalid before `PrepareOpsStartingAt` is called.
  bool has_dynamic_tensors_ = true;

  // WARNING: This is an experimental interface that is subject to change.
  // This is the index of dynamic tensor which was checked at
  // PrepareOpsStartingAt() when `has_dynamic_tensors_` is set. This information
  // is kept only for user error message.
  int dynamic_tensor_index_ = -1;
864 | |
  // Reference to cancellation function that can cancel a request in the middle
  // of a call to Invoke(). When this function returns True, a kTfLiteError is
  // returned by Invoke().
  bool (*check_cancelled_func_)(void*) = nullptr;

  // Reference to data used by the cancellation function in
  // `check_cancelled_func_`.
  void* cancellation_data_ = nullptr;

  // A map of resources. Owned by interpreter and shared by multiple subgraphs.
  resource::ResourceMap* resources_ = nullptr;

  // A map of resources IDs. Owned by interpreter and shared by multiple
  // subgraphs.
  resource::ResourceIDMap* resource_ids_ = nullptr;

  // A map of initialization statuses, that indicate whether the initialization
  // subgraph invocation is done or not.
  resource::InitializationStatusMap* initialization_status_map_;

  // Name of the subgraph (analogous to function name).
  std::string name_;

  // Model-metadata owned by the Interpreter.
  const std::map<std::string, std::string>* metadata_ = nullptr;

  // Mapping between tensor index to the last index of the execution plan that
  // uses this tensor.
  std::map<int, int> tensor_to_last_op_index_;

  // `InterpreterOptions` object which is being used and owned by Interpreter.
  InterpreterOptions* options_;
897 | }; |
898 | |
899 | } // namespace tflite |
900 | #endif // TENSORFLOW_LITE_CORE_SUBGRAPH_H_ |
901 | |