1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #ifndef TENSORFLOW_LITE_ARENA_PLANNER_H_ |
16 | #define TENSORFLOW_LITE_ARENA_PLANNER_H_ |
17 | |
18 | #include <cstdint> |
19 | #include <memory> |
20 | #include <unordered_set> |
21 | #include <vector> |
22 | |
23 | #include "tensorflow/lite/c/common.h" |
24 | #include "tensorflow/lite/graph_info.h" |
25 | #include "tensorflow/lite/memory_planner.h" |
26 | #include "tensorflow/lite/simple_memory_arena.h" |
27 | #include "tensorflow/lite/util.h" |
28 | |
29 | namespace tflite { |
30 | |
// Default alignment, in bytes, for tensor buffers placed in the arenas.
// (`constexpr` already implies `const` for variables, so no extra qualifier
// is needed.)
constexpr int kDefaultArenaAlignment = 64;

// Per-tensor allocation bookkeeping record used by the planner internals;
// presumably defined in the corresponding .cc file — only the forward
// declaration is needed here.
struct AllocationInfo;
33 | |
// A memory planner that makes all the allocations using arenas.
//
// Before a model is executed by the interpreter, this class determines when
// each tensor needs to be allocated and deallocated, and preallocates all the
// necessary memory (the PlanAllocations phase). It then assigns portions of
// this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may
// share some of the buffer if a tensor B is to be allocated after another
// tensor A has been deallocated.
//
// If dynamic tensors are used the planning steps can be repeated during model
// execution. Since dynamic tensors don't have sizes until after the
// corresponding operation is executed, this class supports incremental
// planning.
class ArenaPlanner : public MemoryPlanner {
 public:
  // Ownership of 'context' is not taken and it must remain alive until the
  // ArenaPlanner is destroyed. The inputs to the graph will not share
  // memory with any other tensor, effectively preserving them until the end
  // of inference.
  ArenaPlanner(TfLiteContext* context, std::unique_ptr<GraphInfo> graph_info,
               bool preserve_all_tensors, int tensor_alignment,
               int subgraph_index = 0);
  ~ArenaPlanner() override;
  // Non-copyable: the planner holds per-graph arena state (allocs_, arenas)
  // that must not be duplicated.
  ArenaPlanner(const ArenaPlanner&) = delete;
  ArenaPlanner& operator=(const ArenaPlanner&) = delete;

  // Overrides of the MemoryPlanner interface; see memory_planner.h for the
  // contract of each entry point.
  TfLiteStatus ResetAllocations() override;
  TfLiteStatus ResetAllocationsAfter(int node) override;
  TfLiteStatus PlanAllocations() override;
  TfLiteStatus ExecuteAllocations(int first_node, int last_node) override;
  TfLiteStatus ReleaseNonPersistentMemory() override;
  TfLiteStatus AcquireNonPersistentMemory() override;
  bool HasNonPersistentMemory() override;
  void DumpDebugInfo(const std::vector<int>& execution_plan) const override;
  void GetAllocInfo(size_t* arena_size,
                    size_t* arena_persist_size) const override;

  // Returns the base arena location for a given allocation type.
  std::intptr_t BasePointer(TfLiteAllocationType type);

 private:
  // Make sure all the arenas have reserved enough memory to store all their
  // tensors. '*arena_reallocated' presumably reports whether any arena buffer
  // had to be (re)allocated — confirm against arena_planner.cc.
  TfLiteStatus Commit(bool* arena_reallocated);

  // Sorts `tensors_to_allocate` using the following ordering:
  // - Tensors that have lifespan through the whole model inference time go
  //   first;
  // - Other tensors (e.g. intermediate and temporary ones) are sorted from
  //   largest to smallest. For equal sized tensors, the tensor which is used
  //   first goes first.
  void CreateTensorAllocationVector(std::vector<int32_t>* tensors_to_allocate);

  // Returns vector containing the indices of all tensors allocated between
  // `first_node` and `last_node`.
  std::vector<int32_t> GetTensorsToAllocate(int first_node, int last_node);

  // Traverse the allocation queue and reserve space in the appropriate arena
  // for all tensors affected by ops in the interval [first_node, last_node].
  TfLiteStatus CalculateAllocations(int first_node, int last_node,
                                    std::vector<int32_t>* tensors_allocated);

  // Assign absolute memory location to a tensor, based on its relative
  // position inside the corresponding arena buffer.
  TfLiteStatus ResolveTensorAllocation(int32_t tensor_index,
                                       TfLiteTensor& tensor);

  // Register an allocation for all internal (temporary) tensors of
  // 'node_index'.
  TfLiteStatus CalculateAllocationOfInternalTensors(int node_index);

  // Register a deallocation for all internal (temporary) tensors of
  // 'node_index'.
  TfLiteStatus CalculateDeallocationOfInternalTensors(int node_index);

  // Borrowed; not owned (see constructor comment on lifetime).
  TfLiteContext* context_;
  std::unique_ptr<GraphInfo> graph_info_;

  // Stores allocation data for all tensors.
  std::vector<ArenaAllocWithUsageInterval> allocs_;

  // Map of Tensors allocated by each node.
  // NOLINTNEXTLINE - absl::flat_hash_set increases binary size by 106kB.
  std::vector<std::unordered_set<int32_t>> nodes_to_tensors_;

  // First node that uses the tensor. It needs to be allocated before
  // execution of the node's operation.
  std::vector<int32_t> alloc_node_;

  // Last node that uses the tensor. It can be deallocated after execution of
  // the node's operation.
  std::vector<int32_t> dealloc_node_;

  // Raw memory buffer that is allocated for all temporary and graph outputs
  // that are declared kTfLiteArenaRw.
  SimpleMemoryArena arena_;

  // Raw memory buffer that is allocated for persistent tensors that are
  // declared as kTfLiteArenaRwPersistent.
  SimpleMemoryArena persistent_arena_;

  // If true, then no overlapping of memory areas is done, meaning intermediate
  // tensors and temporary tensors can be queried after running.
  // (modulo running delegates)
  bool preserve_all_tensors_;

  // Number of bytes that tensor buffers should be aligned to.
  int tensor_alignment_;
};
143 | |
144 | } // namespace tflite |
145 | |
146 | #endif // TENSORFLOW_LITE_ARENA_PLANNER_H_ |
147 | |