/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

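// Sentinel value in `alloc_node_` / `dealloc_node_` marking a tensor that has
// not yet been assigned an allocation or deallocation node.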
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_all_tensors, int tensor_alignment,
                           int subgraph_index)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment, subgraph_index),
      persistent_arena_(kDefaultArenaAlignment, subgraph_index),
      preserve_all_tensors_(preserve_all_tensors),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {
  arena_.ReleaseBuffer();
  persistent_arena_.ReleaseBuffer();
}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

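// Drops the allocation plan for every kTfLiteArenaRw tensor first allocated
// after `node`, clearing its data pointer, so that allocations can be
// re-planned from that node onward. Persistent tensors keep their
// allocations.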
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = tensors[i];
      if (tensor.allocation_type == kTfLiteArenaRw) {
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }
  arena_.DeallocateAfter(node);

  return kTfLiteOk;
}

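// Plans tensor lifetimes by walking the graph in execution order with simple
// reference counting: each tensor is assigned the node at which it must be
// allocated and the node after which it may be deallocated (when its last
// reference is consumed). Graph inputs, outputs, and variable tensors get an
// extra reference so they are never deallocated during the invocation.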
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  const size_t num_tensors = graph_info_->num_tensors();
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as not yet allocated or deallocated by any node.
  alloc_node_.assign(num_tensors, kNodeNotAssigned);
  dealloc_node_.assign(num_tensors, kNodeNotAssigned);
  nodes_to_tensors_.clear();
  nodes_to_tensors_.resize(
      std::max(graph_info_->num_execution_nodes(), static_cast<size_t>(1)),
      {});

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(num_tensors, 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // There is no need to deallocate a tensor that was never allocated.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the entire inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count for variable tensors by one, so they are
    // never deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and should never contain
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    nodes_to_tensors_[0].insert(tensor_index);
  }

  // Queue all graph inputs for allocation and make sure they are never
  // overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      refcounts[tensor_index]++;
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
      nodes_to_tensors_[0].insert(tensor_index);
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
      nodes_to_tensors_[i].insert(tensor_index);
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_all_tensors_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

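// Assigns arena offsets for the tensors used by nodes in the range
// [first_node, last_node] and commits both arenas. Temporary tensors created
// in an op's `prepare` are handled here with single-node lifetimes, since
// they do not exist yet when PlanAllocations() runs. If committing moved an
// arena's underlying buffer, every tensor pointer is re-resolved; otherwise
// only the newly allocated tensors are.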
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  const size_t num_tensors = graph_info_->num_tensors();
  TF_LITE_ENSURE(context_, num_tensors >= allocs_.size());
  alloc_node_.resize(num_tensors, kNodeNotAssigned);
  dealloc_node_.resize(num_tensors, kNodeNotAssigned);
  allocs_.resize(num_tensors);
  // Set the allocation and deallocation nodes for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      nodes_to_tensors_[i].insert(tensor_index);
      if (!preserve_all_tensors_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  std::vector<int32_t> tensors_allocated;
  TF_LITE_ENSURE_STATUS(
      CalculateAllocations(first_node, last_node, &tensors_allocated));
  bool arena_reallocated = false;
  TF_LITE_ENSURE_STATUS(Commit(&arena_reallocated));

  TfLiteTensor* tensors = graph_info_->tensors();
  if (arena_reallocated) {
    for (int i = 0; i < static_cast<int>(num_tensors); ++i) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
    }
  } else {
    for (int i = 0; i < static_cast<int>(tensors_allocated.size()); ++i) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(
          tensors_allocated[i], tensors[tensors_allocated[i]]));
    }
  }

  return kTfLiteOk;
}

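// Releases the underlying buffer of the non-persistent arena, typically to
// reduce the memory footprint between invocations, and clears the data
// pointers of the tensors that lived in it.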
TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = tensors[i];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

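// The inverse of ReleaseNonPersistentMemory(): re-commits the non-persistent
// arena to re-acquire its buffer, then re-resolves the data pointers of all
// kTfLiteArenaRw tensors.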
TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate the underlying buffer.
  bool reallocated;
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_, &reallocated));
  // Resolve allocations for all tensors not on the persistent arena.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = tensors[i];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

void ArenaPlanner::DumpDebugInfo(const std::vector<int>& execution_plan) const {
  arena_.DumpDebugInfo("kTfLiteArenaRw Dump:", execution_plan);
  persistent_arena_.DumpDebugInfo("kTfLiteArenaRwPersistent Dump:",
                                  execution_plan);
}

void ArenaPlanner::GetAllocInfo(size_t* arena_size,
                                size_t* arena_persist_size) const {
  *arena_size = arena_.GetBufferSize();
  *arena_persist_size = persistent_arena_.GetBufferSize();
}

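// Commits both arenas. `*reallocated` is set to true if either underlying
// buffer was newly allocated or moved, in which case previously resolved
// tensor data pointers are stale and must be re-resolved.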
TfLiteStatus ArenaPlanner::Commit(bool* reallocated) {
  bool arena_reallocated, persistent_arena_reallocated;
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_, &arena_reallocated));
  TF_LITE_ENSURE_STATUS(
      persistent_arena_.Commit(context_, &persistent_arena_reallocated));
  *reallocated = arena_reallocated || persistent_arena_reallocated;
  return kTfLiteOk;
}

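// Sorts `tensors_to_allocate` into the order in which offsets are assigned:
// tensors alive for the whole invocation come first (by index), then the
// rest in decreasing order of size. Allocating large tensors first is a
// greedy heuristic that tends to reduce arena fragmentation.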
void ArenaPlanner::CreateTensorAllocationVector(
    std::vector<int32_t>* tensors_to_allocate) {
  const TfLiteTensor* tensors = graph_info_->tensors();
  auto tensor_compare = [&](int idx1, int idx2) {
    // Tensors that are alive for the whole duration of inference are
    // allocated at the beginning of the memory slice. Their relative order
    // does not actually matter, so they are sorted by index.
    if (alloc_node_[idx1] == 0 && dealloc_node_[idx1] == kNodeNotAssigned) {
      if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = tensors[idx1].bytes;
    auto size2 = tensors[idx2].bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return alloc_node_[idx1] < alloc_node_[idx2];
  };

  // Sort the tensor indices into the order in which their allocation offsets
  // will be calculated.
  std::sort(tensors_to_allocate->begin(), tensors_to_allocate->end(),
            tensor_compare);
}

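// Collects the indices of all tensors whose lifetime begins at a node in
// [first_node, last_node], as recorded in `nodes_to_tensors_`.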
std::vector<int32_t> ArenaPlanner::GetTensorsToAllocate(int first_node,
                                                        int last_node) {
  int num_tensors = static_cast<int>(graph_info_->num_tensors());
  std::vector<int32_t> tensors_to_allocate;
  tensors_to_allocate.reserve(num_tensors);
  for (int i = first_node; i <= last_node; ++i) {
    tensors_to_allocate.insert(tensors_to_allocate.end(),
                               nodes_to_tensors_[i].begin(),
                               nodes_to_tensors_[i].end());
  }
  return tensors_to_allocate;
}

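// Computes arena offsets for the tensors used by nodes in [first_node,
// last_node]. kTfLiteArenaRw tensors whose existing allocation is too small
// are deallocated and re-allocated; kTfLiteArenaRwPersistent tensors are
// allocated once and then never moved.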
TfLiteStatus ArenaPlanner::CalculateAllocations(
    int first_node, int last_node, std::vector<int32_t>* tensors_allocated) {
  // Gather the indices of the tensors whose allocation offsets need to be
  // calculated for this range of nodes.
  const std::vector<int32_t> tensors_to_allocate =
      GetTensorsToAllocate(first_node, last_node);

  tensors_allocated->reserve(tensors_to_allocate.size());
  // Deallocate any tensor whose existing allocation is too small to be
  // reused, so it can be re-allocated with the correct size below.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (const auto& tensor_index : tensors_to_allocate) {
    TfLiteTensor& tensor = tensors[tensor_index];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      if (allocs_[tensor_index].size < tensor.bytes) {
        TF_LITE_ENSURE_STATUS(
            arena_.Deallocate(context_, allocs_[tensor_index]));
        tensors_allocated->push_back(tensor_index);
      }
    } else {
      tensors_allocated->push_back(tensor_index);
    }
  }

  arena_.ResolveDeallocations();
  CreateTensorAllocationVector(tensors_allocated);
  // Assign an arena offset to each tensor, in the order computed above.
  for (const auto& tensor_index : *tensors_allocated) {
    TfLiteTensor& tensor = tensors[tensor_index];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to prevent reallocation of persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      if (allocs_[tensor_index].size < tensor.bytes) {
        TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
            context_, tensor_alignment_, tensor.bytes, tensor_index,
            /*first_node=*/alloc_node_[tensor_index],
            /*last_node=*/std::numeric_limits<int32_t>::max(),
            &allocs_[tensor_index]));
      }
    }
  }
  return kTfLiteOk;
}

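// Points `tensor.data.raw` at the tensor's location inside the committed
// arena. Zero-sized kTfLiteArenaRw tensors keep a null data pointer.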
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int32_t tensor_index,
                                                   TfLiteTensor& tensor) {
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving its data
    // pointer as nullptr.
    if (allocs_[tensor_index].size != 0) {
      return arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                 &tensor.data.raw);
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                          &tensor.data.raw);
  }
  return kTfLiteOk;
}

}  // namespace tflite