/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

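// Sentinel stored in alloc_node_ / dealloc_node_ for tensors that have not
// yet been assigned an allocation or deallocation node.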
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_all_tensors, int tensor_alignment,
                           int subgraph_index)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment, subgraph_index),
      persistent_arena_(kDefaultArenaAlignment, subgraph_index),
      preserve_all_tensors_(preserve_all_tensors),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {
  arena_.ReleaseBuffer();
  persistent_arena_.ReleaseBuffer();
}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

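// Undoes the planning for every kTfLiteArenaRw tensor that is first needed
// after `node`: its allocation record is reset, its data pointer is cleared,
// and the corresponding arena region is released.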
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = tensors[i];
      if (tensor.allocation_type == kTfLiteArenaRw) {
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }
  arena_.DeallocateAfter(node);

  return kTfLiteOk;
}

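// Computes tensor lifetimes for the whole graph: for each tensor, the first
// node that needs it is recorded in alloc_node_ and, using reference counts
// over node inputs, the node after which it is no longer used is recorded in
// dealloc_node_. Graph inputs, graph outputs and variable tensors receive an
// extra reference so they are never scheduled for deallocation.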
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  const size_t num_tensors = graph_info_->num_tensors();
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as not yet allocated or deallocated by any node.
  alloc_node_.assign(num_tensors, kNodeNotAssigned);
  dealloc_node_.assign(num_tensors, kNodeNotAssigned);
  nodes_to_tensors_.clear();
  nodes_to_tensors_.resize(
      std::max(graph_info_->num_execution_nodes(), (size_t)1), {});

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(num_tensors, 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // There is no need to deallocate a tensor that was never allocated.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the entire inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count for variable tensors by one, so they are
    // never deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and it should never be
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    nodes_to_tensors_[0].insert(tensor_index);
  }

  // Queue all graph inputs for allocation and make sure they are never
  // overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      refcounts[tensor_index]++;
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
      nodes_to_tensors_[0].insert(tensor_index);
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
      nodes_to_tensors_[i].insert(tensor_index);
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_all_tensors_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

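// Plans and commits arena allocations for the nodes in [first_node,
// last_node], including temporary tensors added by ops during `prepare`, and
// then re-resolves tensor data pointers (all of them if an arena was
// reallocated, otherwise only the newly allocated ones).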
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  const size_t num_tensors = graph_info_->num_tensors();
  TF_LITE_ENSURE(context_, num_tensors >= allocs_.size());
  alloc_node_.resize(num_tensors, kNodeNotAssigned);
  dealloc_node_.resize(num_tensors, kNodeNotAssigned);
  allocs_.resize(num_tensors);
  // Set allocation and deallocation for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      nodes_to_tensors_[i].insert(tensor_index);
      if (!preserve_all_tensors_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  std::vector<int32_t> tensors_allocated;
  TF_LITE_ENSURE_STATUS(
      CalculateAllocations(first_node, last_node, &tensors_allocated));
  bool arena_reallocated = false;
  TF_LITE_ENSURE_STATUS(Commit(&arena_reallocated));

  TfLiteTensor* tensors = graph_info_->tensors();
  if (arena_reallocated) {
    for (int i = 0; i < static_cast<int>(num_tensors); ++i) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
    }
  } else {
    for (int i = 0; i < static_cast<int>(tensors_allocated.size()); ++i) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(
          tensors_allocated[i], tensors[tensors_allocated[i]]));
    }
  }

  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = tensors[i];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate the underlying buffer.
  bool reallocated;
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_, &reallocated));
  // Resolve allocations for all tensors not on the persistent arena.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = tensors[i];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

void ArenaPlanner::DumpDebugInfo(const std::vector<int>& execution_plan) const {
  arena_.DumpDebugInfo("kTfLiteArenaRw Dump:", execution_plan);
  persistent_arena_.DumpDebugInfo("kTfLiteArenaRwPersistent Dump:",
                                  execution_plan);
}

void ArenaPlanner::GetAllocInfo(size_t* arena_size,
                                size_t* arena_persist_size) const {
  *arena_size = arena_.GetBufferSize();
  *arena_persist_size = persistent_arena_.GetBufferSize();
}

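// Commits both arenas to their underlying buffers. `reallocated` is set to
// true if either arena had to acquire a new buffer, in which case previously
// resolved tensor pointers are stale and must be resolved again.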
TfLiteStatus ArenaPlanner::Commit(bool* reallocated) {
  bool arena_reallocated, persistent_arena_reallocated;
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_, &arena_reallocated));
  TF_LITE_ENSURE_STATUS(
      persistent_arena_.Commit(context_, &persistent_arena_reallocated));
  *reallocated = arena_reallocated;
  *reallocated |= persistent_arena_reallocated;
  return kTfLiteOk;
}

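// Orders the tensors whose offsets are about to be calculated: tensors that
// are alive for the whole inference come first (sorted by index), the rest
// follow in non-increasing size order with ties broken by allocation time.
// This greedy largest-first ordering helps the arena pack tensors tightly.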
void ArenaPlanner::CreateTensorAllocationVector(
    std::vector<int32_t>* tensors_to_allocate) {
  const TfLiteTensor* tensors = this->graph_info_->tensors();
  auto tensor_compare = [&](int idx1, int idx2) {
    // Tensors that live for the entire model inference are allocated at the
    // beginning of the memory slice. Their relative order does not actually
    // matter, so they are sorted by index.
    if (alloc_node_[idx1] == 0 && dealloc_node_[idx1] == kNodeNotAssigned) {
      if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = tensors[idx1].bytes;
    auto size2 = tensors[idx2].bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return alloc_node_[idx1] < alloc_node_[idx2];
  };

  // Sort the tensor indices into the order in which their allocation offsets
  // will be calculated.
  std::sort(tensors_to_allocate->begin(), tensors_to_allocate->end(),
            tensor_compare);
}

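// Gathers the indices of all tensors that first become needed within the
// node range [first_node, last_node], as recorded in nodes_to_tensors_.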
std::vector<int32_t> ArenaPlanner::GetTensorsToAllocate(int first_node,
                                                        int last_node) {
  int num_tensors = static_cast<int>(graph_info_->num_tensors());
  std::vector<int32_t> tensors_to_allocate;
  tensors_to_allocate.reserve(num_tensors);
  for (int i = first_node; i <= last_node; ++i) {
    tensors_to_allocate.insert(tensors_to_allocate.end(),
                               nodes_to_tensors_[i].begin(),
                               nodes_to_tensors_[i].end());
  }
  return tensors_to_allocate;
}

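// Computes arena offsets for the tensors used in [first_node, last_node] in
// two passes: first, existing allocations that are too small for a tensor's
// current size are released; then the remaining candidates are ordered by
// CreateTensorAllocationVector and assigned offsets in the appropriate arena.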
TfLiteStatus ArenaPlanner::CalculateAllocations(
    int first_node, int last_node, std::vector<int32_t>* tensors_allocated) {
  // Indices of tensors in the order their allocation offsets will be
  // calculated.
  const std::vector<int32_t> tensors_to_allocate =
      GetTensorsToAllocate(first_node, last_node);

  tensors_allocated->reserve(tensors_to_allocate.size());
  // Deallocate arena allocations that are too small for the tensor's current
  // size; those tensors will be re-allocated below.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (const auto& tensor_index : tensors_to_allocate) {
    TfLiteTensor& tensor = tensors[tensor_index];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      if (allocs_[tensor_index].size < tensor.bytes) {
        TF_LITE_ENSURE_STATUS(
            arena_.Deallocate(context_, allocs_[tensor_index]));
        tensors_allocated->push_back(tensor_index);
      }
    } else {
      tensors_allocated->push_back(tensor_index);
    }
  }

  arena_.ResolveDeallocations();
  CreateTensorAllocationVector(tensors_allocated);
  // Assign arena offsets to the tensors that need (re)allocation, in the
  // order produced by CreateTensorAllocationVector.
  for (const auto& tensor_index : *tensors_allocated) {
    TfLiteTensor& tensor = tensors[tensor_index];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to avoid re-allocating persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      if (allocs_[tensor_index].size < tensor.bytes) {
        TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
            context_, tensor_alignment_, tensor.bytes, tensor_index,
            /*first_node=*/alloc_node_[tensor_index],
            /*last_node=*/std::numeric_limits<int32_t>::max(),
            &allocs_[tensor_index]));
      }
    }
  }
  return kTfLiteOk;
}

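// Turns a tensor's arena offset into an actual data pointer: kTfLiteArenaRw
// tensors resolve against the non-persistent arena (unless their allocation
// is empty), kTfLiteArenaRwPersistent tensors against the persistent arena,
// and all other allocation types are left untouched.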
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int32_t tensor_index,
                                                   TfLiteTensor& tensor) {
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving it as a
    // nullptr.
    if (allocs_[tensor_index].size != 0) {
      return arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                 &tensor.data.raw);
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                          &tensor.data.raw);
  }
  return kTfLiteOk;
}

}  // namespace tflite