/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

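// Sentinel value in `alloc_node_` / `dealloc_node_` marking a tensor that has
// not yet been assigned an allocation or deallocation node.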
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_all_tensors, int tensor_alignment,
                           int subgraph_index)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment, subgraph_index),
      persistent_arena_(kDefaultArenaAlignment, subgraph_index),
      preserve_all_tensors_(preserve_all_tensors),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {
  arena_.ReleaseBuffer();
  persistent_arena_.ReleaseBuffer();
}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

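// Drops the allocation plan for every kTfLiteArenaRw tensor first allocated
// after `node`, clearing its data pointer, so that allocations can be
// re-planned from that node onward. Persistent tensors keep their
// allocations.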
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = tensors[i];
      if (tensor.allocation_type == kTfLiteArenaRw) {
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }
  arena_.DeallocateAfter(node);

  return kTfLiteOk;
}

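// Plans tensor lifetimes by walking the graph in execution order with simple
// reference counting: each tensor is assigned the node at which it must be
// allocated and the node after which it may be deallocated (when its last
// reference is consumed). Graph inputs, outputs, and variable tensors get an
// extra reference so they are never deallocated during the invocation.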
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  const size_t num_tensors = graph_info_->num_tensors();
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as not yet allocated or deallocated by any node.
  alloc_node_.assign(num_tensors, kNodeNotAssigned);
  dealloc_node_.assign(num_tensors, kNodeNotAssigned);
  nodes_to_tensors_.clear();
  nodes_to_tensors_.resize(
      std::max(graph_info_->num_execution_nodes(), static_cast<size_t>(1)),
      {});

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(num_tensors, 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // There is no need to deallocate a tensor that was never allocated.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the entire inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count for variable tensors by one, so they are
    // never deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and should never contain
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    nodes_to_tensors_[0].insert(tensor_index);
  }

  // Queue all graph inputs for allocation and make sure they are never
  // overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      refcounts[tensor_index]++;
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
      nodes_to_tensors_[0].insert(tensor_index);
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
      nodes_to_tensors_[i].insert(tensor_index);
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_all_tensors_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

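// Assigns arena offsets for the tensors used by nodes in the range
// [first_node, last_node] and commits both arenas. Temporary tensors created
// in an op's `prepare` are handled here with single-node lifetimes, since
// they do not exist yet when PlanAllocations() runs. If committing moved an
// arena's underlying buffer, every tensor pointer is re-resolved; otherwise
// only the newly allocated tensors are.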
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  const size_t num_tensors = graph_info_->num_tensors();
  TF_LITE_ENSURE(context_, num_tensors >= allocs_.size());
  alloc_node_.resize(num_tensors, kNodeNotAssigned);
  dealloc_node_.resize(num_tensors, kNodeNotAssigned);
  allocs_.resize(num_tensors);
  // Set the allocation and deallocation nodes for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      nodes_to_tensors_[i].insert(tensor_index);
      if (!preserve_all_tensors_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  std::vector<int32_t> tensors_allocated;
  TF_LITE_ENSURE_STATUS(
      CalculateAllocations(first_node, last_node, &tensors_allocated));
  bool arena_reallocated = false;
  TF_LITE_ENSURE_STATUS(Commit(&arena_reallocated));

  TfLiteTensor* tensors = graph_info_->tensors();
  if (arena_reallocated) {
    for (int i = 0; i < static_cast<int>(num_tensors); ++i) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
    }
  } else {
    for (int i = 0; i < static_cast<int>(tensors_allocated.size()); ++i) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(
          tensors_allocated[i], tensors[tensors_allocated[i]]));
    }
  }

  return kTfLiteOk;
}

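// Releases the underlying buffer of the non-persistent arena, typically to
// reduce the memory footprint between invocations, and clears the data
// pointers of the tensors that lived in it.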
TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = tensors[i];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

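// The inverse of ReleaseNonPersistentMemory(): re-commits the non-persistent
// arena to re-acquire its buffer, then re-resolves the data pointers of all
// kTfLiteArenaRw tensors.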
TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate the underlying buffer.
  bool reallocated;
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_, &reallocated));
  // Resolve allocations for all tensors not on the persistent arena.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = tensors[i];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

void ArenaPlanner::DumpDebugInfo(const std::vector<int>& execution_plan) const {
  arena_.DumpDebugInfo("kTfLiteArenaRw Dump:", execution_plan);
  persistent_arena_.DumpDebugInfo("kTfLiteArenaRwPersistent Dump:",
                                  execution_plan);
}

void ArenaPlanner::GetAllocInfo(size_t* arena_size,
                                size_t* arena_persist_size) const {
  *arena_size = arena_.GetBufferSize();
  *arena_persist_size = persistent_arena_.GetBufferSize();
}

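// Commits both arenas. `*reallocated` is set to true if either underlying
// buffer was newly allocated or moved, in which case previously resolved
// tensor data pointers are stale and must be re-resolved.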
TfLiteStatus ArenaPlanner::Commit(bool* reallocated) {
  bool arena_reallocated, persistent_arena_reallocated;
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_, &arena_reallocated));
  TF_LITE_ENSURE_STATUS(
      persistent_arena_.Commit(context_, &persistent_arena_reallocated));
  *reallocated = arena_reallocated || persistent_arena_reallocated;
  return kTfLiteOk;
}

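// Sorts `tensors_to_allocate` into the order in which offsets are assigned:
// tensors alive for the whole invocation come first (by index), then the
// rest in decreasing order of size. Allocating large tensors first is a
// greedy heuristic that tends to reduce arena fragmentation.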
void ArenaPlanner::CreateTensorAllocationVector(
    std::vector<int32_t>* tensors_to_allocate) {
  const TfLiteTensor* tensors = graph_info_->tensors();
  auto tensor_compare = [&](int idx1, int idx2) {
    // Tensors that are alive for the whole duration of inference are
    // allocated at the beginning of the memory slice. Their relative order
    // does not actually matter, so they are sorted by index.
    if (alloc_node_[idx1] == 0 && dealloc_node_[idx1] == kNodeNotAssigned) {
      if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = tensors[idx1].bytes;
    auto size2 = tensors[idx2].bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return alloc_node_[idx1] < alloc_node_[idx2];
  };

  // Sort the tensor indices into the order in which their allocation offsets
  // will be calculated.
  std::sort(tensors_to_allocate->begin(), tensors_to_allocate->end(),
            tensor_compare);
}

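// Collects the indices of all tensors whose lifetime begins at a node in
// [first_node, last_node], as recorded in `nodes_to_tensors_`.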
std::vector<int32_t> ArenaPlanner::GetTensorsToAllocate(int first_node,
                                                        int last_node) {
  int num_tensors = static_cast<int>(graph_info_->num_tensors());
  std::vector<int32_t> tensors_to_allocate;
  tensors_to_allocate.reserve(num_tensors);
  for (int i = first_node; i <= last_node; ++i) {
    tensors_to_allocate.insert(tensors_to_allocate.end(),
                               nodes_to_tensors_[i].begin(),
                               nodes_to_tensors_[i].end());
  }
  return tensors_to_allocate;
}

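// Computes arena offsets for the tensors used by nodes in [first_node,
// last_node]. kTfLiteArenaRw tensors whose existing allocation is too small
// are deallocated and re-allocated; kTfLiteArenaRwPersistent tensors are
// allocated once and then never moved.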
TfLiteStatus ArenaPlanner::CalculateAllocations(
    int first_node, int last_node, std::vector<int32_t>* tensors_allocated) {
  // Gather the indices of the tensors whose allocation offsets need to be
  // calculated for this range of nodes.
  const std::vector<int32_t> tensors_to_allocate =
      GetTensorsToAllocate(first_node, last_node);

  tensors_allocated->reserve(tensors_to_allocate.size());
  // Deallocate any tensor whose existing allocation is too small to be
  // reused, so it can be re-allocated with the correct size below.
  TfLiteTensor* tensors = graph_info_->tensors();
  for (const auto& tensor_index : tensors_to_allocate) {
    TfLiteTensor& tensor = tensors[tensor_index];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      if (allocs_[tensor_index].size < tensor.bytes) {
        TF_LITE_ENSURE_STATUS(
            arena_.Deallocate(context_, allocs_[tensor_index]));
        tensors_allocated->push_back(tensor_index);
      }
    } else {
      tensors_allocated->push_back(tensor_index);
    }
  }

  arena_.ResolveDeallocations();
  CreateTensorAllocationVector(tensors_allocated);
  // Assign an arena offset to each tensor, in the order computed above.
  for (const auto& tensor_index : *tensors_allocated) {
    TfLiteTensor& tensor = tensors[tensor_index];
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to prevent reallocation of persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      if (allocs_[tensor_index].size < tensor.bytes) {
        TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
            context_, tensor_alignment_, tensor.bytes, tensor_index,
            /*first_node=*/alloc_node_[tensor_index],
            /*last_node=*/std::numeric_limits<int32_t>::max(),
            &allocs_[tensor_index]));
      }
    }
  }
  return kTfLiteOk;
}

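// Points `tensor.data.raw` at the tensor's location inside the committed
// arena. Zero-sized kTfLiteArenaRw tensors keep a null data pointer.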
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int32_t tensor_index,
                                                   TfLiteTensor& tensor) {
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving its data
    // pointer as nullptr.
    if (allocs_[tensor_index].size != 0) {
      return arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                 &tensor.data.raw);
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                          &tensor.data.raw);
  }
  return kTfLiteOk;
}

}  // namespace tflite