allocate_transient_arrays.cc source code [tensorflow/tensorflow/lite/toco/allocate_transient_arrays.cc]

1	/ Copyright 2017 The TensorFlow Authors. All Rights Reserved.*
2
3	Licensed under the Apache License, Version 2.0 (the "License");
4	you may not use this file except in compliance with the License.
5	You may obtain a copy of the License at
6
7	http://www.apache.org/licenses/LICENSE-2.0
8
9	Unless required by applicable law or agreed to in writing, software
10	distributed under the License is distributed on an "AS IS" BASIS,
11	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	See the License for the specific language governing permissions and
13	limitations under the License.
14	==============================================================================/*
15	#include <algorithm>
16	#include <memory>
17	#include <set>
18	#include <string>
19	#include <unordered_map>
20	#include <utility>
21	#include <vector>
22
23	#include "tensorflow/lite/toco/allocate_transient_arrays.h"
24	#include "tensorflow/lite/toco/model.h"
25	#include "tensorflow/lite/toco/model_flags.pb.h"
26	#include "tensorflow/lite/toco/tooling_util.h"
27	#include "tensorflow/core/platform/logging.h"
28
29	namespace toco {
30	namespace {
31
32	// The life span of an array.
33	struct ArrayLifespan {
34	// If true, the array is persistent state (as in a RNN). In that case,
35	// its allocation is permanent and the first_op, last_op members are
36	// unused. (The term 'transient' is a misnomer and we should think in
37	// terms of 'workspace' instead).
38	bool persistent = false;
39	// Index of the first op addressing that array. The array must be allocated
40	// just before executing this op.
41	std::size_t first_op = `0`;
42	// Index of the last op addressing that array. We want to deallocate the array
43	// immediately after executing this op.
44	std::size_t last_op = `0`;
45	};
46
47	bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
48	return !lifespan.persistent && lifespan.first_op == op_index;
49	}
50
51	bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
52	return !lifespan.persistent && lifespan.last_op == op_index;
53	}
54
55	// Helper function for ComputeArrayLifespans: updates one ArrayLifespan for
56	// one array for one op.
57	void UpdateArrayLifespan(
58	const std::string& array_name, std::size_t op_index,
59	std::unordered_map<std::string, ArrayLifespan>* array_lifespans) {
60	if (array_lifespans->count(array_name)) {
61	auto& lifespan = array_lifespans->at(array_name);
62	if (!lifespan.persistent) {
63	lifespan.first_op = std::min(lifespan.first_op, op_index);
64	lifespan.last_op = std::max(lifespan.last_op, op_index);
65	}
66	} else {
67	ArrayLifespan lifespan;
68	lifespan.first_op = op_index;
69	lifespan.last_op = op_index;
70	(*array_lifespans)[array_name] = lifespan;
71	}
72	}
73
74	// Computes the ArrayLifespan for each array.
75	void ComputeArrayLifespans(
76	const Model& model,
77	std::unordered_map<std::string, ArrayLifespan>* array_lifespans) {
78	CHECK(array_lifespans->empty());
79	for (const auto& rnn_state : model.flags.rnn_states()) {
80	ArrayLifespan lifespan;
81	lifespan.persistent = true;
82	(*array_lifespans)[rnn_state.state_array()] = lifespan;
83	}
84	for (std::size_t op_index = `0`; op_index < model.operators.size();
85	op_index++) {
86	const auto& op = model.operators [op_index];
87	for (const auto& input : op ->inputs) {
88	UpdateArrayLifespan(input, op_index, array_lifespans);
89	}
90	for (const auto& output : op ->outputs) {
91	UpdateArrayLifespan(output, op_index, array_lifespans);
92	}
93	}
94	}
95
96	inline bool operator==(const Alloc& a, const Alloc& b) {
97	CHECK(a.start != b.start \|\| a.end == b.end);
98	return a.start == b.start;
99	}
100
101	// Helper to keep track of total allocation size and of currently live
102	// allocations, and containing the core allocation routine.
103	class Allocator {
104	public:
105	Allocator() : total_size_(`0`) {}
106
107	// Core allocation routine.
108	void Allocate(std::size_t size, Alloc* result) {
109	if (size == `0`) {
110	// zero-sized arrays get a dummy alloc of (0, 0) that does not
111	// need to be kept in the books (no need to insert that into
112	// live_allocs_).
113	// Note: zero-sized arrays shouldn't exist, but handling that case
114	// here allows such pathological cases to get a cleaner error message
115	// later instead of generating spurious allocator failures.
116	result->start = `0`;
117	result->end = `0`;
118	return;
119	}
120	// Naive algorithm: pick the first gap between live allocations,
121	// that is wide enough for the new array.
122	std::size_t pos = `0`;
123	for (const auto& a : live_allocs_) {
124	if (a.start >= pos + size) {
125	result->start = pos;
126	result->end = pos + size;
127	live_allocs_.insert(*result);
128	return;
129	}
130	pos = a.end;
131	}
132	// No sufficiently wide gap was found before an existing live allocation,
133	// so we allocate the new array at the end of the allocation space.
134	// We may then have to grow total_size_.
135	total_size_ = std::max(total_size_, pos + size);
136	result->start = pos;
137	result->end = pos + size;
138	live_allocs_.insert(*result);
139	}
140
141	void Deallocate(const Alloc& a) {
142	// Special-case dummy allocs for zero-sized arrays.
143	if (a.start == `0` && a.end == `0`) {
144	// Nothing needs to be done, these aren't kept in the books.
145	return;
146	}
147	auto iter = live_allocs_.lower_bound(a);
148	CHECK(iter != live_allocs_.end());
149	CHECK(*iter == a);
150	live_allocs_.erase(iter);
151	}
152
153	std::size_t total_size() const { return total_size_; }
154
155	private:
156	std::size_t total_size_;
157	std::set<Alloc> live_allocs_;
158	};
159
160	// Returns the required transient allocation size (in bytes) for a given array,
161	// or 0 if it's not a transient array.
162	std::size_t TransientArraySize(const Model& model,
163	const std::string& array_name,
164	std::size_t transient_data_alignment) {
165	if (!IsAllocatableTransientArray(model, array_name)) {
166	return `0`;
167	}
168	const auto& array = &model.GetArray(array_name);
169	CHECK(array->has_shape())
170	<< "Array '" << array_name << "' doesn't have a shape";
171	if (array->data_type == ArrayDataType::kNone) {
172	// Catch a typical issue at the moment with RNN states
173	for (const auto& rnn_state : model.flags.rnn_states()) {
174	if (rnn_state.state_array() == array_name) {
175	LOG(FATAL)
176	<< "A RNN state array, " << array_name << ", still does not "
177	<< "have a known data type after all graph transformations have "
178	<< "run.";
179	}
180	}
181	LOG(FATAL) << "An array, " << array_name << ", still does not "
182	<< "have a known data type after all graph transformations have "
183	<< "run.";
184	}
185	const std::size_t elem_size = ElementSize(array->data_type);
186	const std::size_t raw_size =
187	elem_size * RequiredBufferSizeForShape(array->shape());
188	const std::size_t rounded_size =
189	RoundUpToNextMultipleOf(raw_size, transient_data_alignment);
190	return rounded_size;
191	}
192
193	// Allocates an array: call this for every array just before the first
194	// op where it is used.
195	void AllocateTransientArray(const Model& model, const std::string& array_name,
196	Allocator* allocator,
197	std::size_t transient_data_alignment) {
198	if (!IsAllocatableTransientArray(model, array_name)) {
199	return;
200	}
201	const std::size_t size =
202	TransientArraySize(model, array_name, transient_data_alignment);
203	const auto& array = &model.GetArray(array_name);
204	CHECK(!array->alloc);
205	allocator->Allocate(size, &array->GetOrCreateAlloc());
206	}
207
208	// Deallocates an array: call this for every array just after the last
209	// op where it is used.
210	void DeallocateTransientArray(const Model& model, const std::string& array_name,
211	Allocator* allocator) {
212	if (!IsAllocatableTransientArray(model, array_name)) {
213	return;
214	}
215	const auto& array = &model.GetArray(array_name);
216	CHECK(!!array->alloc);
217	allocator->Deallocate(*array->alloc);
218	}
219
220	void PushBackIfNotFound(const std::string& s, std::vector<std::string>* v) {
221	if (std::find(v->begin(), v->end(), s) == v->end()) {
222	v->push_back(s);
223	}
224	}
225
226	} // namespace
227
228	void AllocateTransientArrays(Model* model,
229	std::size_t transient_data_alignment) {
230	// Precompute the lifespans for all arrays.
231	std::unordered_map<std::string, ArrayLifespan> array_lifespans;
232	ComputeArrayLifespans(*model, &array_lifespans);
233
234	// In case of variable batch, our convention will be to compute the
235	// allocations for batch==1, then let the inference code multiply all
236	// the offsets by the actual runtime batch size. Conveniently,
237	// the variable_batch and batch flags are mutually exclusive, and the default
238	// value of batch is 1, so we have nothing special to do here. Let us
239	// just guard this assumption with a CHECK:
240	bool batchless_input_shapes = true;
241	for (const auto& input_array : model->flags.input_arrays()) {
242	if (!input_array.has_shape() \|\| input_array.shape().dims().empty() \|\|
243	input_array.shape().dims(`0`) != `1`) {
244	batchless_input_shapes = false;
245	break;
246	}
247	}
248	CHECK(!model->flags.variable_batch() \|\| batchless_input_shapes);
249
250	Allocator allocator;
251
252	// Construct a sorted map of array names, so that other layout engines can
253	// match exactly.
254	std::map<std::string, const Array*> ordered_arrays_map;
255	for (const auto& pair : model->GetArrayMap()) {
256	ordered_arrays_map [pair.first] = pair.second.get();
257	}
258
259	// Allocate persistent arrays (like RNN states). For them, 'transient'
260	// is a misnormer, should read 'workspace'.
261	for (const auto& array_pair : ordered_arrays_map) {
262	const std::string& array_name = array_pair.first;
263	auto it = array_lifespans.find(array_name);
264	if (it != array_lifespans.end() && it ->second.persistent) {
265	AllocateTransientArray(*model, array_name, &allocator,
266	transient_data_alignment);
267	}
268	}
269
270	for (std::size_t op_index = `0`; op_index < model->operators.size();
271	op_index++) {
272	const auto& op = model->operators [op_index];
273	// Allocate those arrays whose lifespan starts exactly here.
274	std::vector<std::string> arrays_to_allocate;
275	for (const auto& input : op ->inputs) {
276	if (StartsAt(array_lifespans [input], op_index)) {
277	PushBackIfNotFound(input, &arrays_to_allocate);
278	}
279	}
280	for (const auto& output : op ->outputs) {
281	if (StartsAt(array_lifespans [output], op_index)) {
282	PushBackIfNotFound(output, &arrays_to_allocate);
283	}
284	}
285	for (const std::string& array : arrays_to_allocate) {
286	AllocateTransientArray(*model, array, &allocator,
287	transient_data_alignment);
288	}
289
290	// Deallocate those arrays whose lifespan ends exactly here.
291	std::vector<std::string> arrays_to_deallocate;
292	for (const auto& input : op ->inputs) {
293	if (EndsAt(array_lifespans [input], op_index)) {
294	PushBackIfNotFound(input, &arrays_to_deallocate);
295	}
296	}
297	for (const auto& output : op ->outputs) {
298	if (EndsAt(array_lifespans [output], op_index)) {
299	PushBackIfNotFound(output, &arrays_to_deallocate);
300	}
301	}
302	for (const std::string& array : arrays_to_deallocate) {
303	DeallocateTransientArray(*model, array, &allocator);
304	}
305	}
306
307	// Just out of curiosity (not used in the actual allocation process)
308	// evaluate the optimal total allocated size.
309	// First, compute the size of persistent arrays.
310	std::size_t optimal_transient_alloc_size = `0`;
311	std::size_t persistent_alloc_size = `0`;
312	for (const auto& array_pair : ordered_arrays_map) {
313	const std::string& array_name = array_pair.first;
314	auto it = array_lifespans.find(array_name);
315	if (it != array_lifespans.end() && it ->second.persistent) {
316	persistent_alloc_size +=
317	TransientArraySize(*model, array_name, transient_data_alignment);
318	}
319	}
320	for (const auto& op : model->operators) {
321	// for each operator, compute the sum of the sizes of the array that must
322	// be live during the execution of this operator, plus the size of
323	// persistent arrays that must be live at all times.
324	std::vector<std::string> non_persistent_edges;
325	for (const auto& input : op ->inputs) {
326	if (!array_lifespans [input].persistent) {
327	PushBackIfNotFound(input, &non_persistent_edges);
328	}
329	}
330	for (const auto& output : op ->outputs) {
331	if (!array_lifespans [output].persistent) {
332	PushBackIfNotFound(output, &non_persistent_edges);
333	}
334	}
335	std::size_t size = persistent_alloc_size;
336	for (const std::string& edge : non_persistent_edges) {
337	size += TransientArraySize(*model, edge, transient_data_alignment);
338	}
339	// The optimal total size is the maximum of all operator-specific sizes.
340	optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size);
341	}
342
343	model->transient_data_size = allocator.total_size();
344	model->transient_data_alignment = transient_data_alignment;
345	CHECK_GE(model->transient_data_size, optimal_transient_alloc_size);
346	LOG(INFO) << "Total transient array allocated size: "
347	<< model->transient_data_size << " bytes, "
348	<< "theoretical optimal value: " << optimal_transient_alloc_size
349	<< " bytes.";
350	CheckInvariants(*model);
351	}
352	} // namespace toco
353

Browse the source code of tensorflow/tensorflow/lite/toco/allocate_transient_arrays.cc