1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15#include <algorithm>
16#include <memory>
17#include <set>
18#include <string>
19#include <unordered_map>
20#include <utility>
21#include <vector>
22
23#include "tensorflow/lite/toco/allocate_transient_arrays.h"
24#include "tensorflow/lite/toco/model.h"
25#include "tensorflow/lite/toco/model_flags.pb.h"
26#include "tensorflow/lite/toco/tooling_util.h"
27#include "tensorflow/core/platform/logging.h"
28
29namespace toco {
30namespace {
31
32// The life span of an array.
33struct ArrayLifespan {
34 // If true, the array is persistent state (as in a RNN). In that case,
35 // its allocation is permanent and the first_op, last_op members are
36 // unused. (The term 'transient' is a misnomer and we should think in
37 // terms of 'workspace' instead).
38 bool persistent = false;
39 // Index of the first op addressing that array. The array must be allocated
40 // just before executing this op.
41 std::size_t first_op = 0;
42 // Index of the last op addressing that array. We want to deallocate the array
43 // immediately after executing this op.
44 std::size_t last_op = 0;
45};
46
47bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
48 return !lifespan.persistent && lifespan.first_op == op_index;
49}
50
51bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
52 return !lifespan.persistent && lifespan.last_op == op_index;
53}
54
55// Helper function for ComputeArrayLifespans: updates one ArrayLifespan for
56// one array for one op.
57void UpdateArrayLifespan(
58 const std::string& array_name, std::size_t op_index,
59 std::unordered_map<std::string, ArrayLifespan>* array_lifespans) {
60 if (array_lifespans->count(array_name)) {
61 auto& lifespan = array_lifespans->at(array_name);
62 if (!lifespan.persistent) {
63 lifespan.first_op = std::min(lifespan.first_op, op_index);
64 lifespan.last_op = std::max(lifespan.last_op, op_index);
65 }
66 } else {
67 ArrayLifespan lifespan;
68 lifespan.first_op = op_index;
69 lifespan.last_op = op_index;
70 (*array_lifespans)[array_name] = lifespan;
71 }
72}
73
74// Computes the ArrayLifespan for each array.
75void ComputeArrayLifespans(
76 const Model& model,
77 std::unordered_map<std::string, ArrayLifespan>* array_lifespans) {
78 CHECK(array_lifespans->empty());
79 for (const auto& rnn_state : model.flags.rnn_states()) {
80 ArrayLifespan lifespan;
81 lifespan.persistent = true;
82 (*array_lifespans)[rnn_state.state_array()] = lifespan;
83 }
84 for (std::size_t op_index = 0; op_index < model.operators.size();
85 op_index++) {
86 const auto& op = model.operators[op_index];
87 for (const auto& input : op->inputs) {
88 UpdateArrayLifespan(input, op_index, array_lifespans);
89 }
90 for (const auto& output : op->outputs) {
91 UpdateArrayLifespan(output, op_index, array_lifespans);
92 }
93 }
94}
95
96inline bool operator==(const Alloc& a, const Alloc& b) {
97 CHECK(a.start != b.start || a.end == b.end);
98 return a.start == b.start;
99}
100
101// Helper to keep track of total allocation size and of currently live
102// allocations, and containing the core allocation routine.
103class Allocator {
104 public:
105 Allocator() : total_size_(0) {}
106
107 // Core allocation routine.
108 void Allocate(std::size_t size, Alloc* result) {
109 if (size == 0) {
110 // zero-sized arrays get a dummy alloc of (0, 0) that does not
111 // need to be kept in the books (no need to insert that into
112 // live_allocs_).
113 // Note: zero-sized arrays shouldn't exist, but handling that case
114 // here allows such pathological cases to get a cleaner error message
115 // later instead of generating spurious allocator failures.
116 result->start = 0;
117 result->end = 0;
118 return;
119 }
120 // Naive algorithm: pick the first gap between live allocations,
121 // that is wide enough for the new array.
122 std::size_t pos = 0;
123 for (const auto& a : live_allocs_) {
124 if (a.start >= pos + size) {
125 result->start = pos;
126 result->end = pos + size;
127 live_allocs_.insert(*result);
128 return;
129 }
130 pos = a.end;
131 }
132 // No sufficiently wide gap was found before an existing live allocation,
133 // so we allocate the new array at the end of the allocation space.
134 // We may then have to grow total_size_.
135 total_size_ = std::max(total_size_, pos + size);
136 result->start = pos;
137 result->end = pos + size;
138 live_allocs_.insert(*result);
139 }
140
141 void Deallocate(const Alloc& a) {
142 // Special-case dummy allocs for zero-sized arrays.
143 if (a.start == 0 && a.end == 0) {
144 // Nothing needs to be done, these aren't kept in the books.
145 return;
146 }
147 auto iter = live_allocs_.lower_bound(a);
148 CHECK(iter != live_allocs_.end());
149 CHECK(*iter == a);
150 live_allocs_.erase(iter);
151 }
152
153 std::size_t total_size() const { return total_size_; }
154
155 private:
156 std::size_t total_size_;
157 std::set<Alloc> live_allocs_;
158};
159
160// Returns the required transient allocation size (in bytes) for a given array,
161// or 0 if it's not a transient array.
162std::size_t TransientArraySize(const Model& model,
163 const std::string& array_name,
164 std::size_t transient_data_alignment) {
165 if (!IsAllocatableTransientArray(model, array_name)) {
166 return 0;
167 }
168 const auto& array = &model.GetArray(array_name);
169 CHECK(array->has_shape())
170 << "Array '" << array_name << "' doesn't have a shape";
171 if (array->data_type == ArrayDataType::kNone) {
172 // Catch a typical issue at the moment with RNN states
173 for (const auto& rnn_state : model.flags.rnn_states()) {
174 if (rnn_state.state_array() == array_name) {
175 LOG(FATAL)
176 << "A RNN state array, " << array_name << ", still does not "
177 << "have a known data type after all graph transformations have "
178 << "run.";
179 }
180 }
181 LOG(FATAL) << "An array, " << array_name << ", still does not "
182 << "have a known data type after all graph transformations have "
183 << "run.";
184 }
185 const std::size_t elem_size = ElementSize(array->data_type);
186 const std::size_t raw_size =
187 elem_size * RequiredBufferSizeForShape(array->shape());
188 const std::size_t rounded_size =
189 RoundUpToNextMultipleOf(raw_size, transient_data_alignment);
190 return rounded_size;
191}
192
193// Allocates an array: call this for every array just before the first
194// op where it is used.
195void AllocateTransientArray(const Model& model, const std::string& array_name,
196 Allocator* allocator,
197 std::size_t transient_data_alignment) {
198 if (!IsAllocatableTransientArray(model, array_name)) {
199 return;
200 }
201 const std::size_t size =
202 TransientArraySize(model, array_name, transient_data_alignment);
203 const auto& array = &model.GetArray(array_name);
204 CHECK(!array->alloc);
205 allocator->Allocate(size, &array->GetOrCreateAlloc());
206}
207
208// Deallocates an array: call this for every array just after the last
209// op where it is used.
210void DeallocateTransientArray(const Model& model, const std::string& array_name,
211 Allocator* allocator) {
212 if (!IsAllocatableTransientArray(model, array_name)) {
213 return;
214 }
215 const auto& array = &model.GetArray(array_name);
216 CHECK(!!array->alloc);
217 allocator->Deallocate(*array->alloc);
218}
219
220void PushBackIfNotFound(const std::string& s, std::vector<std::string>* v) {
221 if (std::find(v->begin(), v->end(), s) == v->end()) {
222 v->push_back(s);
223 }
224}
225
226} // namespace
227
228void AllocateTransientArrays(Model* model,
229 std::size_t transient_data_alignment) {
230 // Precompute the lifespans for all arrays.
231 std::unordered_map<std::string, ArrayLifespan> array_lifespans;
232 ComputeArrayLifespans(*model, &array_lifespans);
233
234 // In case of variable batch, our convention will be to compute the
235 // allocations for batch==1, then let the inference code multiply all
236 // the offsets by the actual runtime batch size. Conveniently,
237 // the variable_batch and batch flags are mutually exclusive, and the default
238 // value of batch is 1, so we have nothing special to do here. Let us
239 // just guard this assumption with a CHECK:
240 bool batchless_input_shapes = true;
241 for (const auto& input_array : model->flags.input_arrays()) {
242 if (!input_array.has_shape() || input_array.shape().dims().empty() ||
243 input_array.shape().dims(0) != 1) {
244 batchless_input_shapes = false;
245 break;
246 }
247 }
248 CHECK(!model->flags.variable_batch() || batchless_input_shapes);
249
250 Allocator allocator;
251
252 // Construct a sorted map of array names, so that other layout engines can
253 // match exactly.
254 std::map<std::string, const Array*> ordered_arrays_map;
255 for (const auto& pair : model->GetArrayMap()) {
256 ordered_arrays_map[pair.first] = pair.second.get();
257 }
258
259 // Allocate persistent arrays (like RNN states). For them, 'transient'
260 // is a misnormer, should read 'workspace'.
261 for (const auto& array_pair : ordered_arrays_map) {
262 const std::string& array_name = array_pair.first;
263 auto it = array_lifespans.find(array_name);
264 if (it != array_lifespans.end() && it->second.persistent) {
265 AllocateTransientArray(*model, array_name, &allocator,
266 transient_data_alignment);
267 }
268 }
269
270 for (std::size_t op_index = 0; op_index < model->operators.size();
271 op_index++) {
272 const auto& op = model->operators[op_index];
273 // Allocate those arrays whose lifespan starts exactly here.
274 std::vector<std::string> arrays_to_allocate;
275 for (const auto& input : op->inputs) {
276 if (StartsAt(array_lifespans[input], op_index)) {
277 PushBackIfNotFound(input, &arrays_to_allocate);
278 }
279 }
280 for (const auto& output : op->outputs) {
281 if (StartsAt(array_lifespans[output], op_index)) {
282 PushBackIfNotFound(output, &arrays_to_allocate);
283 }
284 }
285 for (const std::string& array : arrays_to_allocate) {
286 AllocateTransientArray(*model, array, &allocator,
287 transient_data_alignment);
288 }
289
290 // Deallocate those arrays whose lifespan ends exactly here.
291 std::vector<std::string> arrays_to_deallocate;
292 for (const auto& input : op->inputs) {
293 if (EndsAt(array_lifespans[input], op_index)) {
294 PushBackIfNotFound(input, &arrays_to_deallocate);
295 }
296 }
297 for (const auto& output : op->outputs) {
298 if (EndsAt(array_lifespans[output], op_index)) {
299 PushBackIfNotFound(output, &arrays_to_deallocate);
300 }
301 }
302 for (const std::string& array : arrays_to_deallocate) {
303 DeallocateTransientArray(*model, array, &allocator);
304 }
305 }
306
307 // Just out of curiosity (not used in the actual allocation process)
308 // evaluate the optimal total allocated size.
309 // First, compute the size of persistent arrays.
310 std::size_t optimal_transient_alloc_size = 0;
311 std::size_t persistent_alloc_size = 0;
312 for (const auto& array_pair : ordered_arrays_map) {
313 const std::string& array_name = array_pair.first;
314 auto it = array_lifespans.find(array_name);
315 if (it != array_lifespans.end() && it->second.persistent) {
316 persistent_alloc_size +=
317 TransientArraySize(*model, array_name, transient_data_alignment);
318 }
319 }
320 for (const auto& op : model->operators) {
321 // for each operator, compute the sum of the sizes of the array that must
322 // be live during the execution of this operator, plus the size of
323 // persistent arrays that must be live at all times.
324 std::vector<std::string> non_persistent_edges;
325 for (const auto& input : op->inputs) {
326 if (!array_lifespans[input].persistent) {
327 PushBackIfNotFound(input, &non_persistent_edges);
328 }
329 }
330 for (const auto& output : op->outputs) {
331 if (!array_lifespans[output].persistent) {
332 PushBackIfNotFound(output, &non_persistent_edges);
333 }
334 }
335 std::size_t size = persistent_alloc_size;
336 for (const std::string& edge : non_persistent_edges) {
337 size += TransientArraySize(*model, edge, transient_data_alignment);
338 }
339 // The optimal total size is the maximum of all operator-specific sizes.
340 optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size);
341 }
342
343 model->transient_data_size = allocator.total_size();
344 model->transient_data_alignment = transient_data_alignment;
345 CHECK_GE(model->transient_data_size, optimal_transient_alloc_size);
346 LOG(INFO) << "Total transient array allocated size: "
347 << model->transient_data_size << " bytes, "
348 << "theoretical optimal value: " << optimal_transient_alloc_size
349 << " bytes.";
350 CheckInvariants(*model);
351}
352} // namespace toco
353