1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include <algorithm> |
16 | #include <memory> |
17 | #include <set> |
18 | #include <string> |
19 | #include <unordered_map> |
20 | #include <utility> |
21 | #include <vector> |
22 | |
23 | #include "tensorflow/lite/toco/allocate_transient_arrays.h" |
24 | #include "tensorflow/lite/toco/model.h" |
25 | #include "tensorflow/lite/toco/model_flags.pb.h" |
26 | #include "tensorflow/lite/toco/tooling_util.h" |
27 | #include "tensorflow/core/platform/logging.h" |
28 | |
29 | namespace toco { |
30 | namespace { |
31 | |
32 | // The life span of an array. |
33 | struct ArrayLifespan { |
34 | // If true, the array is persistent state (as in a RNN). In that case, |
35 | // its allocation is permanent and the first_op, last_op members are |
36 | // unused. (The term 'transient' is a misnomer and we should think in |
37 | // terms of 'workspace' instead). |
38 | bool persistent = false; |
39 | // Index of the first op addressing that array. The array must be allocated |
40 | // just before executing this op. |
41 | std::size_t first_op = 0; |
42 | // Index of the last op addressing that array. We want to deallocate the array |
43 | // immediately after executing this op. |
44 | std::size_t last_op = 0; |
45 | }; |
46 | |
47 | bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) { |
48 | return !lifespan.persistent && lifespan.first_op == op_index; |
49 | } |
50 | |
51 | bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) { |
52 | return !lifespan.persistent && lifespan.last_op == op_index; |
53 | } |
54 | |
55 | // Helper function for ComputeArrayLifespans: updates one ArrayLifespan for |
56 | // one array for one op. |
57 | void UpdateArrayLifespan( |
58 | const std::string& array_name, std::size_t op_index, |
59 | std::unordered_map<std::string, ArrayLifespan>* array_lifespans) { |
60 | if (array_lifespans->count(array_name)) { |
61 | auto& lifespan = array_lifespans->at(array_name); |
62 | if (!lifespan.persistent) { |
63 | lifespan.first_op = std::min(lifespan.first_op, op_index); |
64 | lifespan.last_op = std::max(lifespan.last_op, op_index); |
65 | } |
66 | } else { |
67 | ArrayLifespan lifespan; |
68 | lifespan.first_op = op_index; |
69 | lifespan.last_op = op_index; |
70 | (*array_lifespans)[array_name] = lifespan; |
71 | } |
72 | } |
73 | |
74 | // Computes the ArrayLifespan for each array. |
75 | void ComputeArrayLifespans( |
76 | const Model& model, |
77 | std::unordered_map<std::string, ArrayLifespan>* array_lifespans) { |
78 | CHECK(array_lifespans->empty()); |
79 | for (const auto& rnn_state : model.flags.rnn_states()) { |
80 | ArrayLifespan lifespan; |
81 | lifespan.persistent = true; |
82 | (*array_lifespans)[rnn_state.state_array()] = lifespan; |
83 | } |
84 | for (std::size_t op_index = 0; op_index < model.operators.size(); |
85 | op_index++) { |
86 | const auto& op = model.operators[op_index]; |
87 | for (const auto& input : op->inputs) { |
88 | UpdateArrayLifespan(input, op_index, array_lifespans); |
89 | } |
90 | for (const auto& output : op->outputs) { |
91 | UpdateArrayLifespan(output, op_index, array_lifespans); |
92 | } |
93 | } |
94 | } |
95 | |
96 | inline bool operator==(const Alloc& a, const Alloc& b) { |
97 | CHECK(a.start != b.start || a.end == b.end); |
98 | return a.start == b.start; |
99 | } |
100 | |
101 | // Helper to keep track of total allocation size and of currently live |
102 | // allocations, and containing the core allocation routine. |
103 | class Allocator { |
104 | public: |
105 | Allocator() : total_size_(0) {} |
106 | |
107 | // Core allocation routine. |
108 | void Allocate(std::size_t size, Alloc* result) { |
109 | if (size == 0) { |
110 | // zero-sized arrays get a dummy alloc of (0, 0) that does not |
111 | // need to be kept in the books (no need to insert that into |
112 | // live_allocs_). |
113 | // Note: zero-sized arrays shouldn't exist, but handling that case |
114 | // here allows such pathological cases to get a cleaner error message |
115 | // later instead of generating spurious allocator failures. |
116 | result->start = 0; |
117 | result->end = 0; |
118 | return; |
119 | } |
120 | // Naive algorithm: pick the first gap between live allocations, |
121 | // that is wide enough for the new array. |
122 | std::size_t pos = 0; |
123 | for (const auto& a : live_allocs_) { |
124 | if (a.start >= pos + size) { |
125 | result->start = pos; |
126 | result->end = pos + size; |
127 | live_allocs_.insert(*result); |
128 | return; |
129 | } |
130 | pos = a.end; |
131 | } |
132 | // No sufficiently wide gap was found before an existing live allocation, |
133 | // so we allocate the new array at the end of the allocation space. |
134 | // We may then have to grow total_size_. |
135 | total_size_ = std::max(total_size_, pos + size); |
136 | result->start = pos; |
137 | result->end = pos + size; |
138 | live_allocs_.insert(*result); |
139 | } |
140 | |
141 | void Deallocate(const Alloc& a) { |
142 | // Special-case dummy allocs for zero-sized arrays. |
143 | if (a.start == 0 && a.end == 0) { |
144 | // Nothing needs to be done, these aren't kept in the books. |
145 | return; |
146 | } |
147 | auto iter = live_allocs_.lower_bound(a); |
148 | CHECK(iter != live_allocs_.end()); |
149 | CHECK(*iter == a); |
150 | live_allocs_.erase(iter); |
151 | } |
152 | |
153 | std::size_t total_size() const { return total_size_; } |
154 | |
155 | private: |
156 | std::size_t total_size_; |
157 | std::set<Alloc> live_allocs_; |
158 | }; |
159 | |
160 | // Returns the required transient allocation size (in bytes) for a given array, |
161 | // or 0 if it's not a transient array. |
162 | std::size_t TransientArraySize(const Model& model, |
163 | const std::string& array_name, |
164 | std::size_t transient_data_alignment) { |
165 | if (!IsAllocatableTransientArray(model, array_name)) { |
166 | return 0; |
167 | } |
168 | const auto& array = &model.GetArray(array_name); |
169 | CHECK(array->has_shape()) |
170 | << "Array '" << array_name << "' doesn't have a shape" ; |
171 | if (array->data_type == ArrayDataType::kNone) { |
172 | // Catch a typical issue at the moment with RNN states |
173 | for (const auto& rnn_state : model.flags.rnn_states()) { |
174 | if (rnn_state.state_array() == array_name) { |
175 | LOG(FATAL) |
176 | << "A RNN state array, " << array_name << ", still does not " |
177 | << "have a known data type after all graph transformations have " |
178 | << "run." ; |
179 | } |
180 | } |
181 | LOG(FATAL) << "An array, " << array_name << ", still does not " |
182 | << "have a known data type after all graph transformations have " |
183 | << "run." ; |
184 | } |
185 | const std::size_t elem_size = ElementSize(array->data_type); |
186 | const std::size_t raw_size = |
187 | elem_size * RequiredBufferSizeForShape(array->shape()); |
188 | const std::size_t rounded_size = |
189 | RoundUpToNextMultipleOf(raw_size, transient_data_alignment); |
190 | return rounded_size; |
191 | } |
192 | |
193 | // Allocates an array: call this for every array just before the first |
194 | // op where it is used. |
195 | void AllocateTransientArray(const Model& model, const std::string& array_name, |
196 | Allocator* allocator, |
197 | std::size_t transient_data_alignment) { |
198 | if (!IsAllocatableTransientArray(model, array_name)) { |
199 | return; |
200 | } |
201 | const std::size_t size = |
202 | TransientArraySize(model, array_name, transient_data_alignment); |
203 | const auto& array = &model.GetArray(array_name); |
204 | CHECK(!array->alloc); |
205 | allocator->Allocate(size, &array->GetOrCreateAlloc()); |
206 | } |
207 | |
208 | // Deallocates an array: call this for every array just after the last |
209 | // op where it is used. |
210 | void DeallocateTransientArray(const Model& model, const std::string& array_name, |
211 | Allocator* allocator) { |
212 | if (!IsAllocatableTransientArray(model, array_name)) { |
213 | return; |
214 | } |
215 | const auto& array = &model.GetArray(array_name); |
216 | CHECK(!!array->alloc); |
217 | allocator->Deallocate(*array->alloc); |
218 | } |
219 | |
220 | void PushBackIfNotFound(const std::string& s, std::vector<std::string>* v) { |
221 | if (std::find(v->begin(), v->end(), s) == v->end()) { |
222 | v->push_back(s); |
223 | } |
224 | } |
225 | |
226 | } // namespace |
227 | |
228 | void AllocateTransientArrays(Model* model, |
229 | std::size_t transient_data_alignment) { |
230 | // Precompute the lifespans for all arrays. |
231 | std::unordered_map<std::string, ArrayLifespan> array_lifespans; |
232 | ComputeArrayLifespans(*model, &array_lifespans); |
233 | |
234 | // In case of variable batch, our convention will be to compute the |
235 | // allocations for batch==1, then let the inference code multiply all |
236 | // the offsets by the actual runtime batch size. Conveniently, |
237 | // the variable_batch and batch flags are mutually exclusive, and the default |
238 | // value of batch is 1, so we have nothing special to do here. Let us |
239 | // just guard this assumption with a CHECK: |
240 | bool batchless_input_shapes = true; |
241 | for (const auto& input_array : model->flags.input_arrays()) { |
242 | if (!input_array.has_shape() || input_array.shape().dims().empty() || |
243 | input_array.shape().dims(0) != 1) { |
244 | batchless_input_shapes = false; |
245 | break; |
246 | } |
247 | } |
248 | CHECK(!model->flags.variable_batch() || batchless_input_shapes); |
249 | |
250 | Allocator allocator; |
251 | |
252 | // Construct a sorted map of array names, so that other layout engines can |
253 | // match exactly. |
254 | std::map<std::string, const Array*> ordered_arrays_map; |
255 | for (const auto& pair : model->GetArrayMap()) { |
256 | ordered_arrays_map[pair.first] = pair.second.get(); |
257 | } |
258 | |
259 | // Allocate persistent arrays (like RNN states). For them, 'transient' |
260 | // is a misnormer, should read 'workspace'. |
261 | for (const auto& array_pair : ordered_arrays_map) { |
262 | const std::string& array_name = array_pair.first; |
263 | auto it = array_lifespans.find(array_name); |
264 | if (it != array_lifespans.end() && it->second.persistent) { |
265 | AllocateTransientArray(*model, array_name, &allocator, |
266 | transient_data_alignment); |
267 | } |
268 | } |
269 | |
270 | for (std::size_t op_index = 0; op_index < model->operators.size(); |
271 | op_index++) { |
272 | const auto& op = model->operators[op_index]; |
273 | // Allocate those arrays whose lifespan starts exactly here. |
274 | std::vector<std::string> arrays_to_allocate; |
275 | for (const auto& input : op->inputs) { |
276 | if (StartsAt(array_lifespans[input], op_index)) { |
277 | PushBackIfNotFound(input, &arrays_to_allocate); |
278 | } |
279 | } |
280 | for (const auto& output : op->outputs) { |
281 | if (StartsAt(array_lifespans[output], op_index)) { |
282 | PushBackIfNotFound(output, &arrays_to_allocate); |
283 | } |
284 | } |
285 | for (const std::string& array : arrays_to_allocate) { |
286 | AllocateTransientArray(*model, array, &allocator, |
287 | transient_data_alignment); |
288 | } |
289 | |
290 | // Deallocate those arrays whose lifespan ends exactly here. |
291 | std::vector<std::string> arrays_to_deallocate; |
292 | for (const auto& input : op->inputs) { |
293 | if (EndsAt(array_lifespans[input], op_index)) { |
294 | PushBackIfNotFound(input, &arrays_to_deallocate); |
295 | } |
296 | } |
297 | for (const auto& output : op->outputs) { |
298 | if (EndsAt(array_lifespans[output], op_index)) { |
299 | PushBackIfNotFound(output, &arrays_to_deallocate); |
300 | } |
301 | } |
302 | for (const std::string& array : arrays_to_deallocate) { |
303 | DeallocateTransientArray(*model, array, &allocator); |
304 | } |
305 | } |
306 | |
307 | // Just out of curiosity (not used in the actual allocation process) |
308 | // evaluate the optimal total allocated size. |
309 | // First, compute the size of persistent arrays. |
310 | std::size_t optimal_transient_alloc_size = 0; |
311 | std::size_t persistent_alloc_size = 0; |
312 | for (const auto& array_pair : ordered_arrays_map) { |
313 | const std::string& array_name = array_pair.first; |
314 | auto it = array_lifespans.find(array_name); |
315 | if (it != array_lifespans.end() && it->second.persistent) { |
316 | persistent_alloc_size += |
317 | TransientArraySize(*model, array_name, transient_data_alignment); |
318 | } |
319 | } |
320 | for (const auto& op : model->operators) { |
321 | // for each operator, compute the sum of the sizes of the array that must |
322 | // be live during the execution of this operator, plus the size of |
323 | // persistent arrays that must be live at all times. |
324 | std::vector<std::string> non_persistent_edges; |
325 | for (const auto& input : op->inputs) { |
326 | if (!array_lifespans[input].persistent) { |
327 | PushBackIfNotFound(input, &non_persistent_edges); |
328 | } |
329 | } |
330 | for (const auto& output : op->outputs) { |
331 | if (!array_lifespans[output].persistent) { |
332 | PushBackIfNotFound(output, &non_persistent_edges); |
333 | } |
334 | } |
335 | std::size_t size = persistent_alloc_size; |
336 | for (const std::string& edge : non_persistent_edges) { |
337 | size += TransientArraySize(*model, edge, transient_data_alignment); |
338 | } |
339 | // The optimal total size is the maximum of all operator-specific sizes. |
340 | optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size); |
341 | } |
342 | |
343 | model->transient_data_size = allocator.total_size(); |
344 | model->transient_data_alignment = transient_data_alignment; |
345 | CHECK_GE(model->transient_data_size, optimal_transient_alloc_size); |
346 | LOG(INFO) << "Total transient array allocated size: " |
347 | << model->transient_data_size << " bytes, " |
348 | << "theoretical optimal value: " << optimal_transient_alloc_size |
349 | << " bytes." ; |
350 | CheckInvariants(*model); |
351 | } |
352 | } // namespace toco |
353 | |