/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 | #include "tensorflow/lite/optional_debug_tools.h" |
16 | |
17 | #include <cassert> |
18 | #include <cinttypes> |
19 | #include <cstddef> |
20 | #include <cstdio> |
21 | #include <functional> |
22 | #include <limits> |
23 | #include <set> |
24 | #include <string> |
25 | #include <utility> |
26 | #include <vector> |
27 | |
28 | #include "tensorflow/lite/context_util.h" |
29 | #include "tensorflow/lite/core/subgraph.h" |
30 | #include "tensorflow/lite/interpreter.h" |
31 | #include "tensorflow/lite/schema/schema_generated.h" |
32 | |
33 | namespace tflite { |
34 | |
35 | namespace { |
// Just forward declarations.
const char* AllocTypeName(TfLiteAllocationType type);

void PrintIntVector(const std::vector<int>& v,
                    bool collapse_consecutives = true,
                    bool add_newline = false);

// A class to represent the information of a memory arena that's used in the
// TfLite runtime for holding the allocated memory of tensors. The information
// includes the following:
// 1. The memory allocation type.
// 2. The id of the tensor that has the largest amount of memory allocated,
//    and the size of that allocation.
// 3. The estimated memory boundary and size of the arena.
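//
// A minimal usage sketch (illustrative only; assumes a populated 'subgraph'):
//   MemoryArenaInfo rw_arena_info(kTfLiteArenaRw);
//   for (size_t i = 0; i < subgraph.tensors_size(); ++i) {
//     rw_arena_info.Update(i, *subgraph.tensor(static_cast<int>(i)));
//   }
//   rw_arena_info.Print();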
class MemoryArenaInfo {
 public:
  explicit MemoryArenaInfo(TfLiteAllocationType type)
      : allocation_type_(type) {}

  void Update(size_t tensor_index, const TfLiteTensor& tensor) {
    if (tensor.allocation_type != allocation_type_) return;
    if (tensor.data.data == nullptr) return;
    if (tensor.bytes > max_tensor_mem_bytes_) {
      max_tensor_mem_bytes_ = tensor.bytes;
      max_tensor_id_ = tensor_index;
    }

    size_t current_start_addr = reinterpret_cast<size_t>(tensor.data.data);
    size_t current_end_addr = current_start_addr + tensor.bytes;
    if (current_start_addr < min_tensor_start_addr_) {
      min_tensor_start_addr_ = current_start_addr;
    }
    if (current_end_addr > max_tensor_end_addr_) {
      max_tensor_end_addr_ = current_end_addr;
    }

    TensorAllocInfo info;
    info.tensor_id = tensor_index;
    info.start_addr = current_start_addr;
    info.bytes = tensor.bytes;
    const auto result = alloc_info_.insert(info);
    // Simply check that the insertion succeeds.
    assert(result.second);
    (void)result;  // Suppress the "unused variable" warning in NDEBUG builds.
  }

  size_t GetArenaStartingAddress() const { return min_tensor_start_addr_; }

  void Print() const {
    printf("%s Info: ", AllocTypeName(allocation_type_));
    if (max_tensor_end_addr_ == 0) {
      printf("not holding any allocation.\n");
      return;
    }
    printf("\nTensor %zu has the max size %zu bytes (%.3f MB).\n",
           max_tensor_id_, max_tensor_mem_bytes_,
           static_cast<float>(max_tensor_mem_bytes_) / (1 << 20));
    const size_t arena_size = max_tensor_end_addr_ - min_tensor_start_addr_;
    printf(
        "This memory arena is estimated as [0x%zx, 0x%zx), taking %zu bytes "
        "(%.3f MB).\n",
        min_tensor_start_addr_, max_tensor_end_addr_, arena_size,
        static_cast<float>(arena_size) / (1 << 20));

    std::vector<const TensorAllocInfo*> arena_increase_trace;
    size_t last_end_addr = 0;
    for (const auto& info : alloc_info_) {
      if (info.start_addr >= last_end_addr) {
        arena_increase_trace.emplace_back(&info);
        last_end_addr = info.start_addr + info.bytes;
      }
    }
    printf(
        "One possible set of tensors that have non-overlapping memory spaces "
        "and together take up the whole arena:\n");
    printf("Tensor ");
    for (size_t i = 0; i + 1 < arena_increase_trace.size(); ++i) {
      printf("%zu -> ", arena_increase_trace[i]->tensor_id);
    }
    printf("%zu.\n", arena_increase_trace.back()->tensor_id);
  }

 private:
  struct TensorAllocInfo {
    size_t tensor_id;
    size_t start_addr;
    size_t bytes;
  };

  // Compares first by 'start_addr' in increasing order, then by 'bytes' in
  // decreasing order, and finally by 'tensor_id' in increasing order.
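  // For example, {start_addr: 0x100, bytes: 64} orders before
  // {start_addr: 0x100, bytes: 32}, which in turn orders before
  // {start_addr: 0x200, bytes: 128}.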
  struct TensorAllocInfoCompare {
    bool operator()(const TensorAllocInfo& lhs,
                    const TensorAllocInfo& rhs) const {
      if (lhs.start_addr < rhs.start_addr) return true;
      if (lhs.start_addr == rhs.start_addr) {
        if (lhs.bytes > rhs.bytes) return true;
        if (lhs.bytes == rhs.bytes) return lhs.tensor_id < rhs.tensor_id;
        return false;
      }
      return false;
    }
  };

  const TfLiteAllocationType allocation_type_;
  size_t max_tensor_mem_bytes_ = 0;
  // The id of the tensor that has the max memory size.
  size_t max_tensor_id_ = std::numeric_limits<size_t>::max();
  size_t min_tensor_start_addr_ = std::numeric_limits<size_t>::max();
  size_t max_tensor_end_addr_ = 0;
  std::set<TensorAllocInfo, TensorAllocInfoCompare> alloc_info_;
};

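// Accumulates statistics for tensors with the kTfLiteDynamic allocation type:
// the number of dynamic tensors and their total byte size, plus the ids of
// the tensors that are tied for the largest single allocation.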
class DynamicMemoryInfo {
 public:
  void Update(size_t tensor_index, const TfLiteTensor& tensor) {
    if (tensor.allocation_type != kTfLiteDynamic) return;
    if (tensor.data.data == nullptr) return;
    if (tensor.bytes > max_tensor_mem_bytes_) {
      max_tensor_mem_bytes_ = tensor.bytes;
      max_tensor_ids_.clear();
      max_tensor_ids_.push_back(static_cast<int>(tensor_index));
    } else if (tensor.bytes == max_tensor_mem_bytes_) {
      max_tensor_ids_.push_back(static_cast<int>(tensor_index));
    }
    total_mem_bytes_ += tensor.bytes;
    num_total_tensors_++;
  }

  void Print() const {
    printf("kTfLiteDynamic Info: ");
    if (total_mem_bytes_ == 0) {
      printf("not holding any allocation.\n");
      return;
    }
    printf("\n%zu Tensors ", max_tensor_ids_.size());
    PrintIntVector(max_tensor_ids_, /*collapse_consecutives=*/false);
    printf(" have the max size %zu bytes (%.3f MB).\n", max_tensor_mem_bytes_,
           static_cast<float>(max_tensor_mem_bytes_) / (1 << 20));
    printf("There are %d dynamic tensors, taking %zu bytes (%.3f MB).\n",
           num_total_tensors_, total_mem_bytes_,
           static_cast<float>(total_mem_bytes_) / (1 << 20));
  }

 private:
  size_t max_tensor_mem_bytes_ = 0;
  // The ids of the tensors that have the max memory size.
  std::vector<int> max_tensor_ids_;
  size_t total_mem_bytes_ = 0;
  int num_total_tensors_ = 0;
};

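// Aggregates the tensor memory information of a whole model by tracking the
// kTfLiteArenaRw, kTfLiteArenaRwPersistent and kTfLiteMmapRo arenas along
// with kTfLiteDynamic allocations.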
class ModelTensorMemoryInfo {
 public:
  ModelTensorMemoryInfo()
      : rw_info_(kTfLiteArenaRw),
        rw_persistent_info_(kTfLiteArenaRwPersistent),
        mmap_info_(kTfLiteMmapRo) {}

  void Update(size_t tensor_index, const TfLiteTensor& tensor) {
    rw_info_.Update(tensor_index, tensor);
    rw_persistent_info_.Update(tensor_index, tensor);
    mmap_info_.Update(tensor_index, tensor);
    dynamic_info_.Update(tensor_index, tensor);
  }

  // Get the offset from the beginning address of the memory arena for
  // 'tensor'. Returns -1 if not applicable. Otherwise, returns a non-negative
  // value.
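  // For example, if the kTfLiteArenaRw arena starts at address 0x1000 and
  // 'tensor.data.data' points at 0x1040, the returned offset is 0x40.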
  int64_t GetOffsetFromArenaStart(const TfLiteTensor& tensor) const {
    if (tensor.data.data == nullptr) return -1;
    size_t tensor_address = reinterpret_cast<size_t>(tensor.data.data);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      return static_cast<int64_t>(tensor_address -
                                  rw_info_.GetArenaStartingAddress());
    }
    if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
      return static_cast<int64_t>(
          tensor_address - rw_persistent_info_.GetArenaStartingAddress());
    }
    if (tensor.allocation_type == kTfLiteMmapRo) {
      return static_cast<int64_t>(tensor_address -
                                  mmap_info_.GetArenaStartingAddress());
    }
    return -1;
  }

  void Print() const {
    printf("\n");
    rw_info_.Print();
    printf("\n");
    rw_persistent_info_.Print();
    printf("\n");
    mmap_info_.Print();
    printf("\n");
    dynamic_info_.Print();
    printf("\n");
  }

 private:
  MemoryArenaInfo rw_info_;
  MemoryArenaInfo rw_persistent_info_;
  MemoryArenaInfo mmap_info_;
  DynamicMemoryInfo dynamic_info_;
};

template <typename T>
void PrintTotalBytesOfTensors(const Subgraph& subgraph, const T& tensor_ids,
                              const std::string& prefix = " -> ") {
  size_t total = 0;
  for (const auto id : tensor_ids) {
    const TfLiteTensor* tensor = subgraph.tensor(id);
    if (tensor == nullptr) continue;
    total += tensor->bytes;
  }
  printf("%s%zuB (%.2fMB)\n", prefix.c_str(), total,
         static_cast<float>(total) / (1 << 20));
}

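// For illustration of the output format: {0, 1, 2, 5} prints as "[0-2,5]"
// when collapse_consecutives is true and as "[0,1,2,5]" when it is false; an
// empty vector prints as "(null)".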
void PrintIntVector(const std::vector<int>& v, bool collapse_consecutives,
                    bool add_newline) {
  if (v.empty()) {
    printf("(null)");
    if (add_newline) {
      printf("\n");
    }
    return;
  }

  int range_start = v[0];
  int range_end = range_start;
  std::function<void(const char*)> print_range = [&](const char* suffix) {
    if (range_end == range_start) {
      printf("%d%s", range_start, suffix);
    } else if (range_end == range_start + 1) {
      printf("%d,%d%s", range_start, range_end, suffix);
    } else {
      printf("%d-%d%s", range_start, range_end, suffix);
    }
  };

  printf("[");
  for (size_t i = 1; i < v.size(); ++i) {
    int current = v[i];
    if (collapse_consecutives && (current == range_end + 1)) {
      range_end = current;
    } else {
      print_range(",");
      range_start = range_end = current;
    }
  }
  print_range("]");
  if (add_newline) {
    printf("\n");
  }
}

void PrintTfLiteIntVector(const TfLiteIntArray* v,
                          bool collapse_consecutives = true,
                          bool add_newline = false) {
  std::vector<int> tmp;
  if (!v || v->size <= 0) {
    PrintIntVector(tmp, collapse_consecutives, add_newline);
    return;
  }
  tmp.insert(tmp.end(), v->data, v->data + v->size);
  PrintIntVector(tmp, collapse_consecutives, add_newline);
}

const char* TensorTypeName(TfLiteType type) {
  switch (type) {
    case kTfLiteNoType:
      return "kTfLiteNoType";
    case kTfLiteFloat32:
      return "kTfLiteFloat32";
    case kTfLiteInt32:
      return "kTfLiteInt32";
    case kTfLiteUInt32:
      return "kTfLiteUInt32";
    case kTfLiteUInt8:
      return "kTfLiteUInt8";
    case kTfLiteInt8:
      return "kTfLiteInt8";
    case kTfLiteInt64:
      return "kTfLiteInt64";
    case kTfLiteUInt64:
      return "kTfLiteUInt64";
    case kTfLiteString:
      return "kTfLiteString";
    case kTfLiteBool:
      return "kTfLiteBool";
    case kTfLiteUInt16:
      return "kTfLiteUInt16";
    case kTfLiteInt16:
      return "kTfLiteInt16";
    case kTfLiteComplex64:
      return "kTfLiteComplex64";
    case kTfLiteComplex128:
      return "kTfLiteComplex128";
    case kTfLiteFloat16:
      return "kTfLiteFloat16";
    case kTfLiteFloat64:
      return "kTfLiteFloat64";
    case kTfLiteResource:
      return "kTfLiteResource";
    case kTfLiteVariant:
      return "kTfLiteVariant";
  }
  return "(invalid)";
}

const char* AllocTypeName(TfLiteAllocationType type) {
  switch (type) {
    case kTfLiteMemNone:
      return "kTfLiteMemNone";
    case kTfLiteMmapRo:
      return "kTfLiteMmapRo";
    case kTfLiteDynamic:
      return "kTfLiteDynamic";
    case kTfLiteArenaRw:
      return "kTfLiteArenaRw";
    case kTfLiteArenaRwPersistent:
      return "kTfLiteArenaRwPersistent";
    case kTfLitePersistentRo:
      return "kTfLitePersistentRo";
    case kTfLiteCustom:
      return "kTfLiteCustom";
  }
  return "(invalid)";
}

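// For illustration: TruncateString("tensor/name/scope", 8) keeps the tail and
// returns "...scope", while TruncateString("tensor/name/scope", 8,
// /*truncate_at_end=*/true) keeps the head and returns "tenso...".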
std::string TruncateString(const char* str, int size_limit,
                           bool truncate_at_end = false) {
  if (str == nullptr) return "(nil)";

  std::string truncated(str);
  const size_t length = truncated.size();
  if (length <= static_cast<size_t>(size_limit)) return truncated;

  if (size_limit <= 3) return std::string(size_limit, '.');

  if (truncate_at_end) {
    truncated.resize(size_limit);
    // Change the last 3 chars to "..." to imply truncation.
    truncated.replace(size_limit - 3, 3, "...");
  } else {
    truncated.erase(0, length - size_limit);
    // Change the first 3 chars to "..." to imply truncation.
    truncated.replace(0, 3, "...");
  }
  return truncated;
}

}  // namespace

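// A minimal usage sketch (illustrative only; "model.tflite" is a placeholder
// path):
//   auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
//   tflite::ops::builtin::BuiltinOpResolver resolver;
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
//   if (interpreter->AllocateTensors() == kTfLiteOk) {
//     tflite::PrintInterpreterState(interpreter.get());
//   }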
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(const Interpreter* interpreter) {
  const size_t num_subgraphs = interpreter->subgraphs_size();
  printf("Interpreter has %zu subgraphs.\n\n", num_subgraphs);

  for (int i = 0; i < static_cast<int>(num_subgraphs); ++i) {
    const Subgraph& subgraph = *(interpreter->subgraph(i));
    printf(
        "-----------Subgraph-%d has %zu tensors and %zu nodes------------\n",
        i, subgraph.tensors_size(), subgraph.nodes_size());
    printf("%zu Inputs: ", subgraph.inputs().size());
    PrintIntVector(subgraph.inputs());
    PrintTotalBytesOfTensors(subgraph, subgraph.inputs());

    printf("%zu Outputs: ", subgraph.outputs().size());
    PrintIntVector(subgraph.outputs());
    PrintTotalBytesOfTensors(subgraph, subgraph.outputs());
    printf("\n");

    // Collect info about tensor memory allocation.
    ModelTensorMemoryInfo tensor_mem_info;
    for (size_t tensor_index = 0; tensor_index < subgraph.tensors_size();
         tensor_index++) {
      const TfLiteTensor* tensor =
          subgraph.tensor(static_cast<int>(tensor_index));
      tensor_mem_info.Update(tensor_index, *tensor);
    }

    printf("Tensor %3s %-25s %-15s %-18s %-18s %-10s %-16s\n", "ID", "Name",
           "Type", "AllocType", "Size (Bytes/MB)", "Shape", "MemAddr-Offset");
    for (size_t tensor_index = 0; tensor_index < subgraph.tensors_size();
         tensor_index++) {
      const TfLiteTensor* tensor =
          subgraph.tensor(static_cast<int>(tensor_index));
      printf("Tensor %3zu %-25s %-15s %-18s %-8zu / %.2f ", tensor_index,
             TruncateString(tensor->name, 25, /*truncate_at_end=*/true).c_str(),
             TruncateString(TensorTypeName(tensor->type), 15).c_str(),
             TruncateString(AllocTypeName(tensor->allocation_type), 18).c_str(),
             tensor->bytes, (static_cast<float>(tensor->bytes) / (1 << 20)));
      PrintTfLiteIntVector(tensor->dims, /*collapse_consecutives=*/false);
      const int64_t start_offset =
          tensor_mem_info.GetOffsetFromArenaStart(*tensor);
      const int64_t end_offset =
          start_offset == -1
              ? -1
              : start_offset + static_cast<int64_t>(tensor->bytes);
      printf(" [%" PRId64 ", %" PRId64 ")\n", start_offset, end_offset);
    }
    tensor_mem_info.Print();

    // Dumps debugging info provided by the underlying memory planner.
    // Note that this will output nothing unless the
    // ":simple_memory_arena_debug_dump" target is added as an extra
    // dependency.
    subgraph.DumpMemoryPlannerDebugInfo();

    // Going to print out all nodes (i.e. op kernels) in this subgraph.
    std::vector<bool> replaced_node_bits;
    std::vector<size_t> replaced_by_node;
    replaced_node_bits.resize(subgraph.nodes_size());
    replaced_by_node.resize(subgraph.nodes_size());
    bool has_delegate_applied = false;
    for (size_t node_index = 0; node_index < subgraph.nodes_size();
         node_index++) {
      replaced_node_bits[node_index] = false;
      const std::pair<TfLiteNode, TfLiteRegistration>* node_and_reg =
          subgraph.node_and_registration(static_cast<int>(node_index));
      const TfLiteNode& node = node_and_reg->first;
      auto* const delegate = node.delegate;
      if (delegate != nullptr) {
        has_delegate_applied = true;
        auto* params = static_cast<TfLiteDelegateParams*>(node.builtin_data);
        for (int nid : TfLiteIntArrayView(params->nodes_to_replace)) {
          replaced_node_bits[nid] = true;
          replaced_by_node[nid] = node_index;
        }
      }
    }
    for (size_t node_index = 0; node_index < subgraph.nodes_size();
         node_index++) {
      const std::pair<TfLiteNode, TfLiteRegistration>* node_and_reg =
          subgraph.node_and_registration(static_cast<int>(node_index));
      const TfLiteNode& node = node_and_reg->first;
      const TfLiteRegistration& reg = node_and_reg->second;

      std::string delegated_status;
      bool is_node_delegated = false;
      TfLiteIntArray empty_int_array;
      empty_int_array.size = 0;
      if (node.delegate == nullptr) {
        if (replaced_node_bits[node_index]) {
          delegated_status = "(delegated by node ";
          delegated_status.append(std::to_string(replaced_by_node[node_index]));
          delegated_status.append(")");
          is_node_delegated = true;
        } else {
          delegated_status = "(not delegated)";
        }
      }

      if (reg.custom_name != nullptr) {
        printf("Node %3zu Operator Custom Name %s %s\n", node_index,
               reg.custom_name, delegated_status.c_str());
      } else {
        printf("Node %3zu Operator Builtin Code %3d %s %s\n", node_index,
               reg.builtin_code, EnumNamesBuiltinOperator()[reg.builtin_code],
               delegated_status.c_str());
      }
      printf("  %d Input Tensors:",
             node.inputs != nullptr ? node.inputs->size : 0);
      PrintTfLiteIntVector(
          node.inputs,
          /*collapse_consecutives=*/(node.delegate != nullptr));
      PrintTotalBytesOfTensors(
          subgraph, is_node_delegated ? TfLiteIntArrayView(&empty_int_array)
                                      : TfLiteIntArrayView(node.inputs));

      printf("  %d Output Tensors:",
             node.outputs != nullptr ? node.outputs->size : 0);
      PrintTfLiteIntVector(node.outputs);
      PrintTotalBytesOfTensors(
          subgraph, is_node_delegated ? TfLiteIntArrayView(&empty_int_array)
                                      : TfLiteIntArrayView(node.outputs));

      if (node.intermediates && node.intermediates->size) {
        printf("  %d Intermediate Tensors:", node.intermediates->size);
        PrintTfLiteIntVector(node.intermediates);
        PrintTotalBytesOfTensors(subgraph,
                                 is_node_delegated
                                     ? TfLiteIntArrayView(&empty_int_array)
                                     : TfLiteIntArrayView(node.intermediates));
      }

      if (node.temporaries && node.temporaries->size) {
        printf("  %d Temporary Tensors:", node.temporaries->size);
        PrintTfLiteIntVector(node.temporaries);
        PrintTotalBytesOfTensors(
            subgraph, is_node_delegated ? TfLiteIntArrayView(&empty_int_array)
                                        : TfLiteIntArrayView(node.temporaries));
      }
    }

532 | printf("\nExecution plan as the list of %zu nodes invoked in-order: " , |
533 | subgraph.execution_plan().size()); |
534 | PrintIntVector(subgraph.execution_plan(), /*collapse_consecutives=*/true, |
535 | /*add_newline=*/true); |
536 | if (has_delegate_applied) { |
537 | printf("Among these nodes in the execution plan:\n" ); |
538 | for (int node_id : subgraph.execution_plan()) { |
539 | const std::pair<TfLiteNode, TfLiteRegistration>* node_and_reg = |
540 | subgraph.node_and_registration(node_id); |
541 | const TfLiteNode& node = node_and_reg->first; |
542 | auto* const delegate = node.delegate; |
543 | if (delegate == nullptr) continue; |
544 | const char* delegate_name = node_and_reg->second.custom_name; |
545 | auto* delegate_params = |
546 | static_cast<TfLiteDelegateParams*>(node.builtin_data); |
547 | printf(" Node %d is a %s node (%p), which has delegated %d nodes: " , |
548 | node_id, delegate_name == nullptr ? "[n/a]" : delegate_name, |
549 | delegate, delegate_params->nodes_to_replace->size); |
550 | PrintTfLiteIntVector(delegate_params->nodes_to_replace, |
551 | /*collapse_consecutives=*/true, |
552 | /*add_newline=*/true); |
553 | } |
554 | } |
555 | |
556 | printf("--------------Subgraph-%d dump has completed--------------\n\n" , i); |
557 | } |
558 | printf("--------------Memory Arena Status Start--------------\n" ); |
559 | size_t total_arena_memory_bytes = 0; |
560 | size_t total_dynamic_memory_bytes = 0; |
561 | size_t total_resource_bytes = 0; |
562 | |
563 | for (int i = 0; i < num_subgraphs; ++i) { |
564 | const Subgraph& subgraph = *(interpreter->subgraph(i)); |
565 | Subgraph::SubgraphAllocInfo alloc_info; |
566 | subgraph.GetMemoryAllocInfo(&alloc_info); |
567 | total_arena_memory_bytes += alloc_info.arena_size; |
568 | total_arena_memory_bytes += alloc_info.arena_persist_size; |
569 | total_dynamic_memory_bytes += alloc_info.dynamic_size; |
570 | // Resources are shared with all subgraphs. So calculate it only once. |
571 | if (i == 0) { |
572 | total_resource_bytes = alloc_info.resource_size; |
573 | } |
574 | } |
575 | size_t total_memory_bytes = total_arena_memory_bytes + |
576 | total_dynamic_memory_bytes + total_resource_bytes; |
577 | printf("Total memory usage: %zu bytes (%.3f MB)\n" , total_memory_bytes, |
578 | static_cast<float>(total_memory_bytes) / (1 << 20)); |
579 | printf("- Total arena memory usage: %zu bytes (%.3f MB)\n" , |
580 | total_arena_memory_bytes, |
581 | static_cast<float>(total_arena_memory_bytes) / (1 << 20)); |
582 | printf("- Total dynamic memory usage: %zu bytes (%.3f MB)\n" , |
583 | total_dynamic_memory_bytes, |
584 | static_cast<float>(total_dynamic_memory_bytes) / (1 << 20)); |
585 | if (total_resource_bytes) { |
586 | printf("- Total resource memory usage: %zu bytes (%.3f MB)\n" , |
587 | total_resource_bytes, |
588 | static_cast<float>(total_resource_bytes) / (1 << 20)); |
589 | } |
590 | putchar('\n'); |
591 | |
  for (int i = 0; i < static_cast<int>(num_subgraphs); ++i) {
    const Subgraph& subgraph = *(interpreter->subgraph(i));
    Subgraph::SubgraphAllocInfo alloc_info;
    subgraph.GetMemoryAllocInfo(&alloc_info);
    if (alloc_info.arena_size) {
      printf(
          "Subgraph#%-3d %-18s %10zu (%.2f%%)\n", i, "Arena (Normal)",
          alloc_info.arena_size,
          static_cast<float>(alloc_info.arena_size * 100) / total_memory_bytes);
    }
    if (alloc_info.arena_persist_size) {
      printf("Subgraph#%-3d %-18s %10zu (%.2f%%)\n", i, "Arena (Persistent)",
             alloc_info.arena_persist_size,
             static_cast<float>(alloc_info.arena_persist_size * 100) /
                 total_memory_bytes);
    }
    if (alloc_info.dynamic_size) {
      printf("Subgraph#%-3d %-18s %10zu (%.2f%%)\n", i, "Dynamic Tensors",
             alloc_info.dynamic_size,
             static_cast<float>(alloc_info.dynamic_size * 100) /
                 total_memory_bytes);
    }
  }
  printf("--------------Memory Arena Status End--------------\n\n");
}

}  // namespace tflite