1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
#include "tensorflow/core/util/stats_calculator.h"

#include <algorithm>
#include <cstdint>
#include <iomanip>
#include <map>
#include <queue>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
23 | |
24 | namespace tensorflow { |
25 | |
// Constructs a calculator that keeps its own copy of `options`; the copy is
// consulted later by the report-building methods (e.g. `format_as_csv` and
// the `show_*` switches).
StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
    : options_(options) {}
28 | |
29 | std::string StatsCalculator::GetShortSummary() const { |
30 | std::stringstream stream; |
31 | stream << "Timings (microseconds): " ; |
32 | run_total_us_.OutputToStream(&stream); |
33 | stream << std::endl; |
34 | |
35 | stream << "Memory (bytes): " ; |
36 | memory_.OutputToStream(&stream); |
37 | stream << std::endl; |
38 | |
39 | stream << details_.size() << " nodes observed" << std::endl; |
40 | return stream.str(); |
41 | } |
42 | |
// Starts a new table cell on `stream`: writes a tab separator, then arms the
// stream so the next inserted value is right-aligned in `width` columns and
// floating-point values print in fixed notation with three decimals.
// Returns `stream` so the cell value can be chained directly onto the call.
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";
  stream.setf(std::ios_base::right, std::ios_base::adjustfield);
  stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
  stream.precision(3);
  // The width applies only to the next formatted insertion, so it is set
  // after the tab has already been written.
  stream.width(width);
  return stream;
}
48 | |
49 | std::string StatsCalculator::(const std::string& title) const { |
50 | std::stringstream stream; |
51 | |
52 | stream << "============================== " << title |
53 | << " ==============================" << std::endl; |
54 | if (options_.format_as_csv) { |
55 | stream << "node type, first, avg_ms, %, cdf%, mem KB, times called, " |
56 | "name" ; |
57 | } else { |
58 | InitField(stream, 24) << "[node type]" ; |
59 | InitField(stream, 9) << "[first]" ; |
60 | InitField(stream, 9) << "[avg ms]" ; |
61 | InitField(stream, 8) << "[%]" ; |
62 | InitField(stream, 8) << "[cdf%]" ; |
63 | InitField(stream, 10) << "[mem KB]" ; |
64 | InitField(stream, 9) << "[times called]" ; |
65 | stream << "\t" |
66 | << "[Name]" ; |
67 | } |
68 | return stream.str(); |
69 | } |
70 | |
71 | std::string StatsCalculator::ColumnString(const Detail& detail, |
72 | const int64_t cumulative_stat_on_node, |
73 | const Stat<int64_t>& stat) const { |
74 | const double first_time_ms = detail.elapsed_time.first() / 1000.0; |
75 | const double avg_time_ms = detail.elapsed_time.avg() / 1000.0; |
76 | const double percentage = detail.elapsed_time.sum() * 100.0 / stat.sum(); |
77 | const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); |
78 | const int64_t times_called = detail.times_called / num_runs(); |
79 | |
80 | std::stringstream stream; |
81 | if (options_.format_as_csv) { |
82 | std::string name(detail.name); |
83 | std::replace(name.begin(), name.end(), ',', '\t'); |
84 | stream << detail.type << ", " << first_time_ms << ", " << avg_time_ms |
85 | << ", " << percentage << "%, " << cdf_percentage << "%, " |
86 | << detail.mem_used.newest() / 1000.0 << ", " << times_called << ", " |
87 | << name; |
88 | } else { |
89 | InitField(stream, 24) << detail.type; |
90 | InitField(stream, 9) << first_time_ms; |
91 | InitField(stream, 9) << avg_time_ms; |
92 | InitField(stream, 7) << percentage << "%" ; |
93 | InitField(stream, 7) << cdf_percentage << "%" ; |
94 | InitField(stream, 10) << detail.mem_used.newest() / 1000.0; |
95 | InitField(stream, 9) << times_called; |
96 | stream << "\t" << detail.name; |
97 | } |
98 | |
99 | return stream.str(); |
100 | } |
101 | |
102 | void StatsCalculator::OrderNodesByMetric( |
103 | SortingMetric metric, std::vector<const Detail*>* details) const { |
104 | std::priority_queue<std::pair<std::string, const Detail*>> sorted_list; |
105 | const int num_nodes = details_.size(); |
106 | |
107 | for (const auto& det : details_) { |
108 | const Detail* detail = &(det.second); |
109 | std::stringstream stream; |
110 | stream << std::setw(20) << std::right << std::setprecision(10) |
111 | << std::fixed; |
112 | |
113 | switch (metric) { |
114 | case BY_NAME: |
115 | stream << detail->name; |
116 | break; |
117 | case BY_RUN_ORDER: |
118 | stream << num_nodes - detail->run_order; |
119 | break; |
120 | case BY_TIME: |
121 | stream << detail->elapsed_time.avg(); |
122 | break; |
123 | case BY_MEMORY: |
124 | stream << detail->mem_used.avg(); |
125 | break; |
126 | case BY_TYPE: |
127 | stream << detail->type; |
128 | break; |
129 | default: |
130 | stream << "" ; |
131 | break; |
132 | } |
133 | |
134 | sorted_list.emplace(stream.str(), detail); |
135 | } |
136 | |
137 | while (!sorted_list.empty()) { |
138 | auto entry = sorted_list.top(); |
139 | sorted_list.pop(); |
140 | details->push_back(entry.second); |
141 | } |
142 | } |
143 | |
144 | void StatsCalculator::ComputeStatsByType( |
145 | std::map<std::string, int64_t>* node_type_map_count, |
146 | std::map<std::string, int64_t>* node_type_map_time, |
147 | std::map<std::string, int64_t>* node_type_map_memory, |
148 | std::map<std::string, int64_t>* node_type_map_times_called, |
149 | int64_t* accumulated_us) const { |
150 | int64_t run_count = run_total_us_.count(); |
151 | |
152 | for (const auto& det : details_) { |
153 | const std::string node_name = det.first; |
154 | const Detail& detail = det.second; |
155 | |
156 | int64_t curr_time_val = |
157 | static_cast<int64_t>(detail.elapsed_time.sum() / run_count); |
158 | *accumulated_us += curr_time_val; |
159 | |
160 | int64_t curr_memory_val = detail.mem_used.newest(); |
161 | |
162 | const std::string& node_type = detail.type; |
163 | |
164 | (*node_type_map_count)[node_type] += 1; |
165 | (*node_type_map_time)[node_type] += curr_time_val; |
166 | (*node_type_map_memory)[node_type] += curr_memory_val; |
167 | (*node_type_map_times_called)[node_type] += detail.times_called / run_count; |
168 | } |
169 | } |
170 | |
171 | std::string StatsCalculator::GetStatsByNodeType() const { |
172 | std::stringstream stream; |
173 | |
174 | stream << "Number of nodes executed: " << details_.size() << std::endl; |
175 | |
176 | stream << "============================== Summary by node type " |
177 | "==============================" |
178 | << std::endl; |
179 | |
180 | std::map<std::string, int64_t> node_type_map_count; |
181 | std::map<std::string, int64_t> node_type_map_time; |
182 | std::map<std::string, int64_t> node_type_map_memory; |
183 | std::map<std::string, int64_t> node_type_map_times_called; |
184 | int64_t accumulated_us = 0; |
185 | |
186 | ComputeStatsByType(&node_type_map_count, &node_type_map_time, |
187 | &node_type_map_memory, &node_type_map_times_called, |
188 | &accumulated_us); |
189 | |
190 | // Sort them. |
191 | std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>> |
192 | timings; |
193 | for (const auto& node_type : node_type_map_time) { |
194 | const int64_t mem_used = node_type_map_memory[node_type.first]; |
195 | timings.emplace(node_type.second, |
196 | std::pair<std::string, int64_t>(node_type.first, mem_used)); |
197 | } |
198 | |
199 | if (options_.format_as_csv) { |
200 | stream << "node type, count, avg_ms, avg %, cdf %, mem KB, times called\n" ; |
201 | } else { |
202 | InitField(stream, 24) << "[Node type]" ; |
203 | InitField(stream, 9) << "[count]" ; |
204 | InitField(stream, 10) << "[avg ms]" ; |
205 | InitField(stream, 11) << "[avg %]" ; |
206 | InitField(stream, 11) << "[cdf %]" ; |
207 | InitField(stream, 10) << "[mem KB]" ; |
208 | InitField(stream, 10) << "[times called]" ; |
209 | stream << std::endl; |
210 | } |
211 | |
212 | float cdf = 0.0f; |
213 | while (!timings.empty()) { |
214 | auto entry = timings.top(); |
215 | timings.pop(); |
216 | |
217 | const std::string node_type = entry.second.first; |
218 | const float memory = entry.second.second / 1000.0f; |
219 | |
220 | const int64_t node_type_total_us = entry.first; |
221 | const float time_per_run_ms = node_type_total_us / 1000.0f; |
222 | |
223 | const float percentage = |
224 | ((entry.first / static_cast<float>(accumulated_us)) * 100.0f); |
225 | cdf += percentage; |
226 | |
227 | if (options_.format_as_csv) { |
228 | stream << node_type << ", " << node_type_map_count[node_type] << ", " |
229 | << time_per_run_ms << ", " << percentage << "%, " << cdf << "%, " |
230 | << memory << ", " << node_type_map_times_called[node_type] |
231 | << std::endl; |
232 | } else { |
233 | InitField(stream, 24) << node_type; |
234 | InitField(stream, 9) << node_type_map_count[node_type]; |
235 | InitField(stream, 10) << time_per_run_ms; |
236 | InitField(stream, 10) << percentage << "%" ; |
237 | InitField(stream, 10) << cdf << "%" ; |
238 | InitField(stream, 10) << memory; |
239 | InitField(stream, 9) << node_type_map_times_called[node_type]; |
240 | stream << std::endl; |
241 | } |
242 | } |
243 | stream << std::endl; |
244 | return stream.str(); |
245 | } |
246 | |
247 | std::string StatsCalculator::GetStatsByMetric(const std::string& title, |
248 | SortingMetric sorting_metric, |
249 | int num_stats) const { |
250 | std::vector<const Detail*> details; |
251 | OrderNodesByMetric(sorting_metric, &details); |
252 | |
253 | double cumulative_stat_on_node = 0; |
254 | |
255 | std::stringstream stream; |
256 | stream << HeaderString(title) << std::endl; |
257 | int stat_num = 0; |
258 | for (auto detail : details) { |
259 | ++stat_num; |
260 | if (num_stats > 0 && stat_num > num_stats) { |
261 | break; |
262 | } |
263 | |
264 | // TODO(andrewharp): Make this keep track of the particular metric for cdf. |
265 | cumulative_stat_on_node += detail->elapsed_time.sum(); |
266 | stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) |
267 | << std::endl; |
268 | } |
269 | stream << std::endl; |
270 | return stream.str(); |
271 | } |
272 | |
273 | std::string StatsCalculator::GetOutputString() const { |
274 | std::stringstream stream; |
275 | if (options_.show_run_order) { |
276 | stream << GetStatsByMetric("Run Order" , BY_RUN_ORDER, |
277 | options_.run_order_limit); |
278 | } |
279 | if (options_.show_time) { |
280 | stream << GetStatsByMetric("Top by Computation Time" , BY_TIME, |
281 | options_.time_limit); |
282 | } |
283 | if (options_.show_memory) { |
284 | stream << GetStatsByMetric("Top by Memory Use" , BY_MEMORY, |
285 | options_.memory_limit); |
286 | } |
287 | if (options_.show_type) { |
288 | stream << GetStatsByNodeType(); |
289 | } |
290 | if (options_.show_summary) { |
291 | stream << GetShortSummary() << std::endl; |
292 | } |
293 | return stream.str(); |
294 | } |
295 | |
296 | void StatsCalculator::AddNodeStats(const std::string& name, |
297 | const std::string& type, int64_t run_order, |
298 | int64_t elapsed_time, int64_t mem_used) { |
299 | Detail* detail = nullptr; |
300 | if (details_.find(name) == details_.end()) { |
301 | details_.insert({name, {}}); |
302 | detail = &details_.at(name); |
303 | detail->type = type; |
304 | detail->name = name; |
305 | detail->run_order = run_order; |
306 | } else { |
307 | detail = &details_.at(name); |
308 | } |
309 | detail->elapsed_time.UpdateStat(elapsed_time); |
310 | detail->mem_used.UpdateStat(mem_used); |
311 | detail->times_called++; |
312 | } |
313 | |
314 | } // namespace tensorflow |
315 | |