1/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/core/util/stats_calculator.h"
17
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <map>
#include <queue>
#include <sstream>
#include <string>
#include <vector>
23
24namespace tensorflow {
25
26StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
27 : options_(options) {}
28
29std::string StatsCalculator::GetShortSummary() const {
30 std::stringstream stream;
31 stream << "Timings (microseconds): ";
32 run_total_us_.OutputToStream(&stream);
33 stream << std::endl;
34
35 stream << "Memory (bytes): ";
36 memory_.OutputToStream(&stream);
37 stream << std::endl;
38
39 stream << details_.size() << " nodes observed" << std::endl;
40 return stream.str();
41}
42
// Begins a new tab-separated column on `stream`.
//
// Writes a tab, then configures the stream so that the next inserted value is
// right-aligned in a field of `width` characters. Fixed-point notation with
// three decimals and right alignment stay in effect on the stream afterwards;
// the field width applies to the next insertion only.
//
// Returns `stream` so the cell value can be chained directly:
//   InitField(stream, 9) << value;
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";
  stream << std::fixed << std::setprecision(3) << std::right
         << std::setw(width);
  return stream;
}
48
49std::string StatsCalculator::HeaderString(const std::string& title) const {
50 std::stringstream stream;
51
52 stream << "============================== " << title
53 << " ==============================" << std::endl;
54 if (options_.format_as_csv) {
55 stream << "node type, first, avg_ms, %, cdf%, mem KB, times called, "
56 "name";
57 } else {
58 InitField(stream, 24) << "[node type]";
59 InitField(stream, 9) << "[first]";
60 InitField(stream, 9) << "[avg ms]";
61 InitField(stream, 8) << "[%]";
62 InitField(stream, 8) << "[cdf%]";
63 InitField(stream, 10) << "[mem KB]";
64 InitField(stream, 9) << "[times called]";
65 stream << "\t"
66 << "[Name]";
67 }
68 return stream.str();
69}
70
71std::string StatsCalculator::ColumnString(const Detail& detail,
72 const int64_t cumulative_stat_on_node,
73 const Stat<int64_t>& stat) const {
74 const double first_time_ms = detail.elapsed_time.first() / 1000.0;
75 const double avg_time_ms = detail.elapsed_time.avg() / 1000.0;
76 const double percentage = detail.elapsed_time.sum() * 100.0 / stat.sum();
77 const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
78 const int64_t times_called = detail.times_called / num_runs();
79
80 std::stringstream stream;
81 if (options_.format_as_csv) {
82 std::string name(detail.name);
83 std::replace(name.begin(), name.end(), ',', '\t');
84 stream << detail.type << ", " << first_time_ms << ", " << avg_time_ms
85 << ", " << percentage << "%, " << cdf_percentage << "%, "
86 << detail.mem_used.newest() / 1000.0 << ", " << times_called << ", "
87 << name;
88 } else {
89 InitField(stream, 24) << detail.type;
90 InitField(stream, 9) << first_time_ms;
91 InitField(stream, 9) << avg_time_ms;
92 InitField(stream, 7) << percentage << "%";
93 InitField(stream, 7) << cdf_percentage << "%";
94 InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
95 InitField(stream, 9) << times_called;
96 stream << "\t" << detail.name;
97 }
98
99 return stream.str();
100}
101
102void StatsCalculator::OrderNodesByMetric(
103 SortingMetric metric, std::vector<const Detail*>* details) const {
104 std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
105 const int num_nodes = details_.size();
106
107 for (const auto& det : details_) {
108 const Detail* detail = &(det.second);
109 std::stringstream stream;
110 stream << std::setw(20) << std::right << std::setprecision(10)
111 << std::fixed;
112
113 switch (metric) {
114 case BY_NAME:
115 stream << detail->name;
116 break;
117 case BY_RUN_ORDER:
118 stream << num_nodes - detail->run_order;
119 break;
120 case BY_TIME:
121 stream << detail->elapsed_time.avg();
122 break;
123 case BY_MEMORY:
124 stream << detail->mem_used.avg();
125 break;
126 case BY_TYPE:
127 stream << detail->type;
128 break;
129 default:
130 stream << "";
131 break;
132 }
133
134 sorted_list.emplace(stream.str(), detail);
135 }
136
137 while (!sorted_list.empty()) {
138 auto entry = sorted_list.top();
139 sorted_list.pop();
140 details->push_back(entry.second);
141 }
142}
143
144void StatsCalculator::ComputeStatsByType(
145 std::map<std::string, int64_t>* node_type_map_count,
146 std::map<std::string, int64_t>* node_type_map_time,
147 std::map<std::string, int64_t>* node_type_map_memory,
148 std::map<std::string, int64_t>* node_type_map_times_called,
149 int64_t* accumulated_us) const {
150 int64_t run_count = run_total_us_.count();
151
152 for (const auto& det : details_) {
153 const std::string node_name = det.first;
154 const Detail& detail = det.second;
155
156 int64_t curr_time_val =
157 static_cast<int64_t>(detail.elapsed_time.sum() / run_count);
158 *accumulated_us += curr_time_val;
159
160 int64_t curr_memory_val = detail.mem_used.newest();
161
162 const std::string& node_type = detail.type;
163
164 (*node_type_map_count)[node_type] += 1;
165 (*node_type_map_time)[node_type] += curr_time_val;
166 (*node_type_map_memory)[node_type] += curr_memory_val;
167 (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
168 }
169}
170
171std::string StatsCalculator::GetStatsByNodeType() const {
172 std::stringstream stream;
173
174 stream << "Number of nodes executed: " << details_.size() << std::endl;
175
176 stream << "============================== Summary by node type "
177 "=============================="
178 << std::endl;
179
180 std::map<std::string, int64_t> node_type_map_count;
181 std::map<std::string, int64_t> node_type_map_time;
182 std::map<std::string, int64_t> node_type_map_memory;
183 std::map<std::string, int64_t> node_type_map_times_called;
184 int64_t accumulated_us = 0;
185
186 ComputeStatsByType(&node_type_map_count, &node_type_map_time,
187 &node_type_map_memory, &node_type_map_times_called,
188 &accumulated_us);
189
190 // Sort them.
191 std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
192 timings;
193 for (const auto& node_type : node_type_map_time) {
194 const int64_t mem_used = node_type_map_memory[node_type.first];
195 timings.emplace(node_type.second,
196 std::pair<std::string, int64_t>(node_type.first, mem_used));
197 }
198
199 if (options_.format_as_csv) {
200 stream << "node type, count, avg_ms, avg %, cdf %, mem KB, times called\n";
201 } else {
202 InitField(stream, 24) << "[Node type]";
203 InitField(stream, 9) << "[count]";
204 InitField(stream, 10) << "[avg ms]";
205 InitField(stream, 11) << "[avg %]";
206 InitField(stream, 11) << "[cdf %]";
207 InitField(stream, 10) << "[mem KB]";
208 InitField(stream, 10) << "[times called]";
209 stream << std::endl;
210 }
211
212 float cdf = 0.0f;
213 while (!timings.empty()) {
214 auto entry = timings.top();
215 timings.pop();
216
217 const std::string node_type = entry.second.first;
218 const float memory = entry.second.second / 1000.0f;
219
220 const int64_t node_type_total_us = entry.first;
221 const float time_per_run_ms = node_type_total_us / 1000.0f;
222
223 const float percentage =
224 ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
225 cdf += percentage;
226
227 if (options_.format_as_csv) {
228 stream << node_type << ", " << node_type_map_count[node_type] << ", "
229 << time_per_run_ms << ", " << percentage << "%, " << cdf << "%, "
230 << memory << ", " << node_type_map_times_called[node_type]
231 << std::endl;
232 } else {
233 InitField(stream, 24) << node_type;
234 InitField(stream, 9) << node_type_map_count[node_type];
235 InitField(stream, 10) << time_per_run_ms;
236 InitField(stream, 10) << percentage << "%";
237 InitField(stream, 10) << cdf << "%";
238 InitField(stream, 10) << memory;
239 InitField(stream, 9) << node_type_map_times_called[node_type];
240 stream << std::endl;
241 }
242 }
243 stream << std::endl;
244 return stream.str();
245}
246
247std::string StatsCalculator::GetStatsByMetric(const std::string& title,
248 SortingMetric sorting_metric,
249 int num_stats) const {
250 std::vector<const Detail*> details;
251 OrderNodesByMetric(sorting_metric, &details);
252
253 double cumulative_stat_on_node = 0;
254
255 std::stringstream stream;
256 stream << HeaderString(title) << std::endl;
257 int stat_num = 0;
258 for (auto detail : details) {
259 ++stat_num;
260 if (num_stats > 0 && stat_num > num_stats) {
261 break;
262 }
263
264 // TODO(andrewharp): Make this keep track of the particular metric for cdf.
265 cumulative_stat_on_node += detail->elapsed_time.sum();
266 stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
267 << std::endl;
268 }
269 stream << std::endl;
270 return stream.str();
271}
272
273std::string StatsCalculator::GetOutputString() const {
274 std::stringstream stream;
275 if (options_.show_run_order) {
276 stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
277 options_.run_order_limit);
278 }
279 if (options_.show_time) {
280 stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
281 options_.time_limit);
282 }
283 if (options_.show_memory) {
284 stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
285 options_.memory_limit);
286 }
287 if (options_.show_type) {
288 stream << GetStatsByNodeType();
289 }
290 if (options_.show_summary) {
291 stream << GetShortSummary() << std::endl;
292 }
293 return stream.str();
294}
295
296void StatsCalculator::AddNodeStats(const std::string& name,
297 const std::string& type, int64_t run_order,
298 int64_t elapsed_time, int64_t mem_used) {
299 Detail* detail = nullptr;
300 if (details_.find(name) == details_.end()) {
301 details_.insert({name, {}});
302 detail = &details_.at(name);
303 detail->type = type;
304 detail->name = name;
305 detail->run_order = run_order;
306 } else {
307 detail = &details_.at(name);
308 }
309 detail->elapsed_time.UpdateStat(elapsed_time);
310 detail->mem_used.UpdateStat(mem_used);
311 detail->times_called++;
312}
313
314} // namespace tensorflow
315