1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ |
17 | #define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ |
18 | |
19 | #include <stdlib.h> |
20 | |
21 | #include <algorithm> |
22 | #include <cmath> |
23 | #include <limits> |
24 | #include <map> |
25 | #include <sstream> |
26 | #include <string> |
27 | #include <vector> |
28 | |
29 | #include "tensorflow/core/util/stat_summarizer_options.h" |
30 | |
31 | namespace tensorflow { |
32 | |
// Accumulates running summary statistics over a stream of samples supplied
// through UpdateStat(): count, first and newest sample, min, max, sum and sum
// of squares, from which the average, variance and standard deviation are
// derived.
//
// ValueType is the type of the samples. HighPrecisionValueType is the type
// used for the sum of squares and for the average.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Folds the sample `v` into the accumulated statistics.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    // Widen before multiplying so the square is computed in high precision.
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Restores the freshly-constructed (empty) state.
  void Reset() { *this = Stat(); }

  // True when no sample has been recorded since construction or Reset().
  bool empty() const { return count_ == 0; }

  // First sample recorded (0 when empty).
  ValueType first() const { return first_; }

  // Most recently recorded sample (0 when empty).
  ValueType newest() const { return newest_; }

  // Largest sample recorded. When empty this is
  // std::numeric_limits<ValueType>::lowest().
  ValueType max() const { return max_; }

  // Smallest sample recorded. When empty this is
  // std::numeric_limits<ValueType>::max().
  ValueType min() const { return min_; }

  // Number of samples recorded.
  int64_t count() const { return count_; }

  // Sum of all samples, accumulated in ValueType.
  ValueType sum() const { return sum_; }

  // Sum of the squares of all samples.
  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when empty or when every recorded sample has the same value.
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Returns the arithmetic mean of the samples.
  //
  // For an empty Stat the result is ValueType's quiet_NaN() converted to
  // HighPrecisionValueType. Integer types have no NaN and numeric_limits
  // reports 0 for them, so check empty() first when that matters.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Returns sample variance (divides by count - 1). The result is converted
  // to ValueType, so it is truncated for integer sample types.
  ValueType sample_variance() const {
    return all_same()
               ? 0
               : (squared_sum_ - std::pow(sum_, 2.0) / count_) / (count_ - 1);
  }

  // Returns population variance (divides by count). The result is converted
  // to ValueType, so it is truncated for integer sample types.
  ValueType variance() const {
    return all_same() ? 0 : (squared_sum_ / count_) - (avg() * avg());
  }

  // Returns population stddev.
  ValueType std_deviation() const {
    return all_same() ? 0 : std::sqrt(variance());
  }

  // Writes a one-line, human-readable summary to `*stream`. The amount of
  // detail depends on whether the Stat is empty, holds identical samples, or
  // holds differing samples.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Streams the same summary as OutputToStream(). Takes `const Stat&` (the
  // current specialization) so that every HighPrecisionValueType gets its own
  // overload instead of all of them defining one for Stat<ValueType, double>.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest(), not min(): for floating-point types min() is the smallest
  // positive value, which would be wrong as a seed when all samples are
  // negative.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
119 | |
120 | // A StatsCalculator assists in performance analysis of Graph executions. |
121 | // |
122 | // It summarizes time spent executing (on GPU/CPU), memory used etc for |
123 | // graph execution. |
124 | // |
125 | // For example usage see StatsSummarizer. |
126 | class StatsCalculator { |
127 | public: |
128 | enum SortingMetric { |
129 | BY_NAME, |
130 | BY_RUN_ORDER, |
131 | BY_TIME, |
132 | BY_MEMORY, |
133 | BY_TYPE, |
134 | }; |
135 | |
136 | explicit StatsCalculator(const StatSummarizerOptions& options); |
137 | |
138 | // Returns a string detailing the accumulated runtime stats in a tab-separated |
139 | // format which can be pasted into a spreadsheet for further analysis. |
140 | std::string GetOutputString() const; |
141 | |
142 | std::string GetShortSummary() const; |
143 | |
144 | void ComputeStatsByType( |
145 | std::map<std::string, int64_t>* node_type_map_count, |
146 | std::map<std::string, int64_t>* node_type_map_time, |
147 | std::map<std::string, int64_t>* node_type_map_memory, |
148 | std::map<std::string, int64_t>* node_type_map_times_called, |
149 | int64_t* accumulated_us) const; |
150 | |
151 | std::string GetStatsByNodeType() const; |
152 | |
153 | std::string GetStatsByMetric(const std::string& title, |
154 | SortingMetric sorting_metric, |
155 | int num_stats) const; |
156 | |
157 | // Returns number of runs. |
158 | int num_runs() const { return static_cast<int>(run_total_us_.count()); } |
159 | |
160 | // Returns stats of total microseconds spent by all nodes in each run. |
161 | const Stat<int64_t>& run_total_us() const { return run_total_us_; } |
162 | |
163 | void UpdateRunTotalUs(int64_t run_total_us) { |
164 | run_total_us_.UpdateStat(run_total_us); |
165 | } |
166 | |
167 | void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); } |
168 | |
169 | struct Detail { |
170 | std::string name; |
171 | std::string type; |
172 | int64_t run_order; |
173 | Stat<int64_t> elapsed_time; |
174 | Stat<int64_t> mem_used; |
175 | int64_t times_called; |
176 | }; |
177 | |
178 | const std::map<std::string, Detail>& GetDetails() const { return details_; } |
179 | |
180 | void AddNodeStats(const std::string& name, const std::string& type, |
181 | int64_t run_order, int64_t rel_end_us, int64_t mem_used); |
182 | |
183 | private: |
184 | void OrderNodesByMetric(SortingMetric sorting_metric, |
185 | std::vector<const Detail*>* details) const; |
186 | |
187 | std::string (const std::string& title) const; |
188 | std::string ColumnString(const Detail& detail, |
189 | const int64_t cumulative_stat_on_node, |
190 | const Stat<int64_t>& stat) const; |
191 | |
192 | Stat<int64_t> run_total_us_; |
193 | Stat<int64_t> memory_; |
194 | |
195 | std::map<std::string, Detail> details_; |
196 | StatSummarizerOptions options_; |
197 | }; |
198 | |
199 | } // namespace tensorflow |
200 | |
201 | #endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ |
202 | |