1/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
17#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
18
19#include <stdlib.h>
20
21#include <algorithm>
22#include <cmath>
23#include <limits>
24#include <map>
25#include <sstream>
26#include <string>
27#include <vector>
28
29#include "tensorflow/core/util/stat_summarizer_options.h"
30
31namespace tensorflow {
32
// Accumulates running summary statistics over a stream of samples: the sample
// count, the first and most recent sample, the minimum and maximum, the sum
// and the sum of squares.
//
// ValueType is the type of the samples and of most accessors.
// HighPrecisionValueType is the type used for the sum of squares and for the
// intermediate arithmetic of avg() and the variance helpers.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Folds the sample `v` into every running statistic.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    // Widen before multiplying so the square is computed in the
    // high-precision type rather than in ValueType.
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Restores the freshly-constructed (empty) state.
  void Reset() { *this = Stat(); }

  // True when no sample has been recorded yet.
  bool empty() const { return count_ == 0; }

  // First sample recorded (0 when empty).
  ValueType first() const { return first_; }

  // Most recent sample recorded (0 when empty).
  ValueType newest() const { return newest_; }

  // Largest sample seen. When empty this is the sentinel
  // std::numeric_limits<ValueType>::lowest().
  ValueType max() const { return max_; }

  // Smallest sample seen. When empty this is the sentinel
  // std::numeric_limits<ValueType>::max().
  ValueType min() const { return min_; }

  // Number of samples recorded.
  int64_t count() const { return count_; }

  // Sum of all samples, accumulated in ValueType.
  ValueType sum() const { return sum_; }

  // Sum of the squares of all samples.
  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when the Stat is empty or every recorded sample compared equal.
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Returns the arithmetic mean of the samples.
  //
  // NOTE: for an empty Stat the result is ValueType's quiet_NaN(), which is a
  // real NaN only when ValueType has one; for integer ValueType
  // std::numeric_limits yields 0. Kept as-is because callers may rely on it.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Returns sample variance.
  //
  // all_same() covers count_ <= 1, so the (count_ - 1) divisor is never zero.
  ValueType sample_variance() const {
    if (all_same()) return 0;
    // Square the sum in the high-precision type (std::pow would always go
    // through double, whatever HighPrecisionValueType is).
    const HighPrecisionValueType sum_squared =
        static_cast<HighPrecisionValueType>(sum_) * sum_;
    return (squared_sum_ - sum_squared / count_) / (count_ - 1);
  }

  // Returns population variance.
  ValueType variance() const {
    return all_same() ? 0 : (squared_sum_ / count_) - (avg() * avg());
  }

  // Returns population stddev.
  ValueType std_deviation() const {
    if (all_same()) return 0;
    const ValueType var = variance();
    // Floating-point cancellation can leave a tiny negative variance; report
    // 0 rather than the NaN std::sqrt would produce. A NaN variance fails this
    // comparison and still propagates through std::sqrt.
    if (var < 0) return 0;
    return std::sqrt(var);
  }

  // Writes a one-line, human-readable summary to `*stream`. The amount of
  // detail depends on whether the Stat is empty, constant, or varied.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Stream insertion; forwards to OutputToStream(). Takes `const Stat&` (the
  // current specialization) so it also works when HighPrecisionValueType is
  // not the default.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest(), not min(): for floating-point types min() is the smallest
  // positive value, which would beat every negative sample.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
119
120// A StatsCalculator assists in performance analysis of Graph executions.
121//
122// It summarizes time spent executing (on GPU/CPU), memory used etc for
123// graph execution.
124//
125// For example usage see StatsSummarizer.
126class StatsCalculator {
127 public:
128 enum SortingMetric {
129 BY_NAME,
130 BY_RUN_ORDER,
131 BY_TIME,
132 BY_MEMORY,
133 BY_TYPE,
134 };
135
136 explicit StatsCalculator(const StatSummarizerOptions& options);
137
138 // Returns a string detailing the accumulated runtime stats in a tab-separated
139 // format which can be pasted into a spreadsheet for further analysis.
140 std::string GetOutputString() const;
141
142 std::string GetShortSummary() const;
143
144 void ComputeStatsByType(
145 std::map<std::string, int64_t>* node_type_map_count,
146 std::map<std::string, int64_t>* node_type_map_time,
147 std::map<std::string, int64_t>* node_type_map_memory,
148 std::map<std::string, int64_t>* node_type_map_times_called,
149 int64_t* accumulated_us) const;
150
151 std::string GetStatsByNodeType() const;
152
153 std::string GetStatsByMetric(const std::string& title,
154 SortingMetric sorting_metric,
155 int num_stats) const;
156
157 // Returns number of runs.
158 int num_runs() const { return static_cast<int>(run_total_us_.count()); }
159
160 // Returns stats of total microseconds spent by all nodes in each run.
161 const Stat<int64_t>& run_total_us() const { return run_total_us_; }
162
163 void UpdateRunTotalUs(int64_t run_total_us) {
164 run_total_us_.UpdateStat(run_total_us);
165 }
166
167 void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }
168
169 struct Detail {
170 std::string name;
171 std::string type;
172 int64_t run_order;
173 Stat<int64_t> elapsed_time;
174 Stat<int64_t> mem_used;
175 int64_t times_called;
176 };
177
178 const std::map<std::string, Detail>& GetDetails() const { return details_; }
179
180 void AddNodeStats(const std::string& name, const std::string& type,
181 int64_t run_order, int64_t rel_end_us, int64_t mem_used);
182
183 private:
184 void OrderNodesByMetric(SortingMetric sorting_metric,
185 std::vector<const Detail*>* details) const;
186
187 std::string HeaderString(const std::string& title) const;
188 std::string ColumnString(const Detail& detail,
189 const int64_t cumulative_stat_on_node,
190 const Stat<int64_t>& stat) const;
191
192 Stat<int64_t> run_total_us_;
193 Stat<int64_t> memory_;
194
195 std::map<std::string, Detail> details_;
196 StatSummarizerOptions options_;
197};
198
199} // namespace tensorflow
200
201#endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
202