statistics.cc source code [leveldb/third_party/benchmark/src/statistics.cc]

1	// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2	// Copyright 2017 Roman Lebedev. All rights reserved.
3	//
4	// Licensed under the Apache License, Version 2.0 (the "License");
5	// you may not use this file except in compliance with the License.
6	// You may obtain a copy of the License at
7	//
8	// http://www.apache.org/licenses/LICENSE-2.0
9	//
10	// Unless required by applicable law or agreed to in writing, software
11	// distributed under the License is distributed on an "AS IS" BASIS,
12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	// See the License for the specific language governing permissions and
14	// limitations under the License.
15
16	#include "benchmark/benchmark.h"
17
18	#include <algorithm>
19	#include <cmath>
20	#include <numeric>
21	#include <string>
22	#include <vector>
23	#include "check.h"
24	#include "statistics.h"
25
26	namespace benchmark {
27
28	auto StatisticsSum = [](const std::vector<double>& v) {
29	return std::accumulate(v.begin(), v.end(), `0.0`);
30	};
31
32	double StatisticsMean(const std::vector<double>& v) {
33	if (v.empty()) return `0.0`;
34	return StatisticsSum (v) * (`1.0` / v.size());
35	}
36
37	double StatisticsMedian(const std::vector<double>& v) {
38	if (v.size() < `3`) return StatisticsMean(v);
39	std::vector<double> copy(v);
40
41	auto center = copy.begin() + v.size() / `2`;
42	std::nth_element(copy.begin(), center, copy.end());
43
44	// did we have an odd number of samples?
45	// if yes, then center is the median
46	// it no, then we are looking for the average between center and the value
47	// before
48	if (v.size() % `2` == `1`) return *center;
49	auto center2 = copy.begin() + v.size() / `2` - `1`;
50	std::nth_element(copy.begin(), center2, copy.end());
51	return (center + center2) / `2.0`;
52	}
53
54	// Return the sum of the squares of this sample set
55	auto SumSquares = [](const std::vector<double>& v) {
56	return std::inner_product(v.begin(), v.end(), v.begin(), `0.0`);
57	};
58
59	auto Sqr = [](const double dat) { return dat * dat; };
60	auto Sqrt = [](const double dat) {
61	// Avoid NaN due to imprecision in the calculations
62	if (dat < `0.0`) return `0.0`;
63	return std::sqrt(dat);
64	};
65
66	double StatisticsStdDev(const std::vector<double>& v) {
67	const auto mean = StatisticsMean(v);
68	if (v.empty()) return mean;
69
70	// Sample standard deviation is undefined for n = 1
71	if (v.size() == `1`) return `0.0`;
72
73	const double avg_squares = SumSquares (v) * (`1.0` / v.size());
74	return Sqrt (v.size() / (v.size() - `1.0`) * (avg_squares - Sqr (mean)));
75	}
76
77	std::vector<BenchmarkReporter::Run> ComputeStats(
78	const std::vector<BenchmarkReporter::Run>& reports) {
79	typedef BenchmarkReporter::Run Run;
80	std::vector<Run> results;
81
82	auto error_count =
83	std::count_if(reports.begin(), reports.end(),
84	[](Run const& run) { return run.error_occurred; });
85
86	if (reports.size() - error_count < `2`) {
87	// We don't report aggregated data if there was a single run.
88	return results;
89	}
90
91	// Accumulators.
92	std::vector<double> real_accumulated_time_stat;
93	std::vector<double> cpu_accumulated_time_stat;
94
95	real_accumulated_time_stat.reserve(reports.size());
96	cpu_accumulated_time_stat.reserve(reports.size());
97
98	// All repetitions should be run with the same number of iterations so we
99	// can take this information from the first benchmark.
100	const IterationCount run_iterations = reports.front().iterations;
101	// create stats for user counters
102	struct CounterStat {
103	Counter c;
104	std::vector<double> s;
105	};
106	std::map<std::string, CounterStat> counter_stats;
107	for (Run const& r : reports) {
108	for (auto const& cnt : r.counters) {
109	auto it = counter_stats.find(cnt.first);
110	if (it == counter_stats.end()) {
111	counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
112	it = counter_stats.find(cnt.first);
113	it ->second.s.reserve(reports.size());
114	} else {
115	CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
116	}
117	}
118	}
119
120	// Populate the accumulators.
121	for (Run const& run : reports) {
122	CHECK_EQ(reports[`0`].benchmark_name(), run.benchmark_name());
123	CHECK_EQ(run_iterations, run.iterations);
124	if (run.error_occurred) continue;
125	real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
126	cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
127	// user counters
128	for (auto const& cnt : run.counters) {
129	auto it = counter_stats.find(cnt.first);
130	CHECK_NE(it, counter_stats.end());
131	it ->second.s.emplace_back(cnt.second);
132	}
133	}
134
135	// Only add label if it is same for all runs
136	std::string report_label = reports [`0`].report_label;
137	for (std::size_t i = `1`; i < reports.size(); i++) {
138	if (reports [i].report_label != report_label) {
139	report_label = "";
140	break;
141	}
142	}
143
144	const double iteration_rescale_factor =
145	double(reports.size()) / double(run_iterations);
146
147	for (const auto& Stat : *reports [`0`].statistics) {
148	// Get the data from the accumulator to BenchmarkReporter::Run's.
149	Run data;
150	data.run_name = reports [`0`].run_name;
151	data.run_type = BenchmarkReporter::Run::RT_Aggregate;
152	data.threads = reports [`0`].threads;
153	data.repetitions = reports [`0`].repetitions;
154	data.repetition_index = Run::no_repetition_index;
155	data.aggregate_name = Stat.name_;
156	data.report_label = report_label;
157
158	// It is incorrect to say that an aggregate is computed over
159	// run's iterations, because those iterations already got averaged.
160	// Similarly, if there are N repetitions with 1 iterations each,
161	// an aggregate will be computed over N measurements, not 1.
162	// Thus it is best to simply use the count of separate reports.
163	data.iterations = reports.size();
164
165	data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
166	data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
167
168	// We will divide these times by data.iterations when reporting, but the
169	// data.iterations is not nessesairly the scale of these measurements,
170	// because in each repetition, these timers are sum over all the iterations.
171	// And if we want to say that the stats are over N repetitions and not
172	// M iterations, we need to multiply these by (N/M).
173	data.real_accumulated_time *= iteration_rescale_factor;
174	data.cpu_accumulated_time *= iteration_rescale_factor;
175
176	data.time_unit = reports [`0`].time_unit;
177
178	// user counters
179	for (auto const& kv : counter_stats) {
180	// Do NOT* rescale the custom counters. They are already properly scaled.*
181	const auto uc_stat = Stat.compute_(kv.second.s);
182	auto c = Counter (uc_stat, counter_stats [kv.first].c.flags,
183	counter_stats [kv.first].c.oneK);
184	data.counters [kv.first] = c;
185	}
186
187	results.push_back(data);
188	}
189
190	return results;
191	}
192
193	} // end namespace benchmark
194

Browse the source code of leveldb/third_party/benchmark/src/statistics.cc