Benchmark.cpp source code [glow/thirdparty/folly/folly/Benchmark.cpp]

1	/*
2	* Copyright (c) Facebook, Inc. and its affiliates.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	// @author Andrei Alexandrescu ([email protected])
18
19	#include <folly/Benchmark.h>
20
21	#include <algorithm>
22	#include <cmath>
23	#include <cstring>
24	#include <iostream>
25	#include <limits>
26	#include <map>
27	#include <memory>
28	#include <numeric>
29	#include <utility>
30	#include <vector>
31
32	#include <boost/regex.hpp>
33
34	#include <folly/MapUtil.h>
35	#include <folly/String.h>
36	#include <folly/container/Foreach.h>
37	#include <folly/json.h>
38
39	using namespace std;
40
41	DEFINE_bool(benchmark, false, "Run benchmarks.");
42	DEFINE_bool(json, false, "Output in JSON format.");
43	DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
44
45	DEFINE_string(
46	bm_regex,
47	"",
48	"Only benchmarks whose names match this regex will be run.");
49
50	DEFINE_int64(
51	bm_min_usec,
52	`100`,
53	"Minimum # of microseconds we'll accept for each benchmark.");
54
55	DEFINE_int32(
56	bm_min_iters,
57	`1`,
58	"Minimum # of iterations we'll try for each benchmark.");
59
60	DEFINE_int64(
61	bm_max_iters,
62	`1` << `30`,
63	"Maximum # of iterations we'll try for each benchmark.");
64
65	DEFINE_int32(
66	bm_max_secs,
67	`1`,
68	"Maximum # of seconds we'll spend on each benchmark.");
69
70	namespace folly {
71
72	std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
73
74	typedef function<detail::TimeIterData(unsigned int)> BenchmarkFun;
75
76	vector<detail::BenchmarkRegistration>& benchmarks() {
77	static vector<detail::BenchmarkRegistration> _benchmarks;
78	return _benchmarks;
79	}
80
81	#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
82	#define FB_STRINGIZE_X2(x) FOLLY_PP_STRINGIZE(x)
83
84	// Add the global baseline
85	BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
86	#ifdef _MSC_VER
87	_ReadWriteBarrier();
88	#else
89	asm volatile("");
90	#endif
91	}
92
93	size_t getGlobalBenchmarkBaselineIndex() {
94	const char* global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
95	auto it = std::find_if(
96	benchmarks().begin(),
97	benchmarks().end(),
98	[global](const detail::BenchmarkRegistration& v) {
99	return v.name == global;
100	});
101	CHECK(it != benchmarks().end());
102	return size_t(std::distance(benchmarks().begin(), it));
103	}
104
105	#undef FB_STRINGIZE_X2
106	#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
107
108	void detail::addBenchmarkImpl(
109	const char* file,
110	StringPiece name,
111	BenchmarkFun fun,
112	bool useCounter) {
113	benchmarks().push_back({file, name.str(), std::move(fun), useCounter});
114	}
115
116	static std::pair<double, UserCounters> runBenchmarkGetNSPerIteration(
117	const BenchmarkFun& fun,
118	const double globalBaseline) {
119	using std::chrono::duration_cast;
120	using std::chrono::high_resolution_clock;
121	using std::chrono::microseconds;
122	using std::chrono::nanoseconds;
123	using std::chrono::seconds;
124
125	// They key here is accuracy; too low numbers means the accuracy was
126	// coarse. We up the ante until we get to at least minNanoseconds
127	// timings.
128	static_assert(
129	std::is_same<high_resolution_clock::duration, nanoseconds>::value,
130	"High resolution clock must be nanosecond resolution.");
131	// We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
132	// the clock resolution is worse than that, it will be larger. In
133	// essence we're aiming at making the quantization noise 0.01%.
134	static const auto minNanoseconds = std::max<nanoseconds>(
135	nanoseconds (`100000`), microseconds (FLAGS_bm_min_usec));
136
137	// We do measurements in several epochs and take the minimum, to
138	// account for jitter.
139	static const unsigned int epochs = `1000`;
140	// We establish a total time budget as we don't want a measurement
141	// to take too long. This will curtail the number of actual epochs.
142	const auto timeBudget = seconds (FLAGS_bm_max_secs);
143	auto global = high_resolution_clock::now();
144
145	std::vector<std::pair<double, UserCounters>> epochResults(epochs);
146	size_t actualEpochs = `0`;
147
148	for (; actualEpochs < epochs; ++actualEpochs) {
149	const auto maxIters = uint32_t(FLAGS_bm_max_iters);
150	for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= `2`) {
151	detail::TimeIterData timeIterData = fun (static_cast<unsigned int>(n));
152	if (timeIterData.duration < minNanoseconds) {
153	continue;
154	}
155	// We got an accurate enough timing, done. But only save if
156	// smaller than the current result.
157	auto nsecs = duration_cast<nanoseconds>(timeIterData.duration);
158	epochResults [actualEpochs] = std::make_pair(
159	max(`0.0`, double(nsecs.count()) / timeIterData.niter - globalBaseline),
160	std::move(timeIterData.userCounters));
161	// Done with the current epoch, we got a meaningful timing.
162	break;
163	}
164	auto now = high_resolution_clock::now();
165	if (now - global >= timeBudget) {
166	// No more time budget available.
167	++actualEpochs;
168	break;
169	}
170	}
171
172	// Current state of the art: get the minimum. After some
173	// experimentation, it seems taking the minimum is the best.
174	auto iter = min_element(
175	epochResults.begin(),
176	epochResults.begin() + actualEpochs,
177	[](const auto& a, const auto& b) { return a.first < b.first; });
178
179	// If the benchmark was basically drowned in baseline noise, it's
180	// possible it became negative.
181	return std::make_pair(max(`0.0`, iter ->first), iter ->second);
182	}
183
184	struct ScaleInfo {
185	double boundary;
186	const char* suffix;
187	};
188
189	static const ScaleInfo kTimeSuffixes[]{
190	{`365.25` * `24` * `3600`, "years"},
191	{`24` * `3600`, "days"},
192	{`3600`, "hr"},
193	{`60`, "min"},
194	{`1`, "s"},
195	{`1E-3`, "ms"},
196	{`1E-6`, "us"},
197	{`1E-9`, "ns"},
198	{`1E-12`, "ps"},
199	{`1E-15`, "fs"},
200	{`0`, nullptr},
201	};
202
203	static const ScaleInfo kMetricSuffixes[]{
204	{`1E24`, "Y"}, // yotta
205	{`1E21`, "Z"}, // zetta
206	{`1E18`, "X"}, // "exa" written with suffix 'X' so as to not create
207	// confusion with scientific notation
208	{`1E15`, "P"}, // peta
209	{`1E12`, "T"}, // terra
210	{`1E9`, "G"}, // giga
211	{`1E6`, "M"}, // mega
212	{`1E3`, "K"}, // kilo
213	{`1`, ""},
214	{`1E-3`, "m"}, // milli
215	{`1E-6`, "u"}, // micro
216	{`1E-9`, "n"}, // nano
217	{`1E-12`, "p"}, // pico
218	{`1E-15`, "f"}, // femto
219	{`1E-18`, "a"}, // atto
220	{`1E-21`, "z"}, // zepto
221	{`1E-24`, "y"}, // yocto
222	{`0`, nullptr},
223	};
224
225	static string
226	humanReadable(double n, unsigned int decimals, const ScaleInfo* scales) {
227	if (std::isinf(n) \|\| std::isnan(n)) {
228	return folly::to<string>(n);
229	}
230
231	const double absValue = fabs(n);
232	const ScaleInfo* scale = scales;
233	while (absValue < scale[`0`].boundary && scale[`1`].suffix != nullptr) {
234	++scale;
235	}
236
237	const double scaledValue = n / scale->boundary;
238	return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
239	}
240
241	static string readableTime(double n, unsigned int decimals) {
242	return humanReadable(n, decimals, kTimeSuffixes);
243	}
244
245	static string metricReadable(double n, unsigned int decimals) {
246	return humanReadable(n, decimals, kMetricSuffixes);
247	}
248
249	namespace {
250	class BenchmarkResultsPrinter {
251	public:
252	BenchmarkResultsPrinter() = default;
253	explicit BenchmarkResultsPrinter(std::set<std::string> counterNames)
254	: counterNames_(std::move(counterNames)),
255	namesLength_{std::accumulate(
256	counterNames_.begin(),
257	counterNames_.end(),
258	size_t{`0`},
259	[](size_t acc, auto&& name) { return acc + `2` + name.length(); })} {}
260
261	static constexpr unsigned int columns{`76`};
262	void separator(char pad) {
263	puts(string (columns + namesLength_, pad).c_str());
264	}
265
266	void header(const string& file) {
267	separator(`'='`);
268	printf("%-*srelative time/iter iters/s", columns - `28`, file.c_str());
269	for (auto const& name : counterNames_) {
270	printf(" %s", name.c_str());
271	}
272	printf("\n");
273	separator(`'='`);
274	}
275
276	void print(const vector<detail::BenchmarkResult>& data) {
277	for (auto& datum : data) {
278	auto file = datum.file;
279	if (file != lastFile_) {
280	// New file starting
281	header(file);
282	lastFile_ = file;
283	}
284
285	string s = datum.name;
286	if (s == "-") {
287	separator(`'-'`);
288	continue;
289	}
290	bool useBaseline / = void /;
291	if (s [`0`] == `'%'`) {
292	s.erase(`0`, `1`);
293	useBaseline = true;
294	} else {
295	baselineNsPerIter_ = datum.timeInNs;
296	useBaseline = false;
297	}
298	s.resize(columns - `29`, `' '`);
299	auto nsPerIter = datum.timeInNs;
300	auto secPerIter = nsPerIter / `1E9`;
301	auto itersPerSec = (secPerIter == `0`)
302	? std::numeric_limits<double>::infinity()
303	: (`1` / secPerIter);
304	if (!useBaseline) {
305	// Print without baseline
306	printf(
307	"%*s %9s %7s",
308	static_cast<int>(s.size()),
309	s.c_str(),
310	readableTime(secPerIter, `2`).c_str(),
311	metricReadable(itersPerSec, `2`).c_str());
312	} else {
313	// Print with baseline
314	auto rel = baselineNsPerIter_ / nsPerIter * `100.0`;
315	printf(
316	"%*s %7.2f%% %9s %7s",
317	static_cast<int>(s.size()),
318	s.c_str(),
319	rel,
320	readableTime(secPerIter, `2`).c_str(),
321	metricReadable(itersPerSec, `2`).c_str());
322	}
323	for (auto const& name : counterNames_) {
324	if (auto ptr = folly::get_ptr(datum.counters, name)) {
325	switch (ptr->type) {
326	case UserMetric::Type::TIME:
327	printf(
328	" %-*s",
329	int(name.length()),
330	readableTime(ptr->value, `2`).c_str());
331	break;
332	case UserMetric::Type::METRIC:
333	printf(
334	" %-*s",
335	int(name.length()),
336	metricReadable(ptr->value, `2`).c_str());
337	break;
338	case UserMetric::Type::CUSTOM:
339	default:
340	printf(" %-" PRId64, int*(name.length()), ptr->value);
341	}
342	} else {
343	printf(" %-s", int*(name.length()), "NaN");
344	}
345	}
346	printf("\n");
347	}
348	}
349
350	private:
351	std::set<std::string> counterNames_;
352	size_t namesLength_{`0`};
353	double baselineNsPerIter_{numeric_limits<double>::max()};
354	string lastFile_;
355	};
356	} // namespace
357
358	static void printBenchmarkResultsAsJson(
359	const vector<detail::BenchmarkResult>& data) {
360	dynamic d = dynamic::object;
361	for (auto& datum : data) {
362	d [datum.name] = datum.timeInNs * `1000.`;
363	}
364
365	printf("%s\n", toPrettyJson(d).c_str());
366	}
367
368	static void printBenchmarkResultsAsVerboseJson(
369	const vector<detail::BenchmarkResult>& data) {
370	dynamic d;
371	benchmarkResultsToDynamic(data, d);
372	printf("%s\n", toPrettyJson(d).c_str());
373	}
374
375	static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
376	if (FLAGS_json_verbose) {
377	printBenchmarkResultsAsVerboseJson(data);
378	return;
379	} else if (FLAGS_json) {
380	printBenchmarkResultsAsJson(data);
381	return;
382	}
383
384	CHECK(FLAGS_json_verbose \|\| FLAGS_json) << "Cannot print benchmark results";
385	}
386
387	void benchmarkResultsToDynamic(
388	const vector<detail::BenchmarkResult>& data,
389	dynamic& out) {
390	out = dynamic::array;
391	for (auto& datum : data) {
392	if (!datum.counters.empty()) {
393	dynamic obj = dynamic::object;
394	for (auto& counter : datum.counters) {
395	dynamic counterInfo = dynamic::object;
396	counterInfo ["value"] = counter.second.value;
397	counterInfo ["type"] = static_cast<int>(counter.second.type);
398	obj [counter.first] = counterInfo;
399	}
400	out.push_back(
401	dynamic::array(datum.file, datum.name, datum.timeInNs, obj));
402	} else {
403	out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
404	}
405	}
406	}
407
408	void benchmarkResultsFromDynamic(
409	const dynamic& d,
410	vector<detail::BenchmarkResult>& results) {
411	for (auto& datum : d) {
412	results.push_back({datum [`0`].asString(),
413	datum [`1`].asString(),
414	datum [`2`].asDouble(),
415	UserCounters {}});
416	}
417	}
418
419	static pair<StringPiece, StringPiece> resultKey(
420	const detail::BenchmarkResult& result) {
421	return pair<StringPiece, StringPiece>(result.file, result.name);
422	}
423
424	void printResultComparison(
425	const vector<detail::BenchmarkResult>& base,
426	const vector<detail::BenchmarkResult>& test) {
427	map<pair<StringPiece, StringPiece>, double> baselines;
428
429	for (auto& baseResult : base) {
430	baselines [resultKey(baseResult)] = baseResult.timeInNs;
431	}
432	//
433	// Width available
434	static const unsigned int columns = `76`;
435
436	// Compute the longest benchmark name
437	size_t longestName = `0`;
438	for (auto& datum : test) {
439	longestName = max(longestName, datum.name.size());
440	}
441
442	// Print a horizontal rule
443	auto separator = [&](char pad) { puts(string (columns, pad).c_str()); };
444
445	// Print header for a file
446	auto header = [&](const string& file) {
447	separator (`'='`);
448	printf("%-*srelative time/iter iters/s", columns - `28`, file.c_str());
449	separator (`'='`);
450	};
451
452	string lastFile;
453
454	for (auto& datum : test) {
455	folly::Optional<double> baseline =
456	folly::get_optional(baselines, resultKey(datum));
457	auto file = datum.file;
458	if (file != lastFile) {
459	// New file starting
460	header (file);
461	lastFile = file;
462	}
463
464	string s = datum.name;
465	if (s == "-") {
466	separator (`'-'`);
467	continue;
468	}
469	if (s [`0`] == `'%'`) {
470	s.erase(`0`, `1`);
471	}
472	s.resize(columns - `29`, `' '`);
473	auto nsPerIter = datum.timeInNs;
474	auto secPerIter = nsPerIter / `1E9`;
475	auto itersPerSec = (secPerIter == `0`)
476	? std::numeric_limits<double>::infinity()
477	: (`1` / secPerIter);
478	if (!baseline) {
479	// Print without baseline
480	printf(
481	"%*s %9s %7s\n",
482	static_cast<int>(s.size()),
483	s.c_str(),
484	readableTime(secPerIter, `2`).c_str(),
485	metricReadable(itersPerSec, `2`).c_str());
486	} else {
487	// Print with baseline
488	auto rel = baseline / nsPerIter `100.0`;
489	printf(
490	"%*s %7.2f%% %9s %7s\n",
491	static_cast<int>(s.size()),
492	s.c_str(),
493	rel,
494	readableTime(secPerIter, `2`).c_str(),
495	metricReadable(itersPerSec, `2`).c_str());
496	}
497	}
498	separator (`'='`);
499	}
500
501	void checkRunMode() {
502	if (folly::kIsDebug \|\| folly::kIsSanitize) {
503	std::cerr << "WARNING: Benchmark running "
504	<< (folly::kIsDebug ? "in DEBUG mode" : "with SANITIZERS")
505	<< std::endl;
506	}
507	}
508
509	void runBenchmarks() {
510	CHECK(!benchmarks().empty());
511
512	checkRunMode();
513
514	vector<detail::BenchmarkResult> results;
515	results.reserve(benchmarks().size() - `1`);
516
517	std::unique_ptr<boost::regex> bmRegex;
518	if (!FLAGS_bm_regex.empty()) {
519	bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
520	}
521
522	// PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
523
524	size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
525
526	auto const globalBaseline =
527	runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, `0`);
528
529	bool useCounter =
530	std::any_of(benchmarks().begin(), benchmarks().end(), [](const auto& bm) {
531	return bm.useCounter;
532	});
533	BenchmarkResultsPrinter printer;
534	std::set<std::string> counterNames;
535	FOR_EACH_RANGE (i, `0`, benchmarks().size()) {
536	if (i == baselineIndex) {
537	continue;
538	}
539	std::pair<double, UserCounters> elapsed;
540	auto& bm = benchmarks()[i];
541	if (bm.name != "-") { // skip separators
542	if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
543	continue;
544	}
545	elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline.first);
546	}
547
548	// if customized user counters is used, it cannot print the result in real
549	// time as it needs to run all cases first to know the complete set of
550	// counters have been used, then the header can be printed out properly
551	if (!FLAGS_json_verbose && !FLAGS_json && !useCounter) {
552	printer.print({{bm.file, bm.name, elapsed.first, elapsed.second}});
553	} else {
554	results.push_back({bm.file, bm.name, elapsed.first, elapsed.second});
555	}
556
557	// get all counter names
558	for (auto const& kv : elapsed.second) {
559	counterNames.insert(kv.first);
560	}
561	}
562
563	// PLEASE MAKE NOISE. MEASUREMENTS DONE.
564	if (FLAGS_json_verbose \|\| FLAGS_json) {
565	printBenchmarkResults(results);
566	} else {
567	printer = BenchmarkResultsPrinter {std::move(counterNames)};
568	printer.print(results);
569	printer.separator(`'='`);
570	}
571
572	checkRunMode();
573	}
574
575	} // namespace folly
576

Browse the source code of glow/thirdparty/folly/folly/Benchmark.cpp