1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// @author Andrei Alexandrescu ([email protected])
18
19#include <folly/Benchmark.h>
20
21#include <algorithm>
22#include <cmath>
23#include <cstring>
24#include <iostream>
25#include <limits>
26#include <map>
27#include <memory>
28#include <numeric>
29#include <utility>
30#include <vector>
31
32#include <boost/regex.hpp>
33
34#include <folly/MapUtil.h>
35#include <folly/String.h>
36#include <folly/container/Foreach.h>
37#include <folly/json.h>
38
39using namespace std;
40
41DEFINE_bool(benchmark, false, "Run benchmarks.");
42DEFINE_bool(json, false, "Output in JSON format.");
43DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
44
45DEFINE_string(
46 bm_regex,
47 "",
48 "Only benchmarks whose names match this regex will be run.");
49
50DEFINE_int64(
51 bm_min_usec,
52 100,
53 "Minimum # of microseconds we'll accept for each benchmark.");
54
55DEFINE_int32(
56 bm_min_iters,
57 1,
58 "Minimum # of iterations we'll try for each benchmark.");
59
60DEFINE_int64(
61 bm_max_iters,
62 1 << 30,
63 "Maximum # of iterations we'll try for each benchmark.");
64
65DEFINE_int32(
66 bm_max_secs,
67 1,
68 "Maximum # of seconds we'll spend on each benchmark.");
69
70namespace folly {
71
72std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
73
74typedef function<detail::TimeIterData(unsigned int)> BenchmarkFun;
75
76vector<detail::BenchmarkRegistration>& benchmarks() {
77 static vector<detail::BenchmarkRegistration> _benchmarks;
78 return _benchmarks;
79}
80
81#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
82#define FB_STRINGIZE_X2(x) FOLLY_PP_STRINGIZE(x)
83
84// Add the global baseline
85BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
86#ifdef _MSC_VER
87 _ReadWriteBarrier();
88#else
89 asm volatile("");
90#endif
91}
92
93size_t getGlobalBenchmarkBaselineIndex() {
94 const char* global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
95 auto it = std::find_if(
96 benchmarks().begin(),
97 benchmarks().end(),
98 [global](const detail::BenchmarkRegistration& v) {
99 return v.name == global;
100 });
101 CHECK(it != benchmarks().end());
102 return size_t(std::distance(benchmarks().begin(), it));
103}
104
105#undef FB_STRINGIZE_X2
106#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
107
108void detail::addBenchmarkImpl(
109 const char* file,
110 StringPiece name,
111 BenchmarkFun fun,
112 bool useCounter) {
113 benchmarks().push_back({file, name.str(), std::move(fun), useCounter});
114}
115
116static std::pair<double, UserCounters> runBenchmarkGetNSPerIteration(
117 const BenchmarkFun& fun,
118 const double globalBaseline) {
119 using std::chrono::duration_cast;
120 using std::chrono::high_resolution_clock;
121 using std::chrono::microseconds;
122 using std::chrono::nanoseconds;
123 using std::chrono::seconds;
124
125 // They key here is accuracy; too low numbers means the accuracy was
126 // coarse. We up the ante until we get to at least minNanoseconds
127 // timings.
128 static_assert(
129 std::is_same<high_resolution_clock::duration, nanoseconds>::value,
130 "High resolution clock must be nanosecond resolution.");
131 // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
132 // the clock resolution is worse than that, it will be larger. In
133 // essence we're aiming at making the quantization noise 0.01%.
134 static const auto minNanoseconds = std::max<nanoseconds>(
135 nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
136
137 // We do measurements in several epochs and take the minimum, to
138 // account for jitter.
139 static const unsigned int epochs = 1000;
140 // We establish a total time budget as we don't want a measurement
141 // to take too long. This will curtail the number of actual epochs.
142 const auto timeBudget = seconds(FLAGS_bm_max_secs);
143 auto global = high_resolution_clock::now();
144
145 std::vector<std::pair<double, UserCounters>> epochResults(epochs);
146 size_t actualEpochs = 0;
147
148 for (; actualEpochs < epochs; ++actualEpochs) {
149 const auto maxIters = uint32_t(FLAGS_bm_max_iters);
150 for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
151 detail::TimeIterData timeIterData = fun(static_cast<unsigned int>(n));
152 if (timeIterData.duration < minNanoseconds) {
153 continue;
154 }
155 // We got an accurate enough timing, done. But only save if
156 // smaller than the current result.
157 auto nsecs = duration_cast<nanoseconds>(timeIterData.duration);
158 epochResults[actualEpochs] = std::make_pair(
159 max(0.0, double(nsecs.count()) / timeIterData.niter - globalBaseline),
160 std::move(timeIterData.userCounters));
161 // Done with the current epoch, we got a meaningful timing.
162 break;
163 }
164 auto now = high_resolution_clock::now();
165 if (now - global >= timeBudget) {
166 // No more time budget available.
167 ++actualEpochs;
168 break;
169 }
170 }
171
172 // Current state of the art: get the minimum. After some
173 // experimentation, it seems taking the minimum is the best.
174 auto iter = min_element(
175 epochResults.begin(),
176 epochResults.begin() + actualEpochs,
177 [](const auto& a, const auto& b) { return a.first < b.first; });
178
179 // If the benchmark was basically drowned in baseline noise, it's
180 // possible it became negative.
181 return std::make_pair(max(0.0, iter->first), iter->second);
182}
183
184struct ScaleInfo {
185 double boundary;
186 const char* suffix;
187};
188
189static const ScaleInfo kTimeSuffixes[]{
190 {365.25 * 24 * 3600, "years"},
191 {24 * 3600, "days"},
192 {3600, "hr"},
193 {60, "min"},
194 {1, "s"},
195 {1E-3, "ms"},
196 {1E-6, "us"},
197 {1E-9, "ns"},
198 {1E-12, "ps"},
199 {1E-15, "fs"},
200 {0, nullptr},
201};
202
203static const ScaleInfo kMetricSuffixes[]{
204 {1E24, "Y"}, // yotta
205 {1E21, "Z"}, // zetta
206 {1E18, "X"}, // "exa" written with suffix 'X' so as to not create
207 // confusion with scientific notation
208 {1E15, "P"}, // peta
209 {1E12, "T"}, // terra
210 {1E9, "G"}, // giga
211 {1E6, "M"}, // mega
212 {1E3, "K"}, // kilo
213 {1, ""},
214 {1E-3, "m"}, // milli
215 {1E-6, "u"}, // micro
216 {1E-9, "n"}, // nano
217 {1E-12, "p"}, // pico
218 {1E-15, "f"}, // femto
219 {1E-18, "a"}, // atto
220 {1E-21, "z"}, // zepto
221 {1E-24, "y"}, // yocto
222 {0, nullptr},
223};
224
225static string
226humanReadable(double n, unsigned int decimals, const ScaleInfo* scales) {
227 if (std::isinf(n) || std::isnan(n)) {
228 return folly::to<string>(n);
229 }
230
231 const double absValue = fabs(n);
232 const ScaleInfo* scale = scales;
233 while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
234 ++scale;
235 }
236
237 const double scaledValue = n / scale->boundary;
238 return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
239}
240
241static string readableTime(double n, unsigned int decimals) {
242 return humanReadable(n, decimals, kTimeSuffixes);
243}
244
245static string metricReadable(double n, unsigned int decimals) {
246 return humanReadable(n, decimals, kMetricSuffixes);
247}
248
249namespace {
250class BenchmarkResultsPrinter {
251 public:
252 BenchmarkResultsPrinter() = default;
253 explicit BenchmarkResultsPrinter(std::set<std::string> counterNames)
254 : counterNames_(std::move(counterNames)),
255 namesLength_{std::accumulate(
256 counterNames_.begin(),
257 counterNames_.end(),
258 size_t{0},
259 [](size_t acc, auto&& name) { return acc + 2 + name.length(); })} {}
260
261 static constexpr unsigned int columns{76};
262 void separator(char pad) {
263 puts(string(columns + namesLength_, pad).c_str());
264 }
265
266 void header(const string& file) {
267 separator('=');
268 printf("%-*srelative time/iter iters/s", columns - 28, file.c_str());
269 for (auto const& name : counterNames_) {
270 printf(" %s", name.c_str());
271 }
272 printf("\n");
273 separator('=');
274 }
275
276 void print(const vector<detail::BenchmarkResult>& data) {
277 for (auto& datum : data) {
278 auto file = datum.file;
279 if (file != lastFile_) {
280 // New file starting
281 header(file);
282 lastFile_ = file;
283 }
284
285 string s = datum.name;
286 if (s == "-") {
287 separator('-');
288 continue;
289 }
290 bool useBaseline /* = void */;
291 if (s[0] == '%') {
292 s.erase(0, 1);
293 useBaseline = true;
294 } else {
295 baselineNsPerIter_ = datum.timeInNs;
296 useBaseline = false;
297 }
298 s.resize(columns - 29, ' ');
299 auto nsPerIter = datum.timeInNs;
300 auto secPerIter = nsPerIter / 1E9;
301 auto itersPerSec = (secPerIter == 0)
302 ? std::numeric_limits<double>::infinity()
303 : (1 / secPerIter);
304 if (!useBaseline) {
305 // Print without baseline
306 printf(
307 "%*s %9s %7s",
308 static_cast<int>(s.size()),
309 s.c_str(),
310 readableTime(secPerIter, 2).c_str(),
311 metricReadable(itersPerSec, 2).c_str());
312 } else {
313 // Print with baseline
314 auto rel = baselineNsPerIter_ / nsPerIter * 100.0;
315 printf(
316 "%*s %7.2f%% %9s %7s",
317 static_cast<int>(s.size()),
318 s.c_str(),
319 rel,
320 readableTime(secPerIter, 2).c_str(),
321 metricReadable(itersPerSec, 2).c_str());
322 }
323 for (auto const& name : counterNames_) {
324 if (auto ptr = folly::get_ptr(datum.counters, name)) {
325 switch (ptr->type) {
326 case UserMetric::Type::TIME:
327 printf(
328 " %-*s",
329 int(name.length()),
330 readableTime(ptr->value, 2).c_str());
331 break;
332 case UserMetric::Type::METRIC:
333 printf(
334 " %-*s",
335 int(name.length()),
336 metricReadable(ptr->value, 2).c_str());
337 break;
338 case UserMetric::Type::CUSTOM:
339 default:
340 printf(" %-*" PRId64, int(name.length()), ptr->value);
341 }
342 } else {
343 printf(" %-*s", int(name.length()), "NaN");
344 }
345 }
346 printf("\n");
347 }
348 }
349
350 private:
351 std::set<std::string> counterNames_;
352 size_t namesLength_{0};
353 double baselineNsPerIter_{numeric_limits<double>::max()};
354 string lastFile_;
355};
356} // namespace
357
358static void printBenchmarkResultsAsJson(
359 const vector<detail::BenchmarkResult>& data) {
360 dynamic d = dynamic::object;
361 for (auto& datum : data) {
362 d[datum.name] = datum.timeInNs * 1000.;
363 }
364
365 printf("%s\n", toPrettyJson(d).c_str());
366}
367
368static void printBenchmarkResultsAsVerboseJson(
369 const vector<detail::BenchmarkResult>& data) {
370 dynamic d;
371 benchmarkResultsToDynamic(data, d);
372 printf("%s\n", toPrettyJson(d).c_str());
373}
374
375static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
376 if (FLAGS_json_verbose) {
377 printBenchmarkResultsAsVerboseJson(data);
378 return;
379 } else if (FLAGS_json) {
380 printBenchmarkResultsAsJson(data);
381 return;
382 }
383
384 CHECK(FLAGS_json_verbose || FLAGS_json) << "Cannot print benchmark results";
385}
386
387void benchmarkResultsToDynamic(
388 const vector<detail::BenchmarkResult>& data,
389 dynamic& out) {
390 out = dynamic::array;
391 for (auto& datum : data) {
392 if (!datum.counters.empty()) {
393 dynamic obj = dynamic::object;
394 for (auto& counter : datum.counters) {
395 dynamic counterInfo = dynamic::object;
396 counterInfo["value"] = counter.second.value;
397 counterInfo["type"] = static_cast<int>(counter.second.type);
398 obj[counter.first] = counterInfo;
399 }
400 out.push_back(
401 dynamic::array(datum.file, datum.name, datum.timeInNs, obj));
402 } else {
403 out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
404 }
405 }
406}
407
408void benchmarkResultsFromDynamic(
409 const dynamic& d,
410 vector<detail::BenchmarkResult>& results) {
411 for (auto& datum : d) {
412 results.push_back({datum[0].asString(),
413 datum[1].asString(),
414 datum[2].asDouble(),
415 UserCounters{}});
416 }
417}
418
419static pair<StringPiece, StringPiece> resultKey(
420 const detail::BenchmarkResult& result) {
421 return pair<StringPiece, StringPiece>(result.file, result.name);
422}
423
424void printResultComparison(
425 const vector<detail::BenchmarkResult>& base,
426 const vector<detail::BenchmarkResult>& test) {
427 map<pair<StringPiece, StringPiece>, double> baselines;
428
429 for (auto& baseResult : base) {
430 baselines[resultKey(baseResult)] = baseResult.timeInNs;
431 }
432 //
433 // Width available
434 static const unsigned int columns = 76;
435
436 // Compute the longest benchmark name
437 size_t longestName = 0;
438 for (auto& datum : test) {
439 longestName = max(longestName, datum.name.size());
440 }
441
442 // Print a horizontal rule
443 auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
444
445 // Print header for a file
446 auto header = [&](const string& file) {
447 separator('=');
448 printf("%-*srelative time/iter iters/s", columns - 28, file.c_str());
449 separator('=');
450 };
451
452 string lastFile;
453
454 for (auto& datum : test) {
455 folly::Optional<double> baseline =
456 folly::get_optional(baselines, resultKey(datum));
457 auto file = datum.file;
458 if (file != lastFile) {
459 // New file starting
460 header(file);
461 lastFile = file;
462 }
463
464 string s = datum.name;
465 if (s == "-") {
466 separator('-');
467 continue;
468 }
469 if (s[0] == '%') {
470 s.erase(0, 1);
471 }
472 s.resize(columns - 29, ' ');
473 auto nsPerIter = datum.timeInNs;
474 auto secPerIter = nsPerIter / 1E9;
475 auto itersPerSec = (secPerIter == 0)
476 ? std::numeric_limits<double>::infinity()
477 : (1 / secPerIter);
478 if (!baseline) {
479 // Print without baseline
480 printf(
481 "%*s %9s %7s\n",
482 static_cast<int>(s.size()),
483 s.c_str(),
484 readableTime(secPerIter, 2).c_str(),
485 metricReadable(itersPerSec, 2).c_str());
486 } else {
487 // Print with baseline
488 auto rel = *baseline / nsPerIter * 100.0;
489 printf(
490 "%*s %7.2f%% %9s %7s\n",
491 static_cast<int>(s.size()),
492 s.c_str(),
493 rel,
494 readableTime(secPerIter, 2).c_str(),
495 metricReadable(itersPerSec, 2).c_str());
496 }
497 }
498 separator('=');
499}
500
501void checkRunMode() {
502 if (folly::kIsDebug || folly::kIsSanitize) {
503 std::cerr << "WARNING: Benchmark running "
504 << (folly::kIsDebug ? "in DEBUG mode" : "with SANITIZERS")
505 << std::endl;
506 }
507}
508
509void runBenchmarks() {
510 CHECK(!benchmarks().empty());
511
512 checkRunMode();
513
514 vector<detail::BenchmarkResult> results;
515 results.reserve(benchmarks().size() - 1);
516
517 std::unique_ptr<boost::regex> bmRegex;
518 if (!FLAGS_bm_regex.empty()) {
519 bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
520 }
521
522 // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
523
524 size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
525
526 auto const globalBaseline =
527 runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
528
529 bool useCounter =
530 std::any_of(benchmarks().begin(), benchmarks().end(), [](const auto& bm) {
531 return bm.useCounter;
532 });
533 BenchmarkResultsPrinter printer;
534 std::set<std::string> counterNames;
535 FOR_EACH_RANGE (i, 0, benchmarks().size()) {
536 if (i == baselineIndex) {
537 continue;
538 }
539 std::pair<double, UserCounters> elapsed;
540 auto& bm = benchmarks()[i];
541 if (bm.name != "-") { // skip separators
542 if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
543 continue;
544 }
545 elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline.first);
546 }
547
548 // if customized user counters is used, it cannot print the result in real
549 // time as it needs to run all cases first to know the complete set of
550 // counters have been used, then the header can be printed out properly
551 if (!FLAGS_json_verbose && !FLAGS_json && !useCounter) {
552 printer.print({{bm.file, bm.name, elapsed.first, elapsed.second}});
553 } else {
554 results.push_back({bm.file, bm.name, elapsed.first, elapsed.second});
555 }
556
557 // get all counter names
558 for (auto const& kv : elapsed.second) {
559 counterNames.insert(kv.first);
560 }
561 }
562
563 // PLEASE MAKE NOISE. MEASUREMENTS DONE.
564 if (FLAGS_json_verbose || FLAGS_json) {
565 printBenchmarkResults(results);
566 } else {
567 printer = BenchmarkResultsPrinter{std::move(counterNames)};
568 printer.print(results);
569 printer.separator('=');
570 }
571
572 checkRunMode();
573}
574
575} // namespace folly
576