1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "benchmark/benchmark.h"
16
17#include "benchmark_api_internal.h"
18#include "benchmark_runner.h"
19#include "internal_macros.h"
20
21#ifndef BENCHMARK_OS_WINDOWS
22#ifndef BENCHMARK_OS_FUCHSIA
23#include <sys/resource.h>
24#endif
25#include <sys/time.h>
26#include <unistd.h>
27#endif
28
29#include <algorithm>
30#include <atomic>
31#include <condition_variable>
32#include <cstdio>
33#include <cstdlib>
34#include <fstream>
35#include <iostream>
36#include <limits>
37#include <map>
38#include <memory>
39#include <random>
40#include <string>
41#include <thread>
42#include <utility>
43
44#include "check.h"
45#include "colorprint.h"
46#include "commandlineflags.h"
47#include "complexity.h"
48#include "counter.h"
49#include "internal_macros.h"
50#include "log.h"
51#include "mutex.h"
52#include "perf_counters.h"
53#include "re.h"
54#include "statistics.h"
55#include "string_util.h"
56#include "thread_manager.h"
57#include "thread_timer.h"
58
59namespace benchmark {
60// Print a list of benchmarks. This option overrides all other options.
61BM_DEFINE_bool(benchmark_list_tests, false);
62
63// A regular expression that specifies the set of benchmarks to execute. If
64// this flag is empty, or if this flag is the string \"all\", all benchmarks
65// linked into the binary are run.
66BM_DEFINE_string(benchmark_filter, "");
67
68// Minimum number of seconds we should run benchmark before results are
69// considered significant. For cpu-time based tests, this is the lower bound
70// on the total cpu time used by all threads that make up the test. For
71// real-time based tests, this is the lower bound on the elapsed time of the
72// benchmark execution, regardless of number of threads.
73BM_DEFINE_double(benchmark_min_time, 0.5);
74
75// The number of runs of each benchmark. If greater than 1, the mean and
76// standard deviation of the runs will be reported.
77BM_DEFINE_int32(benchmark_repetitions, 1);
78
79// If set, enable random interleaving of repetitions of all benchmarks.
80// See http://github.com/google/benchmark/issues/1051 for details.
81BM_DEFINE_bool(benchmark_enable_random_interleaving, false);
82
83// Report the result of each benchmark repetitions. When 'true' is specified
84// only the mean, standard deviation, and other statistics are reported for
85// repeated benchmarks. Affects all reporters.
86BM_DEFINE_bool(benchmark_report_aggregates_only, false);
87
88// Display the result of each benchmark repetitions. When 'true' is specified
89// only the mean, standard deviation, and other statistics are displayed for
90// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects
91// the display reporter, but *NOT* file reporter, which will still contain
92// all the output.
93BM_DEFINE_bool(benchmark_display_aggregates_only, false);
94
95// The format to use for console output.
96// Valid values are 'console', 'json', or 'csv'.
97BM_DEFINE_string(benchmark_format, "console");
98
99// The format to use for file output.
100// Valid values are 'console', 'json', or 'csv'.
101BM_DEFINE_string(benchmark_out_format, "json");
102
103// The file to write additional output to.
104BM_DEFINE_string(benchmark_out, "");
105
106// Whether to use colors in the output. Valid values:
107// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
108// the output is being sent to a terminal and the TERM environment variable is
109// set to a terminal type that supports colors.
110BM_DEFINE_string(benchmark_color, "auto");
111
112// Whether to use tabular format when printing user counters to the console.
113// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false.
114BM_DEFINE_bool(benchmark_counters_tabular, false);
115
116// List of additional perf counters to collect, in libpfm format. For more
117// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
118BM_DEFINE_string(benchmark_perf_counters, "");
119
120// Extra context to include in the output formatted as comma-separated key-value
121// pairs. Kept internal as it's only used for parsing from env/command line.
122BM_DEFINE_kvpairs(benchmark_context, {});
123
124// The level of verbose logging to output
125BM_DEFINE_int32(v, 0);
126
127namespace internal {
128
// Custom context key/value pairs. Null until AddCustomContext() allocates the
// map on first use; released by Shutdown().
std::map<std::string, std::string>* global_context = nullptr;

// Intentionally empty: accepts a pointer and does nothing with it.
// FIXME: wouldn't LTO mess this up?
void UseCharPointer(const volatile char*) {}
133
134} // namespace internal
135
136State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
137 int thread_i, int n_threads, internal::ThreadTimer* timer,
138 internal::ThreadManager* manager,
139 internal::PerfCountersMeasurement* perf_counters_measurement)
140 : total_iterations_(0),
141 batch_leftover_(0),
142 max_iterations(max_iters),
143 started_(false),
144 finished_(false),
145 error_occurred_(false),
146 range_(ranges),
147 complexity_n_(0),
148 thread_index_(thread_i),
149 threads_(n_threads),
150 timer_(timer),
151 manager_(manager),
152 perf_counters_measurement_(perf_counters_measurement) {
153 BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
154 BM_CHECK_LT(thread_index_, threads_)
155 << "thread_index must be less than threads";
156
157 // Note: The use of offsetof below is technically undefined until C++17
158 // because State is not a standard layout type. However, all compilers
159 // currently provide well-defined behavior as an extension (which is
160 // demonstrated since constexpr evaluation must diagnose all undefined
161 // behavior). However, GCC and Clang also warn about this use of offsetof,
162 // which must be suppressed.
163#if defined(__INTEL_COMPILER)
164#pragma warning push
165#pragma warning(disable : 1875)
166#elif defined(__GNUC__)
167#pragma GCC diagnostic push
168#pragma GCC diagnostic ignored "-Winvalid-offsetof"
169#endif
170 // Offset tests to ensure commonly accessed data is on the first cache line.
171 const int cache_line_size = 64;
172 static_assert(offsetof(State, error_occurred_) <=
173 (cache_line_size - sizeof(error_occurred_)),
174 "");
175#if defined(__INTEL_COMPILER)
176#pragma warning pop
177#elif defined(__GNUC__)
178#pragma GCC diagnostic pop
179#endif
180}
181
182void State::PauseTiming() {
183 // Add in time accumulated so far
184 BM_CHECK(started_ && !finished_ && !error_occurred_);
185 timer_->StopTimer();
186 if (perf_counters_measurement_) {
187 auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
188 for (const auto& name_and_measurement : measurements) {
189 auto name = name_and_measurement.first;
190 auto measurement = name_and_measurement.second;
191 BM_CHECK_EQ(counters[name], 0.0);
192 counters[name] = Counter(measurement, Counter::kAvgIterations);
193 }
194 }
195}
196
197void State::ResumeTiming() {
198 BM_CHECK(started_ && !finished_ && !error_occurred_);
199 timer_->StartTimer();
200 if (perf_counters_measurement_) {
201 perf_counters_measurement_->Start();
202 }
203}
204
205void State::SkipWithError(const char* msg) {
206 BM_CHECK(msg);
207 error_occurred_ = true;
208 {
209 MutexLock l(manager_->GetBenchmarkMutex());
210 if (manager_->results.has_error_ == false) {
211 manager_->results.error_message_ = msg;
212 manager_->results.has_error_ = true;
213 }
214 }
215 total_iterations_ = 0;
216 if (timer_->running()) timer_->StopTimer();
217}
218
219void State::SetIterationTime(double seconds) {
220 timer_->SetIterationTime(seconds);
221}
222
223void State::SetLabel(const char* label) {
224 MutexLock l(manager_->GetBenchmarkMutex());
225 manager_->results.report_label_ = label;
226}
227
228void State::StartKeepRunning() {
229 BM_CHECK(!started_ && !finished_);
230 started_ = true;
231 total_iterations_ = error_occurred_ ? 0 : max_iterations;
232 manager_->StartStopBarrier();
233 if (!error_occurred_) ResumeTiming();
234}
235
236void State::FinishKeepRunning() {
237 BM_CHECK(started_ && (!finished_ || error_occurred_));
238 if (!error_occurred_) {
239 PauseTiming();
240 }
241 // Total iterations has now wrapped around past 0. Fix this.
242 total_iterations_ = 0;
243 finished_ = true;
244 manager_->StartStopBarrier();
245}
246
247namespace internal {
248namespace {
249
250// Flushes streams after invoking reporter methods that write to them. This
251// ensures users get timely updates even when streams are not line-buffered.
252void FlushStreams(BenchmarkReporter* reporter) {
253 if (!reporter) return;
254 std::flush(reporter->GetOutputStream());
255 std::flush(reporter->GetErrorStream());
256}
257
258// Reports in both display and file reporters.
259void Report(BenchmarkReporter* display_reporter,
260 BenchmarkReporter* file_reporter, const RunResults& run_results) {
261 auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
262 const RunResults& results) {
263 assert(reporter);
264 // If there are no aggregates, do output non-aggregates.
265 aggregates_only &= !results.aggregates_only.empty();
266 if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
267 if (!results.aggregates_only.empty())
268 reporter->ReportRuns(results.aggregates_only);
269 };
270
271 report_one(display_reporter, run_results.display_report_aggregates_only,
272 run_results);
273 if (file_reporter)
274 report_one(file_reporter, run_results.file_report_aggregates_only,
275 run_results);
276
277 FlushStreams(display_reporter);
278 FlushStreams(file_reporter);
279}
280
281void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
282 BenchmarkReporter* display_reporter,
283 BenchmarkReporter* file_reporter) {
284 // Note the file_reporter can be null.
285 BM_CHECK(display_reporter != nullptr);
286
287 // Determine the width of the name field using a minimum width of 10.
288 bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
289 size_t name_field_width = 10;
290 size_t stat_field_width = 0;
291 for (const BenchmarkInstance& benchmark : benchmarks) {
292 name_field_width =
293 std::max<size_t>(name_field_width, benchmark.name().str().size());
294 might_have_aggregates |= benchmark.repetitions() > 1;
295
296 for (const auto& Stat : benchmark.statistics())
297 stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
298 }
299 if (might_have_aggregates) name_field_width += 1 + stat_field_width;
300
301 // Print header here
302 BenchmarkReporter::Context context;
303 context.name_field_width = name_field_width;
304
305 // Keep track of running times of all instances of each benchmark family.
306 std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
307 per_family_reports;
308
309 if (display_reporter->ReportContext(context) &&
310 (!file_reporter || file_reporter->ReportContext(context))) {
311 FlushStreams(display_reporter);
312 FlushStreams(file_reporter);
313
314 size_t num_repetitions_total = 0;
315
316 std::vector<internal::BenchmarkRunner> runners;
317 runners.reserve(benchmarks.size());
318 for (const BenchmarkInstance& benchmark : benchmarks) {
319 BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
320 if (benchmark.complexity() != oNone)
321 reports_for_family = &per_family_reports[benchmark.family_index()];
322
323 runners.emplace_back(benchmark, reports_for_family);
324 int num_repeats_of_this_instance = runners.back().GetNumRepeats();
325 num_repetitions_total += num_repeats_of_this_instance;
326 if (reports_for_family)
327 reports_for_family->num_runs_total += num_repeats_of_this_instance;
328 }
329 assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
330
331 std::vector<size_t> repetition_indices;
332 repetition_indices.reserve(num_repetitions_total);
333 for (size_t runner_index = 0, num_runners = runners.size();
334 runner_index != num_runners; ++runner_index) {
335 const internal::BenchmarkRunner& runner = runners[runner_index];
336 std::fill_n(std::back_inserter(repetition_indices),
337 runner.GetNumRepeats(), runner_index);
338 }
339 assert(repetition_indices.size() == num_repetitions_total &&
340 "Unexpected number of repetition indexes.");
341
342 if (FLAGS_benchmark_enable_random_interleaving) {
343 std::random_device rd;
344 std::mt19937 g(rd());
345 std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
346 }
347
348 for (size_t repetition_index : repetition_indices) {
349 internal::BenchmarkRunner& runner = runners[repetition_index];
350 runner.DoOneRepetition();
351 if (runner.HasRepeatsRemaining()) continue;
352 // FIXME: report each repetition separately, not all of them in bulk.
353
354 RunResults run_results = runner.GetResults();
355
356 // Maybe calculate complexity report
357 if (const auto* reports_for_family = runner.GetReportsForFamily()) {
358 if (reports_for_family->num_runs_done ==
359 reports_for_family->num_runs_total) {
360 auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
361 run_results.aggregates_only.insert(run_results.aggregates_only.end(),
362 additional_run_stats.begin(),
363 additional_run_stats.end());
364 per_family_reports.erase(
365 static_cast<int>(reports_for_family->Runs.front().family_index));
366 }
367 }
368
369 Report(display_reporter, file_reporter, run_results);
370 }
371 }
372 display_reporter->Finalize();
373 if (file_reporter) file_reporter->Finalize();
374 FlushStreams(display_reporter);
375 FlushStreams(file_reporter);
376}
377
378// Disable deprecated warnings temporarily because we need to reference
379// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
380BENCHMARK_DISABLE_DEPRECATED_WARNING
381
382std::unique_ptr<BenchmarkReporter> CreateReporter(
383 std::string const& name, ConsoleReporter::OutputOptions output_opts) {
384 typedef std::unique_ptr<BenchmarkReporter> PtrType;
385 if (name == "console") {
386 return PtrType(new ConsoleReporter(output_opts));
387 } else if (name == "json") {
388 return PtrType(new JSONReporter);
389 } else if (name == "csv") {
390 return PtrType(new CSVReporter);
391 } else {
392 std::cerr << "Unexpected format: '" << name << "'\n";
393 std::exit(1);
394 }
395}
396
397BENCHMARK_RESTORE_DEPRECATED_WARNING
398
399} // end namespace
400
// Returns true when the magnitude of `n` is strictly smaller than the
// machine epsilon of double. NaN and infinities are never considered zero.
bool IsZero(double n) {
  const double tolerance = std::numeric_limits<double>::epsilon();
  return -tolerance < n && n < tolerance;
}
404
405ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
406 int output_opts = ConsoleReporter::OO_Defaults;
407 auto is_benchmark_color = [force_no_color]() -> bool {
408 if (force_no_color) {
409 return false;
410 }
411 if (FLAGS_benchmark_color == "auto") {
412 return IsColorTerminal();
413 }
414 return IsTruthyFlagValue(FLAGS_benchmark_color);
415 };
416 if (is_benchmark_color()) {
417 output_opts |= ConsoleReporter::OO_Color;
418 } else {
419 output_opts &= ~ConsoleReporter::OO_Color;
420 }
421 if (FLAGS_benchmark_counters_tabular) {
422 output_opts |= ConsoleReporter::OO_Tabular;
423 } else {
424 output_opts &= ~ConsoleReporter::OO_Tabular;
425 }
426 return static_cast<ConsoleReporter::OutputOptions>(output_opts);
427}
428
429} // end namespace internal
430
431size_t RunSpecifiedBenchmarks() {
432 return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
433}
434
435size_t RunSpecifiedBenchmarks(std::string spec) {
436 return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec));
437}
438
439size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
440 return RunSpecifiedBenchmarks(display_reporter, nullptr,
441 FLAGS_benchmark_filter);
442}
443
444size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
445 std::string spec) {
446 return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec));
447}
448
449size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
450 BenchmarkReporter* file_reporter) {
451 return RunSpecifiedBenchmarks(display_reporter, file_reporter,
452 FLAGS_benchmark_filter);
453}
454
455size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
456 BenchmarkReporter* file_reporter,
457 std::string spec) {
458 if (spec.empty() || spec == "all")
459 spec = "."; // Regexp that matches all benchmarks
460
461 // Setup the reporters
462 std::ofstream output_file;
463 std::unique_ptr<BenchmarkReporter> default_display_reporter;
464 std::unique_ptr<BenchmarkReporter> default_file_reporter;
465 if (!display_reporter) {
466 default_display_reporter = internal::CreateReporter(
467 FLAGS_benchmark_format, internal::GetOutputOptions());
468 display_reporter = default_display_reporter.get();
469 }
470 auto& Out = display_reporter->GetOutputStream();
471 auto& Err = display_reporter->GetErrorStream();
472
473 std::string const& fname = FLAGS_benchmark_out;
474 if (fname.empty() && file_reporter) {
475 Err << "A custom file reporter was provided but "
476 "--benchmark_out=<file> was not specified."
477 << std::endl;
478 std::exit(1);
479 }
480 if (!fname.empty()) {
481 output_file.open(fname);
482 if (!output_file.is_open()) {
483 Err << "invalid file name: '" << fname << "'" << std::endl;
484 std::exit(1);
485 }
486 if (!file_reporter) {
487 default_file_reporter = internal::CreateReporter(
488 FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
489 file_reporter = default_file_reporter.get();
490 }
491 file_reporter->SetOutputStream(&output_file);
492 file_reporter->SetErrorStream(&output_file);
493 }
494
495 std::vector<internal::BenchmarkInstance> benchmarks;
496 if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0;
497
498 if (benchmarks.empty()) {
499 Err << "Failed to match any benchmarks against regex: " << spec << "\n";
500 return 0;
501 }
502
503 if (FLAGS_benchmark_list_tests) {
504 for (auto const& benchmark : benchmarks)
505 Out << benchmark.name().str() << "\n";
506 } else {
507 internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
508 }
509
510 return benchmarks.size();
511}
512
513std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }
514
515void RegisterMemoryManager(MemoryManager* manager) {
516 internal::memory_manager = manager;
517}
518
519void AddCustomContext(const std::string& key, const std::string& value) {
520 if (internal::global_context == nullptr) {
521 internal::global_context = new std::map<std::string, std::string>();
522 }
523 if (!internal::global_context->emplace(key, value).second) {
524 std::cerr << "Failed to add custom context \"" << key << "\" as it already "
525 << "exists with value \"" << value << "\"\n";
526 }
527}
528
529namespace internal {
530
// Writes the list of supported command-line flags to stdout and terminates
// the process with exit code 0.
void PrintUsageAndExit() {
  // The text contains no conversion specifiers, so it is written verbatim.
  static const char kUsage[] =
      "benchmark"
      " [--benchmark_list_tests={true|false}]\n"
      " [--benchmark_filter=<regex>]\n"
      " [--benchmark_min_time=<min_time>]\n"
      " [--benchmark_repetitions=<num_repetitions>]\n"
      " [--benchmark_enable_random_interleaving={true|false}]\n"
      " [--benchmark_report_aggregates_only={true|false}]\n"
      " [--benchmark_display_aggregates_only={true|false}]\n"
      " [--benchmark_format=<console|json|csv>]\n"
      " [--benchmark_out=<filename>]\n"
      " [--benchmark_out_format=<json|console|csv>]\n"
      " [--benchmark_color={auto|true|false}]\n"
      " [--benchmark_counters_tabular={true|false}]\n"
      " [--benchmark_perf_counters=<counter>,...]\n"
      " [--benchmark_context=<key>=<value>,...]\n"
      " [--v=<verbosity>]\n";
  fputs(kUsage, stdout);
  exit(0);
}
551
552void ParseCommandLineFlags(int* argc, char** argv) {
553 using namespace benchmark;
554 BenchmarkReporter::Context::executable_name =
555 (argc && *argc > 0) ? argv[0] : "unknown";
556 for (int i = 1; argc && i < *argc; ++i) {
557 if (ParseBoolFlag(argv[i], "benchmark_list_tests",
558 &FLAGS_benchmark_list_tests) ||
559 ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
560 ParseDoubleFlag(argv[i], "benchmark_min_time",
561 &FLAGS_benchmark_min_time) ||
562 ParseInt32Flag(argv[i], "benchmark_repetitions",
563 &FLAGS_benchmark_repetitions) ||
564 ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
565 &FLAGS_benchmark_enable_random_interleaving) ||
566 ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
567 &FLAGS_benchmark_report_aggregates_only) ||
568 ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
569 &FLAGS_benchmark_display_aggregates_only) ||
570 ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
571 ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
572 ParseStringFlag(argv[i], "benchmark_out_format",
573 &FLAGS_benchmark_out_format) ||
574 ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
575 ParseBoolFlag(argv[i], "benchmark_counters_tabular",
576 &FLAGS_benchmark_counters_tabular) ||
577 ParseStringFlag(argv[i], "benchmark_perf_counters",
578 &FLAGS_benchmark_perf_counters) ||
579 ParseKeyValueFlag(argv[i], "benchmark_context",
580 &FLAGS_benchmark_context) ||
581 ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
582 for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
583
584 --(*argc);
585 --i;
586 } else if (IsFlag(argv[i], "help")) {
587 PrintUsageAndExit();
588 }
589 }
590 for (auto const* flag :
591 {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) {
592 if (*flag != "console" && *flag != "json" && *flag != "csv") {
593 PrintUsageAndExit();
594 }
595 }
596 if (FLAGS_benchmark_color.empty()) {
597 PrintUsageAndExit();
598 }
599 for (const auto& kv : FLAGS_benchmark_context) {
600 AddCustomContext(kv.first, kv.second);
601 }
602}
603
// Holds a function-local static std::ios_base::Init object, which is
// constructed on the first call. Always returns 0.
int InitializeStreams() {
  static std::ios_base::Init stream_initializer;
  return 0;
}
608
609} // end namespace internal
610
611void Initialize(int* argc, char** argv) {
612 internal::ParseCommandLineFlags(argc, argv);
613 internal::LogLevel() = FLAGS_v;
614}
615
616void Shutdown() { delete internal::global_context; }
617
// Writes one error line to stderr for each argument after argv[0] and
// returns true if there was at least one such argument (argc > 1).
bool ReportUnrecognizedArguments(int argc, char** argv) {
  int index = 1;
  while (index < argc) {
    fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0],
            argv[index]);
    ++index;
  }
  return argc > 1;
}
625
626} // end namespace benchmark
627