// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark_runner.h"

#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"

namespace benchmark {

namespace internal {

// Set via benchmark::RegisterMemoryManager(). When non-null, each repetition
// additionally performs a short single-threaded pass so the manager can
// collect memory metrics (see BenchmarkRunner::DoOneRepetition below).
MemoryManager* memory_manager = nullptr;

namespace {

// Hard upper bound on the iteration count of a single repetition.
// PredictNumItersNeeded() clamps its prediction to this value, and
// ShouldReportIterationResults() stops growing the run once it is reached.
static constexpr IterationCount kMaxIterations = 1000000000;

BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::BenchmarkInstance& b,
    const internal::ThreadManager::Result& results,
    IterationCount memory_iterations,
    const MemoryManager::Result& memory_result, double seconds,
    int64_t repetition_index) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.run_name = b.name();
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
  // This is the total iterations across all threads.
  report.iterations = results.iterations;
  report.time_unit = b.time_unit();
  report.threads = b.threads();
  report.repetition_index = repetition_index;
  report.repetitions = b.repetitions();

  if (!report.error_occurred) {
    if (b.use_manual_time()) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.cpu_accumulated_time = results.cpu_time_used;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity();
    report.complexity_lambda = b.complexity_lambda();
    report.statistics = &b.statistics();
    report.counters = results.counters;

    if (memory_iterations > 0) {
      report.has_memory_result = true;
      // memory_iterations is known to be positive here, so the division is
      // well-defined.
      report.allocs_per_iter =
          static_cast<double>(memory_result.num_allocs) / memory_iterations;
      report.max_bytes_used = memory_result.max_bytes_used;
    }

    internal::Finish(&report.counters, results.iterations, seconds,
                     b.threads());
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
                 int thread_id, ThreadManager* manager,
                 PerfCountersMeasurement* perf_counters_measurement) {
  internal::ThreadTimer timer(
      b->measure_process_cpu_time()
          ? internal::ThreadTimer::CreateProcessCpuTime()
          : internal::ThreadTimer::Create());
  State st =
      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
  CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.complexity_n += st.complexity_length_n();
    internal::Increment(&results.counters, st.counters);
  }
  manager->NotifyThreadComplete();
}

class BenchmarkRunner {
 public:
  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
      : b(b_),
        complexity_reports(*complexity_reports_),
        min_time(!IsZero(b.min_time()) ? b.min_time()
                                       : FLAGS_benchmark_min_time),
        repeats(b.repetitions() != 0 ? b.repetitions()
                                     : FLAGS_benchmark_repetitions),
        has_explicit_iteration_count(b.iterations() != 0),
        pool(b.threads() - 1),
        iters(has_explicit_iteration_count ? b.iterations() : 1),
        perf_counters_measurement(
            PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
        perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
                                          ? &perf_counters_measurement
                                          : nullptr) {
    run_results.display_report_aggregates_only =
        (FLAGS_benchmark_report_aggregates_only ||
         FLAGS_benchmark_display_aggregates_only);
    run_results.file_report_aggregates_only =
        FLAGS_benchmark_report_aggregates_only;
    if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
      run_results.display_report_aggregates_only =
          (b.aggregation_report_mode() &
           internal::ARM_DisplayReportAggregatesOnly);
      run_results.file_report_aggregates_only =
          (b.aggregation_report_mode() &
           internal::ARM_FileReportAggregatesOnly);
    }
    // The perf-counter sanity checks are unrelated to the aggregation report
    // mode, so they must not be gated on it.
    CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid())
        << "Perf counters are not supported in multi-threaded cases.\n";
    CHECK(FLAGS_benchmark_perf_counters.empty() ||
          perf_counters_measurement.IsValid())
        << "Perf counters were requested but could not be set up.";
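    // The requested counters come from the --benchmark_perf_counters flag, a
    // comma-separated list of event names (split above via StrSplit), e.g.
    // (hypothetical invocation, assuming libpfm-style event names):
    //   ./my_benchmark --benchmark_perf_counters=CYCLES,INSTRUCTIONS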

    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
      DoOneRepetition(repetition_num);
    }

    // Calculate additional statistics
    run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

    // Maybe calculate complexity report
    if ((b.complexity() != oNone) && b.last_benchmark_instance) {
      auto additional_run_stats = ComputeBigO(complexity_reports);
      run_results.aggregates_only.insert(run_results.aggregates_only.end(),
                                         additional_run_stats.begin(),
                                         additional_run_stats.end());
      complexity_reports.clear();
    }
  }

  RunResults&& get_results() { return std::move(run_results); }

 private:
  RunResults run_results;

  const benchmark::internal::BenchmarkInstance& b;
  std::vector<BenchmarkReporter::Run>& complexity_reports;

  const double min_time;
  const int repeats;
  const bool has_explicit_iteration_count;

  std::vector<std::thread> pool;

  IterationCount iters;  // Preserved between repetitions!
  // So only the first repetition has to find/calculate it;
  // the other repetitions just reuse that precomputed iteration count.

  PerfCountersMeasurement perf_counters_measurement;
  PerfCountersMeasurement* const perf_counters_measurement_ptr;

  struct IterationResults {
    internal::ThreadManager::Result results;
    IterationCount iters;
    double seconds;
  };
  IterationResults DoNIterations() {
    VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";

    std::unique_ptr<internal::ThreadManager> manager;
    manager.reset(new internal::ThreadManager(b.threads()));

    // Run all but one thread in separate threads
    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
                             manager.get(), perf_counters_measurement_ptr);
    }
    // And run one thread here directly.
    // (If we were asked to run just one thread, we don't create new threads.)
    // Yes, we need to do this here *after* we start the separate threads.
    RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);

    // The main thread has finished. Now let's wait for the other threads.
    manager->WaitForAllThreads();
    for (std::thread& thread : pool) thread.join();

    IterationResults i;
    // Acquire the measurements/counters from the manager, UNDER THE LOCK!
    {
      MutexLock l(manager->GetBenchmarkMutex());
      i.results = manager->results;
    }

    // And get rid of the manager.
    manager.reset();

    // Adjust real/manual time stats since they were reported per thread.
    i.results.real_time_used /= b.threads();
    i.results.manual_time_used /= b.threads();
    // If we were measuring whole-process CPU usage, adjust the CPU time too.
    if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
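    // For example (illustrative numbers): with 4 threads that each report 1s
    // of wall-clock time for the same measurement interval, real_time_used
    // accumulates to 4s in RunInThread(), and dividing by b.threads()
    // recovers the 1s that actually elapsed.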

    VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
            << i.results.real_time_used << "\n";

    // By using KeepRunningBatch a benchmark can iterate more times than
    // requested, so take the iteration count from i.results.
    i.iters = i.results.iterations / b.threads();
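    // E.g. (illustrative): a benchmark using State::KeepRunningBatch(100)
    // that was asked for iters = 150 actually performs 200 iterations per
    // thread, so the per-thread count must be recomputed from the reported
    // total.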

    // Base decisions off of real time if requested by this benchmark.
    i.seconds = i.results.cpu_time_used;
    if (b.use_manual_time()) {
      i.seconds = i.results.manual_time_used;
    } else if (b.use_real_time()) {
      i.seconds = i.results.real_time_used;
    }

    return i;
  }

  IterationCount PredictNumItersNeeded(const IterationResults& i) const {
    // Decide by how much the iteration count should be increased.
    // Note: Avoid division by zero with max(seconds, 1ns).
    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
    // If our last run was at least 10% of FLAGS_benchmark_min_time then we
    // use the multiplier directly.
    // Otherwise we use at most 10 times expansion.
    // NOTE: When the last run was at least 10% of the min time the max
    // expansion should be 14x.
    bool is_significant = (i.seconds / min_time) > 0.1;
    multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
    if (multiplier <= 1.0) multiplier = 2.0;
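    // Worked example (illustrative numbers): with min_time = 0.5s, a last run
    // of 0.2s over 1000 iterations gives multiplier = 0.5 * 1.4 / 0.2 = 3.5;
    // 0.2 / 0.5 = 0.4 > 0.1, so the run was significant and the multiplier is
    // used as-is, i.e. the next run will attempt about 3500 iterations.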

    // So what seems to be the sufficiently-large iteration count? Round up.
    const IterationCount max_next_iters = static_cast<IterationCount>(
        std::lround(std::max(multiplier * static_cast<double>(i.iters),
                             static_cast<double>(i.iters) + 1.0)));
    // But we do have *some* sanity limits.
    const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);

    VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
    return next_iters;  // Already rounded up via std::lround() above.
  }

  bool ShouldReportIterationResults(const IterationResults& i) const {
    // Determine if this run should be reported:
    // either because it has run for a sufficient amount of time,
    // or because an error occurred.
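    // Worked example (illustrative numbers): with min_time = 0.5s, a run that
    // used only 0.2s of CPU time but 3.0s of real time is still reported,
    // because 3.0s >= 5 * 0.5s (unless the benchmark uses a manual timer).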
    return i.results.has_error_ ||
           i.iters >= kMaxIterations ||  // Too many iterations already.
           i.seconds >= min_time ||      // The elapsed time is large enough.
           // CPU time is specified but the elapsed real time greatly exceeds
           // the minimum time.
           // Note that user-provided timers are exempt from this sanity check.
           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
  }

  void DoOneRepetition(int64_t repetition_index) {
    const bool is_the_first_repetition = repetition_index == 0;
    IterationResults i;

    // We *may* be gradually increasing the length (iteration count)
    // of the benchmark until we decide the results are significant.
    // And once we do, we report those last results and exit.
    // Note that if there are repetitions, the iteration count is *only*
    // calculated for the *first* repetition; the other repetitions simply
    // reuse that precomputed iteration count.
    for (;;) {
      i = DoNIterations();

      // Do we consider the results to be significant?
      // If we are doing repetitions, and the first repetition was already
      // done, it has calculated the correct iteration count, so we have just
      // run that very iteration count. No need to calculate anything; just
      // report. Otherwise, the normal rules apply.
      const bool results_are_significant = !is_the_first_repetition ||
                                           has_explicit_iteration_count ||
                                           ShouldReportIterationResults(i);

      if (results_are_significant) break;  // Good, let's report them!

      // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
      // iteration count, and run the benchmark again...

      iters = PredictNumItersNeeded(i);
      assert(iters > i.iters &&
             "if we did more iterations than we want to do the next time, "
             "then we should have accepted the current iteration run.");
    }

    // One last thing: we also need to produce the 'memory measurements'.
    MemoryManager::Result memory_result;
    IterationCount memory_iterations = 0;
    if (memory_manager != nullptr) {
      // Only run a few iterations to reduce the impact of one-time
      // allocations in benchmarks that are not properly managed.
      memory_iterations = std::min<IterationCount>(16, iters);
      memory_manager->Start();
      std::unique_ptr<internal::ThreadManager> manager;
      manager.reset(new internal::ThreadManager(1));
      RunInThread(&b, memory_iterations, 0, manager.get(),
                  perf_counters_measurement_ptr);
      manager->WaitForAllThreads();
      manager.reset();

      memory_manager->Stop(&memory_result);
    }

    // Ok, now actually report.
    BenchmarkReporter::Run report =
        CreateRunReport(b, i.results, memory_iterations, memory_result,
                        i.seconds, repetition_index);

    if (!report.error_occurred && b.complexity() != oNone)
      complexity_reports.push_back(report);

    run_results.non_aggregates.push_back(report);
  }
};

}  // end namespace

RunResults RunBenchmark(
    const benchmark::internal::BenchmarkInstance& b,
    std::vector<BenchmarkReporter::Run>* complexity_reports) {
  internal::BenchmarkRunner r(b, complexity_reports);
  return r.get_results();
}
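
// A usage sketch (hypothetical caller; the real call site lives in
// benchmark.cc):
//
//   std::vector<BenchmarkReporter::Run> complexity_reports;
//   RunResults results = RunBenchmark(instance, &complexity_reports);
//
// RunResults carries the per-repetition reports plus any aggregates, ready
// to be handed to the reporters.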

}  // end namespace internal

}  // end namespace benchmark
