benchmark_runner.cc source code [glow/tests/googlebenchmark/src/benchmark_runner.cc]

1	// Copyright 2015 Google Inc. All rights reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include "benchmark_runner.h"
16	#include "benchmark/benchmark.h"
17	#include "benchmark_api_internal.h"
18	#include "internal_macros.h"
19
20	#ifndef BENCHMARK_OS_WINDOWS
21	#ifndef BENCHMARK_OS_FUCHSIA
22	#include <sys/resource.h>
23	#endif
24	#include <sys/time.h>
25	#include <unistd.h>
26	#endif
27
28	#include <algorithm>
29	#include <atomic>
30	#include <condition_variable>
31	#include <cstdio>
32	#include <cstdlib>
33	#include <fstream>
34	#include <iostream>
35	#include <memory>
36	#include <string>
37	#include <thread>
38	#include <utility>
39
40	#include "check.h"
41	#include "colorprint.h"
42	#include "commandlineflags.h"
43	#include "complexity.h"
44	#include "counter.h"
45	#include "internal_macros.h"
46	#include "log.h"
47	#include "mutex.h"
48	#include "re.h"
49	#include "statistics.h"
50	#include "string_util.h"
51	#include "thread_manager.h"
52	#include "thread_timer.h"
53
54	namespace benchmark {
55
56	namespace internal {
57
58	MemoryManager* memory_manager = nullptr;
59
60	namespace {
61
62	static constexpr IterationCount kMaxIterations = `1000000000`;
63
64	BenchmarkReporter::Run CreateRunReport(
65	const benchmark::internal::BenchmarkInstance& b,
66	const internal::ThreadManager::Result& results,
67	IterationCount memory_iterations,
68	const MemoryManager::Result& memory_result, double seconds,
69	int64_t repetition_index) {
70	// Create report about this benchmark run.
71	BenchmarkReporter::Run report;
72
73	report.run_name = b.name;
74	report.error_occurred = results.has_error_;
75	report.error_message = results.error_message_;
76	report.report_label = results.report_label_;
77	// This is the total iterations across all threads.
78	report.iterations = results.iterations;
79	report.time_unit = b.time_unit;
80	report.threads = b.threads;
81	report.repetition_index = repetition_index;
82	report.repetitions = b.repetitions;
83
84	if (!report.error_occurred) {
85	if (b.use_manual_time) {
86	report.real_accumulated_time = results.manual_time_used;
87	} else {
88	report.real_accumulated_time = results.real_time_used;
89	}
90	report.cpu_accumulated_time = results.cpu_time_used;
91	report.complexity_n = results.complexity_n;
92	report.complexity = b.complexity;
93	report.complexity_lambda = b.complexity_lambda;
94	report.statistics = b.statistics;
95	report.counters = results.counters;
96
97	if (memory_iterations > `0`) {
98	report.has_memory_result = true;
99	report.allocs_per_iter =
100	memory_iterations ? static_cast<double>(memory_result.num_allocs) /
101	memory_iterations
102	: `0`;
103	report.max_bytes_used = memory_result.max_bytes_used;
104	}
105
106	internal::Finish(&report.counters, results.iterations, seconds, b.threads);
107	}
108	return report;
109	}
110
111	// Execute one thread of benchmark b for the specified number of iterations.
112	// Adds the stats collected for the thread into total.*
113	void RunInThread(const BenchmarkInstance* b, IterationCount iters,
114	int thread_id, ThreadManager* manager) {
115	internal::ThreadTimer timer(
116	b->measure_process_cpu_time
117	? internal::ThreadTimer::CreateProcessCpuTime()
118	: internal::ThreadTimer::Create());
119	State st = b->Run(iters, thread_id, &timer, manager);
120	CHECK(st.iterations() >= st.max_iterations)
121	<< "Benchmark returned before State::KeepRunning() returned false!";
122	{
123	MutexLock l(manager->GetBenchmarkMutex());
124	internal::ThreadManager::Result& results = manager->results;
125	results.iterations += st.iterations();
126	results.cpu_time_used += timer.cpu_time_used();
127	results.real_time_used += timer.real_time_used();
128	results.manual_time_used += timer.manual_time_used();
129	results.complexity_n += st.complexity_length_n();
130	internal::Increment(&results.counters, st.counters);
131	}
132	manager->NotifyThreadComplete();
133	}
134
135	class BenchmarkRunner {
136	public:
137	BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
138	std::vector<BenchmarkReporter::Run>* complexity_reports_)
139	: b(b_),
140	complexity_reports(*complexity_reports_),
141	min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
142	repeats(b.repetitions != `0` ? b.repetitions
143	: FLAGS_benchmark_repetitions),
144	has_explicit_iteration_count(b.iterations != `0`),
145	pool (b.threads - `1`),
146	iters(has_explicit_iteration_count ? b.iterations : `1`) {
147	run_results.display_report_aggregates_only =
148	(FLAGS_benchmark_report_aggregates_only \|\|
149	FLAGS_benchmark_display_aggregates_only);
150	run_results.file_report_aggregates_only =
151	FLAGS_benchmark_report_aggregates_only;
152	if (b.aggregation_report_mode != internal::ARM_Unspecified) {
153	run_results.display_report_aggregates_only =
154	(b.aggregation_report_mode &
155	internal::ARM_DisplayReportAggregatesOnly);
156	run_results.file_report_aggregates_only =
157	(b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
158	}
159
160	for (int repetition_num = `0`; repetition_num < repeats; repetition_num++) {
161	DoOneRepetition(repetition_num);
162	}
163
164	// Calculate additional statistics
165	run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
166
167	// Maybe calculate complexity report
168	if ((b.complexity != oNone) && b.last_benchmark_instance) {
169	auto additional_run_stats = ComputeBigO(complexity_reports);
170	run_results.aggregates_only.insert(run_results.aggregates_only.end(),
171	additional_run_stats.begin(),
172	additional_run_stats.end());
173	complexity_reports.clear();
174	}
175	}
176
177	RunResults&& get_results() { return std::move(run_results); }
178
179	private:
180	RunResults run_results;
181
182	const benchmark::internal::BenchmarkInstance& b;
183	std::vector<BenchmarkReporter::Run>& complexity_reports;
184
185	const double min_time;
186	const int repeats;
187	const bool has_explicit_iteration_count;
188
189	std::vector<std::thread> pool;
190
191	IterationCount iters; // preserved between repetitions!
192	// So only the first repetition has to find/calculate it,
193	// the other repetitions will just use that precomputed iteration count.
194
195	struct IterationResults {
196	internal::ThreadManager::Result results;
197	IterationCount iters;
198	double seconds;
199	};
200	IterationResults DoNIterations() {
201	VLOG(`2`) << "Running " << b.name.str() << " for " << iters << "\n";
202
203	std::unique_ptr<internal::ThreadManager> manager;
204	manager.reset(new internal::ThreadManager (b.threads));
205
206	// Run all but one thread in separate threads
207	for (std::size_t ti = `0`; ti < pool.size(); ++ti) {
208	pool [ti] = std::thread (&RunInThread, &b, iters, static_cast<int>(ti + `1`),
209	manager.get());
210	}
211	// And run one thread here directly.
212	// (If we were asked to run just one thread, we don't create new threads.)
213	// Yes, we need to do this here after* we start the separate threads.*
214	RunInThread(&b, iters, `0`, manager.get());
215
216	// The main thread has finished. Now let's wait for the other threads.
217	manager ->WaitForAllThreads();
218	for (std::thread& thread : pool) thread.join();
219
220	IterationResults i;
221	// Acquire the measurements/counters from the manager, UNDER THE LOCK!
222	{
223	MutexLock l(manager ->GetBenchmarkMutex());
224	i.results = manager ->results;
225	}
226
227	// And get rid of the manager.
228	manager.reset();
229
230	// Adjust real/manual time stats since they were reported per thread.
231	i.results.real_time_used /= b.threads;
232	i.results.manual_time_used /= b.threads;
233	// If we were measuring whole-process CPU usage, adjust the CPU time too.
234	if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads;
235
236	VLOG(`2`) << "Ran in " << i.results.cpu_time_used << "/"
237	<< i.results.real_time_used << "\n";
238
239	// So for how long were we running?
240	i.iters = iters;
241	// Base decisions off of real time if requested by this benchmark.
242	i.seconds = i.results.cpu_time_used;
243	if (b.use_manual_time) {
244	i.seconds = i.results.manual_time_used;
245	} else if (b.use_real_time) {
246	i.seconds = i.results.real_time_used;
247	}
248
249	return i;
250	}
251
252	IterationCount PredictNumItersNeeded(const IterationResults& i) const {
253	// See how much iterations should be increased by.
254	// Note: Avoid division by zero with max(seconds, 1ns).
255	double multiplier = min_time * `1.4` / std::max(i.seconds, `1e-9`);
256	// If our last run was at least 10% of FLAGS_benchmark_min_time then we
257	// use the multiplier directly.
258	// Otherwise we use at most 10 times expansion.
259	// NOTE: When the last run was at least 10% of the min time the max
260	// expansion should be 14x.
261	bool is_significant = (i.seconds / min_time) > `0.1`;
262	multiplier = is_significant ? multiplier : std::min(`10.0`, multiplier);
263	if (multiplier <= `1.0`) multiplier = `2.0`;
264
265	// So what seems to be the sufficiently-large iteration count? Round up.
266	const IterationCount max_next_iters =
267	`0.5` + std::max(multiplier * i.iters, i.iters + `1.0`);
268	// But we do have some* sanity limits though..*
269	const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
270
271	VLOG(`3`) << "Next iters: " << next_iters << ", " << multiplier << "\n";
272	return next_iters; // round up before conversion to integer.
273	}
274
275	bool ShouldReportIterationResults(const IterationResults& i) const {
276	// Determine if this run should be reported;
277	// Either it has run for a sufficient amount of time
278	// or because an error was reported.
279	return i.results.has_error_ \|\|
280	i.iters >= kMaxIterations \|\| // Too many iterations already.
281	i.seconds >= min_time \|\| // The elapsed time is large enough.
282	// CPU time is specified but the elapsed real time greatly exceeds
283	// the minimum time.
284	// Note that user provided timers are except from this sanity check.
285	((i.results.real_time_used >= `5` * min_time) && !b.use_manual_time);
286	}
287
288	void DoOneRepetition(int64_t repetition_index) {
289	const bool is_the_first_repetition = repetition_index == `0`;
290	IterationResults i;
291
292	// We may* be gradually increasing the length (iteration count)*
293	// of the benchmark until we decide the results are significant.
294	// And once we do, we report those last results and exit.
295	// Please do note that the if there are repetitions, the iteration count
296	// is only* calculated for the first repetition, and other repetitions*
297	// simply use that precomputed iteration count.
298	for (;;) {
299	i = DoNIterations();
300
301	// Do we consider the results to be significant?
302	// If we are doing repetitions, and the first repetition was already done,
303	// it has calculated the correct iteration time, so we have run that very
304	// iteration count just now. No need to calculate anything. Just report.
305	// Else, the normal rules apply.
306	const bool results_are_significant = !is_the_first_repetition \|\|
307	has_explicit_iteration_count \|\|
308	ShouldReportIterationResults(i);
309
310	if (results_are_significant) break; // Good, let's report them!
311
312	// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
313	// iteration count, and run the benchmark again...
314
315	iters = PredictNumItersNeeded(i);
316	assert(iters > i.iters &&
317	"if we did more iterations than we want to do the next time, "
318	"then we should have accepted the current iteration run.");
319	}
320
321	// Oh, one last thing, we need to also produce the 'memory measurements'..
322	MemoryManager::Result memory_result;
323	IterationCount memory_iterations = `0`;
324	if (memory_manager != nullptr) {
325	// Only run a few iterations to reduce the impact of one-time
326	// allocations in benchmarks that are not properly managed.
327	memory_iterations = std::min<IterationCount>(`16`, iters);
328	memory_manager->Start();
329	std::unique_ptr<internal::ThreadManager> manager;
330	manager.reset(new internal::ThreadManager (`1`));
331	RunInThread(&b, memory_iterations, `0`, manager.get());
332	manager ->WaitForAllThreads();
333	manager.reset();
334
335	memory_manager->Stop(&memory_result);
336	}
337
338	// Ok, now actualy report.
339	BenchmarkReporter::Run report =
340	CreateRunReport(b, i.results, memory_iterations, memory_result,
341	i.seconds, repetition_index);
342
343	if (!report.error_occurred && b.complexity != oNone)
344	complexity_reports.push_back(report);
345
346	run_results.non_aggregates.push_back(report);
347	}
348	};
349
350	} // end namespace
351
352	RunResults RunBenchmark(
353	const benchmark::internal::BenchmarkInstance& b,
354	std::vector<BenchmarkReporter::Run>* complexity_reports) {
355	internal::BenchmarkRunner r(b, complexity_reports);
356	return r.get_results();
357	}
358
359	} // end namespace internal
360
361	} // end namespace benchmark
362

Browse the source code of glow/tests/googlebenchmark/src/benchmark_runner.cc