// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark_runner.h"

#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif

#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"

namespace benchmark {

namespace internal {

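// Set via benchmark::RegisterMemoryManager(). When non-null,
// DoOneRepetition() performs an extra instrumented pass so the manager can
// report allocation statistics for the run.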
MemoryManager* memory_manager = nullptr;

namespace {

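// Hard upper bound on the iteration count of a single repetition:
// PredictNumItersNeeded() never proposes more than this, and reaching it
// forces the results to be reported (see ShouldReportIterationResults()).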
static constexpr IterationCount kMaxIterations = 1000000000;

BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::BenchmarkInstance& b,
    const internal::ThreadManager::Result& results,
    IterationCount memory_iterations,
    const MemoryManager::Result* memory_result, double seconds,
    int64_t repetition_index, int64_t repeats) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.run_name = b.name();
  report.family_index = b.family_index();
  report.per_family_instance_index = b.per_family_instance_index();
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
  // This is the total iteration count across all threads.
  report.iterations = results.iterations;
  report.time_unit = b.time_unit();
  report.threads = b.threads();
  report.repetition_index = repetition_index;
  report.repetitions = repeats;

  if (!report.error_occurred) {
    if (b.use_manual_time()) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.cpu_accumulated_time = results.cpu_time_used;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity();
    report.complexity_lambda = b.complexity_lambda();
    report.statistics = &b.statistics();
    report.counters = results.counters;

    if (memory_iterations > 0) {
      assert(memory_result != nullptr);
      report.memory_result = memory_result;
      report.allocs_per_iter =
          static_cast<double>(memory_result->num_allocs) / memory_iterations;
    }

    internal::Finish(&report.counters, results.iterations, seconds,
                     b.threads());
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
                 int thread_id, ThreadManager* manager,
                 PerfCountersMeasurement* perf_counters_measurement) {
  internal::ThreadTimer timer(
      b->measure_process_cpu_time()
          ? internal::ThreadTimer::CreateProcessCpuTime()
          : internal::ThreadTimer::Create());
  State st =
      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
  BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.complexity_n += st.complexity_length_n();
    internal::Increment(&results.counters, st.counters);
  }
  manager->NotifyThreadComplete();
}

}  // end namespace

BenchmarkRunner::BenchmarkRunner(
    const benchmark::internal::BenchmarkInstance& b_,
    BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
    : b(b_),
      reports_for_family(reports_for_family_),
      min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
      repeats(b.repetitions() != 0 ? b.repetitions()
                                   : FLAGS_benchmark_repetitions),
      has_explicit_iteration_count(b.iterations() != 0),
      pool(b.threads() - 1),
      iters(has_explicit_iteration_count ? b.iterations() : 1),
      perf_counters_measurement(
          PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
      perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
                                        ? &perf_counters_measurement
                                        : nullptr) {
  run_results.display_report_aggregates_only =
      (FLAGS_benchmark_report_aggregates_only ||
       FLAGS_benchmark_display_aggregates_only);
  run_results.file_report_aggregates_only =
      FLAGS_benchmark_report_aggregates_only;
  if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
    run_results.display_report_aggregates_only =
        (b.aggregation_report_mode() &
         internal::ARM_DisplayReportAggregatesOnly);
    run_results.file_report_aggregates_only =
        (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
  }
  // This check must hold regardless of the aggregation report mode: if perf
  // counters were requested on the command line, they must have been set up
  // successfully.
  BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
           perf_counters_measurement.IsValid())
      << "Perf counters were requested but could not be set up.";
}
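
// A sketch of how this runner is typically driven (the real call site lives
// in benchmark.cc; the variable names below are illustrative):
//
//   BenchmarkRunner runner(instance, reports_for_family);
//   while (runner.HasRepeatsRemaining()) runner.DoOneRepetition();
//   RunResults results = runner.GetResults();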

BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
  BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";

  std::unique_ptr<internal::ThreadManager> manager;
  manager.reset(new internal::ThreadManager(b.threads()));

  // Run all but one thread in separate threads
  for (std::size_t ti = 0; ti < pool.size(); ++ti) {
    pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
                           manager.get(), perf_counters_measurement_ptr);
  }
  // And run one thread here directly.
  // (If we were asked to run just one thread, we don't create new threads.)
  // Yes, we need to do this here *after* we start the separate threads.
  RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);

  // The main thread has finished. Now let's wait for the other threads.
  manager->WaitForAllThreads();
  for (std::thread& thread : pool) thread.join();

  IterationResults i;
  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
  {
    MutexLock l(manager->GetBenchmarkMutex());
    i.results = manager->results;
  }

  // And get rid of the manager.
  manager.reset();

  // Adjust real/manual time stats since they were reported per thread.
  i.results.real_time_used /= b.threads();
  i.results.manual_time_used /= b.threads();
  // If we were measuring whole-process CPU usage, adjust the CPU time too.
  if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
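  // (Each thread's wall-clock interval overlaps the others', so the summed
  // per-thread real time for a ~1s run with 4 threads is ~4s; dividing by
  // b.threads() recovers the ~1s of actual elapsed time.)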

  BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
             << i.results.real_time_used << "\n";

  // By using KeepRunningBatch a benchmark can iterate more times than
  // requested, so take the iteration count from i.results.
  i.iters = i.results.iterations / b.threads();
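  // e.g. a benchmark using State::KeepRunningBatch(100) that was asked for
  // 150 iterations actually performs 200 per thread, and 200 is what each
  // thread contributes to i.results.iterations.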

  // Decisions are based on CPU time by default; switch to manual or real
  // time below if this benchmark requests it.
  i.seconds = i.results.cpu_time_used;
  if (b.use_manual_time()) {
    i.seconds = i.results.manual_time_used;
  } else if (b.use_real_time()) {
    i.seconds = i.results.real_time_used;
  }

  return i;
}

IterationCount BenchmarkRunner::PredictNumItersNeeded(
    const IterationResults& i) const {
  // Estimate how much the iteration count should be increased.
  // Note: Avoid division by zero with max(seconds, 1ns).
  double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
  // If our last run was at least 10% of min_time, use the computed
  // multiplier directly; otherwise the measurement is too noisy to trust,
  // so expand by at most 10x.
  // NOTE: when the last run is exactly 10% of the min time, the computed
  // multiplier peaks at 14x (1.4 / 0.1).
  bool is_significant = (i.seconds / min_time) > 0.1;
  multiplier = is_significant ? multiplier : 10.0;
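  // Worked example (illustrative numbers): with min_time = 0.5s, a last run
  // of i.seconds = 0.2s over i.iters = 1000 iterations yields
  // multiplier = 0.5 * 1.4 / 0.2 = 3.5; the run was significant
  // (0.2 / 0.5 = 0.4 > 0.1), so we aim for ~3500 iterations next time.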

  // So what seems to be the sufficiently-large iteration count? Round up.
  const IterationCount max_next_iters = static_cast<IterationCount>(
      std::lround(std::max(multiplier * static_cast<double>(i.iters),
                           static_cast<double>(i.iters) + 1.0)));
  // But we do have *some* limits, though.
  const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);

  BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
  return next_iters;
}

bool BenchmarkRunner::ShouldReportIterationResults(
    const IterationResults& i) const {
  // Determine if this run should be reported:
  // either it has run for a sufficient amount of time,
  // or an error occurred.
  return i.results.has_error_ ||
         i.iters >= kMaxIterations ||  // Too many iterations already.
         i.seconds >= min_time ||      // The elapsed time is large enough.
         // CPU time is specified but the elapsed real time greatly exceeds
         // the minimum time.
         // Note that user-provided timers are exempt from this check.
         ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
}

void BenchmarkRunner::DoOneRepetition() {
  assert(HasRepeatsRemaining() && "Already done all repetitions?");

  const bool is_the_first_repetition = num_repetitions_done == 0;
  IterationResults i;

  // We *may* be gradually increasing the length (iteration count)
  // of the benchmark until we decide the results are significant.
  // And once we do, we report those last results and exit.
  // Please do note that if there are repetitions, the iteration count is
  // *only* calculated for the *first* repetition, and the other repetitions
  // simply reuse that precomputed iteration count.
  for (;;) {
    b.Setup();
    i = DoNIterations();
    b.Teardown();

    // Do we consider the results to be significant?
    // If we are doing repetitions, and the first repetition was already done,
    // it has calculated the correct iteration time, so we have run that very
    // iteration count just now. No need to calculate anything. Just report.
    // Else, the normal rules apply.
    const bool results_are_significant = !is_the_first_repetition ||
                                         has_explicit_iteration_count ||
                                         ShouldReportIterationResults(i);

    if (results_are_significant) break;  // Good, let's report them!

    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
    // iteration count, and run the benchmark again...

    iters = PredictNumItersNeeded(i);
    assert(iters > i.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }

  // One last thing: we also need to produce the memory measurements, if a
  // MemoryManager was registered.
  MemoryManager::Result* memory_result = nullptr;
  IterationCount memory_iterations = 0;
  if (memory_manager != nullptr) {
    // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
    // optional so we don't have to own the Result here.
    // Can't do it now due to cxx03.
    memory_results.push_back(MemoryManager::Result());
    memory_result = &memory_results.back();
    // Only run a few iterations to reduce the impact of one-time
    // allocations in benchmarks that are not properly managed.
    memory_iterations = std::min<IterationCount>(16, iters);
    memory_manager->Start();
    std::unique_ptr<internal::ThreadManager> manager;
    manager.reset(new internal::ThreadManager(1));
    b.Setup();
    RunInThread(&b, memory_iterations, 0, manager.get(),
                perf_counters_measurement_ptr);
    manager->WaitForAllThreads();
    manager.reset();
    b.Teardown();

    BENCHMARK_DISABLE_DEPRECATED_WARNING
    memory_manager->Stop(memory_result);
    BENCHMARK_RESTORE_DEPRECATED_WARNING
  }

  // Ok, now actually report.
  BenchmarkReporter::Run report =
      CreateRunReport(b, i.results, memory_iterations, memory_result,
                      i.seconds, num_repetitions_done, repeats);

  if (reports_for_family) {
    ++reports_for_family->num_runs_done;
    if (!report.error_occurred) reports_for_family->Runs.push_back(report);
  }

  run_results.non_aggregates.push_back(report);

  ++num_repetitions_done;
}

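// Note: returns an rvalue reference into this runner; the caller is expected
// to move from the result, after which the runner must not be used again.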
RunResults&& BenchmarkRunner::GetResults() {
  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");

  // Calculate additional statistics over the repetitions of this instance.
  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

  return std::move(run_results);
}

}  // end namespace internal

}  // end namespace benchmark