1 | // Copyright 2015 Google Inc. All rights reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "benchmark_runner.h" |
16 | #include "benchmark/benchmark.h" |
17 | #include "benchmark_api_internal.h" |
18 | #include "internal_macros.h" |
19 | |
20 | #ifndef BENCHMARK_OS_WINDOWS |
21 | #ifndef BENCHMARK_OS_FUCHSIA |
22 | #include <sys/resource.h> |
23 | #endif |
24 | #include <sys/time.h> |
25 | #include <unistd.h> |
26 | #endif |
27 | |
28 | #include <algorithm> |
29 | #include <atomic> |
30 | #include <condition_variable> |
31 | #include <cstdio> |
32 | #include <cstdlib> |
33 | #include <fstream> |
34 | #include <iostream> |
35 | #include <memory> |
36 | #include <string> |
37 | #include <thread> |
38 | #include <utility> |
39 | |
40 | #include "check.h" |
41 | #include "colorprint.h" |
42 | #include "commandlineflags.h" |
43 | #include "complexity.h" |
44 | #include "counter.h" |
45 | #include "internal_macros.h" |
46 | #include "log.h" |
47 | #include "mutex.h" |
48 | #include "re.h" |
49 | #include "statistics.h" |
50 | #include "string_util.h" |
51 | #include "thread_manager.h" |
52 | #include "thread_timer.h" |
53 | |
54 | namespace benchmark { |
55 | |
56 | namespace internal { |
57 | |
58 | MemoryManager* memory_manager = nullptr; |
59 | |
60 | namespace { |
61 | |
62 | static constexpr IterationCount kMaxIterations = 1000000000; |
63 | |
64 | BenchmarkReporter::Run CreateRunReport( |
65 | const benchmark::internal::BenchmarkInstance& b, |
66 | const internal::ThreadManager::Result& results, |
67 | IterationCount memory_iterations, |
68 | const MemoryManager::Result& memory_result, double seconds, |
69 | int64_t repetition_index) { |
70 | // Create report about this benchmark run. |
71 | BenchmarkReporter::Run report; |
72 | |
73 | report.run_name = b.name; |
74 | report.error_occurred = results.has_error_; |
75 | report.error_message = results.error_message_; |
76 | report.report_label = results.report_label_; |
77 | // This is the total iterations across all threads. |
78 | report.iterations = results.iterations; |
79 | report.time_unit = b.time_unit; |
80 | report.threads = b.threads; |
81 | report.repetition_index = repetition_index; |
82 | report.repetitions = b.repetitions; |
83 | |
84 | if (!report.error_occurred) { |
85 | if (b.use_manual_time) { |
86 | report.real_accumulated_time = results.manual_time_used; |
87 | } else { |
88 | report.real_accumulated_time = results.real_time_used; |
89 | } |
90 | report.cpu_accumulated_time = results.cpu_time_used; |
91 | report.complexity_n = results.complexity_n; |
92 | report.complexity = b.complexity; |
93 | report.complexity_lambda = b.complexity_lambda; |
94 | report.statistics = b.statistics; |
95 | report.counters = results.counters; |
96 | |
97 | if (memory_iterations > 0) { |
98 | report.has_memory_result = true; |
99 | report.allocs_per_iter = |
100 | memory_iterations ? static_cast<double>(memory_result.num_allocs) / |
101 | memory_iterations |
102 | : 0; |
103 | report.max_bytes_used = memory_result.max_bytes_used; |
104 | } |
105 | |
106 | internal::Finish(&report.counters, results.iterations, seconds, b.threads); |
107 | } |
108 | return report; |
109 | } |
110 | |
111 | // Execute one thread of benchmark b for the specified number of iterations. |
112 | // Adds the stats collected for the thread into *total. |
113 | void RunInThread(const BenchmarkInstance* b, IterationCount iters, |
114 | int thread_id, ThreadManager* manager) { |
115 | internal::ThreadTimer timer( |
116 | b->measure_process_cpu_time |
117 | ? internal::ThreadTimer::CreateProcessCpuTime() |
118 | : internal::ThreadTimer::Create()); |
119 | State st = b->Run(iters, thread_id, &timer, manager); |
120 | CHECK(st.iterations() >= st.max_iterations) |
121 | << "Benchmark returned before State::KeepRunning() returned false!" ; |
122 | { |
123 | MutexLock l(manager->GetBenchmarkMutex()); |
124 | internal::ThreadManager::Result& results = manager->results; |
125 | results.iterations += st.iterations(); |
126 | results.cpu_time_used += timer.cpu_time_used(); |
127 | results.real_time_used += timer.real_time_used(); |
128 | results.manual_time_used += timer.manual_time_used(); |
129 | results.complexity_n += st.complexity_length_n(); |
130 | internal::Increment(&results.counters, st.counters); |
131 | } |
132 | manager->NotifyThreadComplete(); |
133 | } |
134 | |
135 | class BenchmarkRunner { |
136 | public: |
137 | BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, |
138 | std::vector<BenchmarkReporter::Run>* complexity_reports_) |
139 | : b(b_), |
140 | complexity_reports(*complexity_reports_), |
141 | min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), |
142 | repeats(b.repetitions != 0 ? b.repetitions |
143 | : FLAGS_benchmark_repetitions), |
144 | has_explicit_iteration_count(b.iterations != 0), |
145 | pool(b.threads - 1), |
146 | iters(has_explicit_iteration_count ? b.iterations : 1) { |
147 | run_results.display_report_aggregates_only = |
148 | (FLAGS_benchmark_report_aggregates_only || |
149 | FLAGS_benchmark_display_aggregates_only); |
150 | run_results.file_report_aggregates_only = |
151 | FLAGS_benchmark_report_aggregates_only; |
152 | if (b.aggregation_report_mode != internal::ARM_Unspecified) { |
153 | run_results.display_report_aggregates_only = |
154 | (b.aggregation_report_mode & |
155 | internal::ARM_DisplayReportAggregatesOnly); |
156 | run_results.file_report_aggregates_only = |
157 | (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); |
158 | } |
159 | |
160 | for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { |
161 | DoOneRepetition(repetition_num); |
162 | } |
163 | |
164 | // Calculate additional statistics |
165 | run_results.aggregates_only = ComputeStats(run_results.non_aggregates); |
166 | |
167 | // Maybe calculate complexity report |
168 | if ((b.complexity != oNone) && b.last_benchmark_instance) { |
169 | auto additional_run_stats = ComputeBigO(complexity_reports); |
170 | run_results.aggregates_only.insert(run_results.aggregates_only.end(), |
171 | additional_run_stats.begin(), |
172 | additional_run_stats.end()); |
173 | complexity_reports.clear(); |
174 | } |
175 | } |
176 | |
177 | RunResults&& get_results() { return std::move(run_results); } |
178 | |
179 | private: |
180 | RunResults run_results; |
181 | |
182 | const benchmark::internal::BenchmarkInstance& b; |
183 | std::vector<BenchmarkReporter::Run>& complexity_reports; |
184 | |
185 | const double min_time; |
186 | const int repeats; |
187 | const bool has_explicit_iteration_count; |
188 | |
189 | std::vector<std::thread> pool; |
190 | |
191 | IterationCount iters; // preserved between repetitions! |
192 | // So only the first repetition has to find/calculate it, |
193 | // the other repetitions will just use that precomputed iteration count. |
194 | |
195 | struct IterationResults { |
196 | internal::ThreadManager::Result results; |
197 | IterationCount iters; |
198 | double seconds; |
199 | }; |
200 | IterationResults DoNIterations() { |
201 | VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n" ; |
202 | |
203 | std::unique_ptr<internal::ThreadManager> manager; |
204 | manager.reset(new internal::ThreadManager(b.threads)); |
205 | |
206 | // Run all but one thread in separate threads |
207 | for (std::size_t ti = 0; ti < pool.size(); ++ti) { |
208 | pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1), |
209 | manager.get()); |
210 | } |
211 | // And run one thread here directly. |
212 | // (If we were asked to run just one thread, we don't create new threads.) |
213 | // Yes, we need to do this here *after* we start the separate threads. |
214 | RunInThread(&b, iters, 0, manager.get()); |
215 | |
216 | // The main thread has finished. Now let's wait for the other threads. |
217 | manager->WaitForAllThreads(); |
218 | for (std::thread& thread : pool) thread.join(); |
219 | |
220 | IterationResults i; |
221 | // Acquire the measurements/counters from the manager, UNDER THE LOCK! |
222 | { |
223 | MutexLock l(manager->GetBenchmarkMutex()); |
224 | i.results = manager->results; |
225 | } |
226 | |
227 | // And get rid of the manager. |
228 | manager.reset(); |
229 | |
230 | // Adjust real/manual time stats since they were reported per thread. |
231 | i.results.real_time_used /= b.threads; |
232 | i.results.manual_time_used /= b.threads; |
233 | // If we were measuring whole-process CPU usage, adjust the CPU time too. |
234 | if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; |
235 | |
236 | VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" |
237 | << i.results.real_time_used << "\n" ; |
238 | |
239 | // So for how long were we running? |
240 | i.iters = iters; |
241 | // Base decisions off of real time if requested by this benchmark. |
242 | i.seconds = i.results.cpu_time_used; |
243 | if (b.use_manual_time) { |
244 | i.seconds = i.results.manual_time_used; |
245 | } else if (b.use_real_time) { |
246 | i.seconds = i.results.real_time_used; |
247 | } |
248 | |
249 | return i; |
250 | } |
251 | |
252 | IterationCount PredictNumItersNeeded(const IterationResults& i) const { |
253 | // See how much iterations should be increased by. |
254 | // Note: Avoid division by zero with max(seconds, 1ns). |
255 | double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); |
256 | // If our last run was at least 10% of FLAGS_benchmark_min_time then we |
257 | // use the multiplier directly. |
258 | // Otherwise we use at most 10 times expansion. |
259 | // NOTE: When the last run was at least 10% of the min time the max |
260 | // expansion should be 14x. |
261 | bool is_significant = (i.seconds / min_time) > 0.1; |
262 | multiplier = is_significant ? multiplier : std::min(10.0, multiplier); |
263 | if (multiplier <= 1.0) multiplier = 2.0; |
264 | |
265 | // So what seems to be the sufficiently-large iteration count? Round up. |
266 | const IterationCount max_next_iters = |
267 | 0.5 + std::max(multiplier * i.iters, i.iters + 1.0); |
268 | // But we do have *some* sanity limits though.. |
269 | const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); |
270 | |
271 | VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n" ; |
272 | return next_iters; // round up before conversion to integer. |
273 | } |
274 | |
275 | bool ShouldReportIterationResults(const IterationResults& i) const { |
276 | // Determine if this run should be reported; |
277 | // Either it has run for a sufficient amount of time |
278 | // or because an error was reported. |
279 | return i.results.has_error_ || |
280 | i.iters >= kMaxIterations || // Too many iterations already. |
281 | i.seconds >= min_time || // The elapsed time is large enough. |
282 | // CPU time is specified but the elapsed real time greatly exceeds |
283 | // the minimum time. |
284 | // Note that user provided timers are except from this sanity check. |
285 | ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); |
286 | } |
287 | |
288 | void DoOneRepetition(int64_t repetition_index) { |
289 | const bool is_the_first_repetition = repetition_index == 0; |
290 | IterationResults i; |
291 | |
292 | // We *may* be gradually increasing the length (iteration count) |
293 | // of the benchmark until we decide the results are significant. |
294 | // And once we do, we report those last results and exit. |
295 | // Please do note that the if there are repetitions, the iteration count |
296 | // is *only* calculated for the *first* repetition, and other repetitions |
297 | // simply use that precomputed iteration count. |
298 | for (;;) { |
299 | i = DoNIterations(); |
300 | |
301 | // Do we consider the results to be significant? |
302 | // If we are doing repetitions, and the first repetition was already done, |
303 | // it has calculated the correct iteration time, so we have run that very |
304 | // iteration count just now. No need to calculate anything. Just report. |
305 | // Else, the normal rules apply. |
306 | const bool results_are_significant = !is_the_first_repetition || |
307 | has_explicit_iteration_count || |
308 | ShouldReportIterationResults(i); |
309 | |
310 | if (results_are_significant) break; // Good, let's report them! |
311 | |
312 | // Nope, bad iteration. Let's re-estimate the hopefully-sufficient |
313 | // iteration count, and run the benchmark again... |
314 | |
315 | iters = PredictNumItersNeeded(i); |
316 | assert(iters > i.iters && |
317 | "if we did more iterations than we want to do the next time, " |
318 | "then we should have accepted the current iteration run." ); |
319 | } |
320 | |
321 | // Oh, one last thing, we need to also produce the 'memory measurements'.. |
322 | MemoryManager::Result memory_result; |
323 | IterationCount memory_iterations = 0; |
324 | if (memory_manager != nullptr) { |
325 | // Only run a few iterations to reduce the impact of one-time |
326 | // allocations in benchmarks that are not properly managed. |
327 | memory_iterations = std::min<IterationCount>(16, iters); |
328 | memory_manager->Start(); |
329 | std::unique_ptr<internal::ThreadManager> manager; |
330 | manager.reset(new internal::ThreadManager(1)); |
331 | RunInThread(&b, memory_iterations, 0, manager.get()); |
332 | manager->WaitForAllThreads(); |
333 | manager.reset(); |
334 | |
335 | memory_manager->Stop(&memory_result); |
336 | } |
337 | |
338 | // Ok, now actualy report. |
339 | BenchmarkReporter::Run report = |
340 | CreateRunReport(b, i.results, memory_iterations, memory_result, |
341 | i.seconds, repetition_index); |
342 | |
343 | if (!report.error_occurred && b.complexity != oNone) |
344 | complexity_reports.push_back(report); |
345 | |
346 | run_results.non_aggregates.push_back(report); |
347 | } |
348 | }; |
349 | |
350 | } // end namespace |
351 | |
352 | RunResults RunBenchmark( |
353 | const benchmark::internal::BenchmarkInstance& b, |
354 | std::vector<BenchmarkReporter::Run>* complexity_reports) { |
355 | internal::BenchmarkRunner r(b, complexity_reports); |
356 | return r.get_results(); |
357 | } |
358 | |
359 | } // end namespace internal |
360 | |
361 | } // end namespace benchmark |
362 | |