1 | // Copyright 2020 The Marl Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "marl/scheduler.h" |
16 | #include "marl/thread.h" |
17 | |
18 | #include "benchmark/benchmark.h" |
19 | |
// MARL_FULL_BENCHMARK selects how many thread counts are benchmarked.
// Define it to 1 (for example on the compiler command line) to run the
// benchmarks for every available logical CPU core. When it has not been
// defined it defaults to 0.
#if !defined(MARL_FULL_BENCHMARK)
#define MARL_FULL_BENCHMARK 0
#endif  // !defined(MARL_FULL_BENCHMARK)
25 | |
26 | class Schedule : public benchmark::Fixture { |
27 | public: |
28 | void SetUp(const ::benchmark::State&) {} |
29 | |
30 | void TearDown(const ::benchmark::State&) {} |
31 | |
32 | // run() creates a scheduler using the config cfg, sets the number of worker |
33 | // threads from the benchmark arguments, calls f, then unbinds and destructs |
34 | // the scheduler. |
35 | // F must be a function of the signature: void(int numTasks) |
36 | template <typename F> |
37 | void run(const ::benchmark::State& state, |
38 | marl::Scheduler::Config cfg, |
39 | F&& f) { |
40 | cfg.setWorkerThreadCount(numThreads(state)); |
41 | |
42 | marl::Scheduler scheduler(cfg); |
43 | scheduler.bind(); |
44 | f(numTasks(state)); |
45 | scheduler.unbind(); |
46 | } |
47 | |
48 | // run() creates a scheduler, sets the number of worker threads from the |
49 | // benchmark arguments, calls f, then unbinds and destructs the scheduler. |
50 | // F must be a function of the signature: void(int numTasks) |
51 | template <typename F> |
52 | void run(const ::benchmark::State& state, F&& f) { |
53 | run(state, marl::Scheduler::Config{}, f); |
54 | } |
55 | |
56 | // args() sets up the benchmark to run a number of tasks over a number of |
57 | // threads. |
58 | // If MARL_FULL_BENCHMARK is enabled, then NumTasks tasks will be run |
59 | // across from 0 to numLogicalCPUs worker threads. |
60 | // If MARL_FULL_BENCHMARK is not enabled, then NumTasks tasks will be run |
61 | // across [0 .. numLogicalCPUs] worker threads in 2^n steps. |
62 | template <int NumTasks = 0x40000> |
63 | static void args(benchmark::internal::Benchmark* b) { |
64 | b->ArgNames({"tasks" , "threads" }); |
65 | b->Args({NumTasks, 0}); |
66 | auto numLogicalCPUs = marl::Thread::numLogicalCPUs(); |
67 | #if MARL_FULL_BENCHMARK |
68 | for (unsigned int threads = 1U; threads <= numLogicalCPUs; threads++) { |
69 | b->Args({NumTasks, threads}); |
70 | } |
71 | #else |
72 | for (unsigned int threads = 1U; threads <= numLogicalCPUs; threads *= 2) { |
73 | b->Args({NumTasks, threads}); |
74 | } |
75 | if ((numLogicalCPUs & (numLogicalCPUs - 1)) != 0) { |
76 | // numLogicalCPUs is not a power-of-two. Also test with numLogicalCPUs. |
77 | b->Args({NumTasks, numLogicalCPUs}); |
78 | } |
79 | #endif |
80 | } |
81 | |
82 | // numThreads() return the number of threads in the benchmark run from the |
83 | // state. |
84 | static int numThreads(const ::benchmark::State& state) { |
85 | return static_cast<int>(state.range(1)); |
86 | } |
87 | |
88 | // numTasks() return the number of tasks in the benchmark run from the state. |
89 | static int numTasks(const ::benchmark::State& state) { |
90 | return static_cast<int>(state.range(0)); |
91 | } |
92 | |
93 | // doSomeWork() performs some made up bit-shitfy algorithm that's difficult |
94 | // for a compiler to optimize and produces consistent results. |
95 | static uint32_t doSomeWork(uint32_t x); |
96 | }; |