1// Copyright 2020 The Marl Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
#include "marl/scheduler.h"
#include "marl/thread.h"

#include "benchmark/benchmark.h"

#include <cstdint>
#include <utility>
19
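// Define MARL_FULL_BENCHMARK to 1 to run each benchmark at every worker
// thread count from 1 up to the number of logical CPU cores, instead of only
// at power-of-two thread counts (plus the logical CPU count itself).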
22#ifndef MARL_FULL_BENCHMARK
23#define MARL_FULL_BENCHMARK 0
24#endif
25
26class Schedule : public benchmark::Fixture {
27 public:
28 void SetUp(const ::benchmark::State&) {}
29
30 void TearDown(const ::benchmark::State&) {}
31
32 // run() creates a scheduler using the config cfg, sets the number of worker
33 // threads from the benchmark arguments, calls f, then unbinds and destructs
34 // the scheduler.
35 // F must be a function of the signature: void(int numTasks)
36 template <typename F>
37 void run(const ::benchmark::State& state,
38 marl::Scheduler::Config cfg,
39 F&& f) {
40 cfg.setWorkerThreadCount(numThreads(state));
41
42 marl::Scheduler scheduler(cfg);
43 scheduler.bind();
44 f(numTasks(state));
45 scheduler.unbind();
46 }
47
48 // run() creates a scheduler, sets the number of worker threads from the
49 // benchmark arguments, calls f, then unbinds and destructs the scheduler.
50 // F must be a function of the signature: void(int numTasks)
51 template <typename F>
52 void run(const ::benchmark::State& state, F&& f) {
53 run(state, marl::Scheduler::Config{}, f);
54 }
55
56 // args() sets up the benchmark to run a number of tasks over a number of
57 // threads.
58 // If MARL_FULL_BENCHMARK is enabled, then NumTasks tasks will be run
59 // across from 0 to numLogicalCPUs worker threads.
60 // If MARL_FULL_BENCHMARK is not enabled, then NumTasks tasks will be run
61 // across [0 .. numLogicalCPUs] worker threads in 2^n steps.
62 template <int NumTasks = 0x40000>
63 static void args(benchmark::internal::Benchmark* b) {
64 b->ArgNames({"tasks", "threads"});
65 b->Args({NumTasks, 0});
66 auto numLogicalCPUs = marl::Thread::numLogicalCPUs();
67#if MARL_FULL_BENCHMARK
68 for (unsigned int threads = 1U; threads <= numLogicalCPUs; threads++) {
69 b->Args({NumTasks, threads});
70 }
71#else
72 for (unsigned int threads = 1U; threads <= numLogicalCPUs; threads *= 2) {
73 b->Args({NumTasks, threads});
74 }
75 if ((numLogicalCPUs & (numLogicalCPUs - 1)) != 0) {
76 // numLogicalCPUs is not a power-of-two. Also test with numLogicalCPUs.
77 b->Args({NumTasks, numLogicalCPUs});
78 }
79#endif
80 }
81
82 // numThreads() return the number of threads in the benchmark run from the
83 // state.
84 static int numThreads(const ::benchmark::State& state) {
85 return static_cast<int>(state.range(1));
86 }
87
88 // numTasks() return the number of tasks in the benchmark run from the state.
89 static int numTasks(const ::benchmark::State& state) {
90 return static_cast<int>(state.range(0));
91 }
92
93 // doSomeWork() performs some made up bit-shitfy algorithm that's difficult
94 // for a compiler to optimize and produces consistent results.
95 static uint32_t doSomeWork(uint32_t x);
96};