1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file parallel_for.h |
22 | * \brief An implementation to run loops in parallel. |
23 | */ |
24 | #ifndef TVM_SUPPORT_PARALLEL_FOR_H_ |
25 | #define TVM_SUPPORT_PARALLEL_FOR_H_ |
26 | |
27 | #include <tvm/runtime/c_runtime_api.h> |
28 | |
29 | #include <functional> |
30 | #include <vector> |
31 | |
32 | namespace tvm { |
33 | namespace support { |
34 | /*! \brief Type of a task partitioner; `rr_partitioner` below has this signature. */ |
35 | using PartitionerFuncType = std::function<std::vector<std::vector<int>>(int, int, int, int)>; |
36 | |
37 | /*! |
38 | * \brief A partitioner that splits the tasks among threads in a round-robin manner. |
39 | * \param begin The start index of this parallel loop (inclusive). |
40 | * \param end The end index of this parallel loop (exclusive). |
41 | * \param step The traversal step of the loop index. |
42 | * \param num_threads The number of threads (i.e. the number of partitions to produce). |
43 | * \return A list with `num_threads` elements; each element is the list of loop indexes |
44 | * that the corresponding thread should process. |
45 | */ |
46 | TVM_DLL std::vector<std::vector<int>> rr_partitioner(int begin, int end, int step, int num_threads); |
47 | |
48 | /*! |
49 | * \brief A runtime API that runs the task function in parallel. |
50 | * e.g. A for loop: |
51 | * for (int i = 0; i < 10; i++) { |
52 | * a[i] = i; |
53 | * } |
54 | * should work the same as: |
55 | * parallel_for(0, 10, [&a](int index) { |
56 | * a[index] = index; |
57 | * }); |
58 | * \param begin The start index of this parallel loop (inclusive). |
59 | * \param end The end index of this parallel loop (exclusive). |
60 | * \param f The task function to be executed. Expected to take an int index and return nothing. |
61 | * \param step The traversal step of the loop index. Defaults to 1. |
62 | * \param partitioner A partition function that splits tasks among threads. Uses the round-robin |
63 | * partitioner (`rr_partitioner`) by default. |
64 | * \note 1. Nested parallel_for is currently not supported; 2. The order of execution in each |
65 | * thread is not guaranteed, so the loop body should be thread independent and thread safe. |
66 | */ |
67 | TVM_DLL void parallel_for(int begin, int end, const std::function<void(int)>& f, int step = 1, |
68 | const PartitionerFuncType partitioner = rr_partitioner); |
69 | |
70 | /*! |
71 | * \brief An API to launch a fixed number of threads to run the given functor in parallel. |
72 | * Unlike `parallel_for`, the partition is determined dynamically on the fly, |
73 | * i.e. whenever a thread is idle, it fetches the next task to run. |
74 | * The behavior is similar to dynamic scheduling in OpenMP: |
75 | * |
76 | * \#pragma omp parallel for schedule(dynamic) num_threads(num_threads) |
77 | * for (int i = 0; i < 10; i++) { |
78 | * a[i] = i; |
79 | * } |
80 | * |
81 | * \param begin The start index of this parallel loop (inclusive). |
82 | * \param end The end index of this parallel loop (exclusive). |
83 | * \param num_threads The number of threads to be used. |
84 | * \param f The task function to be executed. Takes the thread index and the task index as |
85 | * input and returns nothing. |
86 | * \note Support for `step` is left for future work. |
87 | */ |
88 | TVM_DLL void parallel_for_dynamic(int begin, int end, int num_threads, |
89 | const std::function<void(int thread_id, int task_id)>& f); |
90 | } // namespace support |
91 | } // namespace tvm |
92 | |
93 | #endif // TVM_SUPPORT_PARALLEL_FOR_H_ |
94 | |