/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19
/*!
 * \file parallel_for.h
 * \brief Utilities for running the iterations of a loop in parallel.
 */
24#ifndef TVM_SUPPORT_PARALLEL_FOR_H_
25#define TVM_SUPPORT_PARALLEL_FOR_H_
26
27#include <tvm/runtime/c_runtime_api.h>
28
29#include <functional>
30#include <vector>
31
namespace tvm {
namespace support {

/*!
 * \brief Type of a partition function usable by `parallel_for`.
 *
 * It has the same signature as `rr_partitioner`: it receives
 * (begin, end, step, num_threads) and returns one list of loop indexes per thread.
 */
using PartitionerFuncType = std::function<std::vector<std::vector<int>>(int, int, int, int)>;

/*!
 * \brief A partitioner to split the task to each thread in Round-robin manner.
 * \param begin The start index of this parallel loop (inclusive).
 * \param end The end index of this parallel loop (exclusive).
 * \param step The traversal step to the index.
 * \param num_threads The number of threads (the number of tasks to be partitioned to).
 * \return A list with `num_threads` elements, and each is a list of integers indicating the loop
 * indexes for the corresponding thread to process.
 */
TVM_DLL std::vector<std::vector<int>> rr_partitioner(int begin, int end, int step, int num_threads);

/*!
 * \brief A runtime api provided to run the task function in parallel.
 * e.g. A for loop:
 *   for (int i = 0; i < 10; i++) {
 *     a[i] = i;
 *   }
 * should work the same as:
 *   parallel_for(0, 10, [&a](int index) {
 *     a[index] = index;
 *   });
 * \param begin The start index of this parallel loop (inclusive).
 * \param end The end index of this parallel loop (exclusive).
 * \param f The task function to be executed. Expected to take an int index as input and to
 * return nothing.
 * \param step The traversal step to the index.
 * \param partitioner A partition function to split tasks to different threads. Use Round-robin
 * partitioner by default.
 * \note 1. Currently do not support nested parallel_for; 2. The order of execution in each thread
 * is not guaranteed, so the for loop task should be thread independent and thread safe.
 */
TVM_DLL void parallel_for(int begin, int end, const std::function<void(int)>& f, int step = 1,
                          const PartitionerFuncType partitioner = rr_partitioner);

/*!
 * \brief An API to launch a fixed number of threads to run the specific functor in parallel.
 * Different from `parallel_for`, the partition is determined dynamically on the fly,
 * i.e. any time when a thread is idle, it fetches the next task to run.
 * The behavior is similar to dynamic scheduling in OpenMP:
 *
 *   \#pragma omp parallel for schedule(dynamic) num_threads(num_threads)
 *   for (int i = 0; i < 10; i++) {
 *     a[i] = i;
 *   }
 *
 * \param begin The start index of this parallel loop (inclusive).
 * \param end The end index of this parallel loop (exclusive).
 * \param num_threads The number of threads to be used.
 * \param f The task function to be executed. Takes the thread index and the task index as
 * input with no output.
 * \note `step` support is left for future work.
 */
TVM_DLL void parallel_for_dynamic(int begin, int end, int num_threads,
                                  const std::function<void(int thread_id, int task_id)>& f);
}  // namespace support
}  // namespace tvm
92
93#endif // TVM_SUPPORT_PARALLEL_FOR_H_
94