1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef GLOW_PARTITIONER_PARTITIONER_H
17#define GLOW_PARTITIONER_PARTITIONER_H
18
19#include "glow/Partitioner/PartitionerBase.h"
20#include "glow/Support/Error.h"
21
22namespace glow {
23
24using namespace runtime;
25
26/// Given a module, partitions each of the its functions into multiple ones
27/// based on memory constraints and minimizes the communication cost.
class Partitioner final : public PartitionerBase {
  /// The module that needs to be decomposed.
  Module *module_;

  /// The representative function used for partition. We choose the function
  /// which has the largest memory size.
  Function *F_;

  /// True if there are more than 1 type of backends.
  bool multiBackendNames_;

  /// Number of copies of inputs/outputs to assume when calculating mem size.
  unsigned contextCount_{1};

  /// The cost model related to device.
  std::vector<DeviceInfo> deviceInfo_;

  /// The backends created in Partitioner. Used for function optimization.
  std::vector<std::unique_ptr<Backend>> backendHolder_;

  /// The raw backend pointers.
  std::vector<Backend *> backends_;

  /// The map between backend name and BackendInfo.
  std::map<std::string, BackendInfo> backendMap_;

  /// The map between partitions and the logicalDeviceID. The partitions with
  /// the same logicalDeviceID will be assigned into the same physical device.
  std::map<Function *, std::vector<DeviceIDTy>> logicalIDMap_;

  /// The number of logicalDevice IDs, i.e. the number of physical devices
  /// needed after partitions.
  DeviceIDTy logicalDeviceID_;

  /// Total memory (bytes) requested by one module.
  uint64_t memSize_;

  /// Flag to set if the functions in the module are already optimized. By
  /// default, the optimization should be done in Partitioner due to
  /// heterogeneous partition.
  bool optimized_;

  /// The struct contains user-defined partition info.
  PartitionConfig partitionConfig_;

  /// Get the representative function (the one with the largest input) and
  /// update the memSize.
  static Function *selectRepFunc(Module *parent, uint64_t &memSize);

  /// Initialization. Called in class constructor.
  void init();

  /// Verify the generated functions in module, and \returns error if any
  /// function is invalid. Dump partition logs from \p partitions and \p
  /// mapping.
  Error finalize(const DAGListTy &partitions, const NodeToFunctionMap &mapping);

  /// After getting the initial partitions, adjust the partitions to minimize
  /// communication and computation cost.
  void partitionsAdjust(NodeToFunctionMap &partitions,
                        uint64_t availableMemory);

  /// Assign nodes to partitions grouped by \p backendName and return the
  /// mapping.
  NodeToFunctionMap selectPartitions(Function *F, uint64_t availableMemory,
                                     llvm::StringRef backendName);

  /// Duplicates \p partitions in the module order to saturate the Host.
  /// \p logicalDeviceCount is the number of logical devices used by the
  /// current partitions. \p availableLogicalDevices is the total number of
  /// devices to saturate (if zero then the number of found devices is used).
  /// For example: If a network is partitioned into two parts (\p
  /// logicalDeviceCount) and there are six devices this would duplicate
  /// the network three times. If \p availableLogicalDevices is set to four,
  /// the network would be duplicated only twice.
  void saturateHost(unsigned logicalDeviceCount, const DAGListTy &partitions,
                    size_t availableLogicalDevices);

  /// Partition a function \p F based on backends \p backends. \returns the
  /// final partition result(or an err) and a map between partitions and backend
  /// names. \p cctx is used for functions optimization.
  Expected<DAGListTy>
  backendBasedPartition(FunctionToBackendNameMap &funcToBackend, Function *F,
                        std::vector<Backend *> &backends,
                        CompilationContext &cctx);

  /// If there is no need to do any partition, just generate the DAGNode based
  /// on current functions in this module for backend \p backendName found in \p
  /// backendMap. \p cctx is used for function optimization. \returns the
  /// partition result or an error.
  Expected<DAGListTy>
  createDAGWithoutPartition(llvm::StringRef backendName,
                            std::map<std::string, BackendInfo> &backendMap,
                            CompilationContext &cctx);

  /// Create the map between the backend name and the concrete backend info
  /// (e.g. backend pointer, mem, number) used in this partition. If there are
  /// backends that need to be created, we use \p backendsHolder to hold them
  /// for memory purpose.
  void genBackendMap(std::map<std::string, BackendInfo> &backendMap,
                     std::vector<std::unique_ptr<Backend>> &backendsHolder,
                     std::vector<Backend *> &backends);

  /// Returns info for the default device of the backend. If multiple devices,
  /// returns the first one.
  const DeviceInfo &getDeviceInfoForBackend(llvm::StringRef backendName);

public:
  /// \p parent is the module which contains the functions need to be divided.
  /// Here we assume that all the functions in one module belong to a same
  /// "Function Family", that is, without considering the "dynamic stuff" (i.e.
  /// batch size, input/output shape of each op), all the functions are
  /// identical. The required memory and computation cost for each op can be
  /// found in Module.
  /// The \p devices provides the cost model related to devices.
  /// \p optimized is false by default, which means the functions in this module
  /// are not optimized. \p partitionConfig contains the user defined partition
  /// info.
  Partitioner(Module *parent, const std::vector<DeviceInfo> &devices,
              bool optimized = false,
              PartitionConfig partitionConfig = PartitionConfig());

  /// Users can create Mock Backends and pass their pointers to test Graph
  /// Partitioning without actually registering them in GLOW.
  Partitioner(Module *parent, const std::vector<DeviceInfo> &devices,
              const std::vector<Backend *> &backends, bool optimized = false);

  /// Set contextCount_ to provided \p count.
  void setContextCount(unsigned count) { contextCount_ = count; }

  /// Based on \p partitionConfig passed into Partitioner, do user-defined
  /// partition.
  Expected<DAGListTy>
  partitionFromConfig(const PartitionConfig &partitionConfig,
                      CompilationContext &cctx);

  /// Based on \p cctx, setup all data structures needed for a DAG.
  /// cctx.prepartitionedConfig contains the Functions which are already
  /// partitioned and connected via Placeholders.
  Expected<DAGListTy> setupPrepartitionedModule(CompilationContext &cctx);

  /// This partition approach is used in Glow Quantization Profiling flow. The
  /// backendBasedPartition is applied first in case there are heterogeneous
  /// backends. Then each sub-function will be compiled and run in CPU backend
  /// for profiling. \p cctx is used for function optimization. \returns the
  /// partition result or an error.
  Expected<DAGListTy> quantizationProfilingPartition(CompilationContext &cctx);

  /// This partition approach first does the partition based on backend types,
  /// and then based on cost models(memory usage and performance). \p cctx is
  /// used for function optimization. \returns the partition result or an error.
  Expected<DAGListTy> heterogeneousPartition(CompilationContext &cctx);

  /// This partition approach is an experimental one. It tries to balance the
  /// workloads of each accelerator/device in addition to respecting memory
  /// constraints. \p numDevices is the minimal number of partition. That is,
  /// after loadBalancedPartition, the network will be divided up into at least
  /// \p numDevices sub-networks. Now it is overwritten inside of
  /// loadBalancedPartition. But in the future, it can be manually defined by
  /// users.
  Expected<DAGListTy> loadBalancedPartition(CompilationContext &cctx,
                                            size_t numDevices = 0);

  /// This partition approach is meant for SparseNN models. The SLS tables are
  /// split across logical devices and the non-SLS nodes are assigned in a
  /// round-robin fashion to all logical devices.
  Expected<DAGListTy> partitionSparseNN(CompilationContext &cctx);

  /// Decompose each function in a module. Given the parameters, this function
  /// will choose different partition approaches supported in this class:
  /// heterogeneous partition, user-defined partition or quantization profiling.
  /// \p cctx is used for function optimization. \returns the partition result
  /// or an error.
  Expected<DAGListTy> partition(CompilationContext &cctx) override;
};
203} // namespace glow
204#endif // GLOW_PARTITIONER_PARTITIONER_H
205