1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef GLOW_PARTITIONER_PARTITIONER_H |
17 | #define GLOW_PARTITIONER_PARTITIONER_H |
18 | |
19 | #include "glow/Partitioner/PartitionerBase.h" |
20 | #include "glow/Support/Error.h" |
21 | |
22 | namespace glow { |
23 | |
24 | using namespace runtime; |
25 | |
26 | /// Given a module, partitions each of the its functions into multiple ones |
27 | /// based on memory constraints and minimizes the communication cost. |
28 | class Partitioner final : public PartitionerBase { |
29 | /// The module that needs to be decomposed. |
30 | Module *module_; |
31 | |
32 | /// The representative function used for partition. We choose the function who |
33 | /// has the largest memory size. |
34 | Function *F_; |
35 | |
36 | /// True if there are more than 1 type of backends. |
37 | bool multiBackendNames_; |
38 | |
39 | /// Number of copies of inputs/outputs to assume when calculating mem size. |
40 | unsigned contextCount_{1}; |
41 | |
42 | /// The cost model related to device. |
43 | std::vector<DeviceInfo> deviceInfo_; |
44 | |
45 | /// The backends created in Partitioner. Used for function optimization. |
46 | std::vector<std::unique_ptr<Backend>> backendHolder_; |
47 | |
48 | /// The raw backend pointers. |
49 | std::vector<Backend *> backends_; |
50 | |
51 | /// The map between backend name and BackendInfo. |
52 | std::map<std::string, BackendInfo> backendMap_; |
53 | |
54 | /// The map between partitions and the logicalDeviceID. The partitions with |
55 | /// the same logicalDeviceID will be assigned into the same physical device. |
56 | std::map<Function *, std::vector<DeviceIDTy>> logicalIDMap_; |
57 | |
58 | /// The number of logicalDevice IDs, i.e. the number of physical devices |
59 | /// needed after partitions. |
60 | DeviceIDTy logicalDeviceID_; |
61 | |
62 | /// Total memory (bytes) requested by one module. |
63 | uint64_t memSize_; |
64 | |
65 | /// Flag to set if the funcitons in the module are areadly optimized. By |
66 | /// default, the optimization should be done in Partitioner due to |
67 | /// heterogeneous partition. |
68 | bool optimized_; |
69 | |
70 | /// The struct contain user-defined partition info. |
71 | PartitionConfig partitionConfig_; |
72 | |
73 | /// Get the representative function (the one with the largest input) and |
74 | /// update the memSize. |
75 | static Function *selectRepFunc(Module *parent, uint64_t &memSize); |
76 | |
77 | /// Initialization. Called in class constructor. |
78 | void init(); |
79 | |
80 | /// Verify the generated functions in module, and \returns error if any |
81 | /// function is invalid. Dump partition logs from \p partitions and \p |
82 | /// mapping. |
83 | Error finalize(const DAGListTy &partitions, const NodeToFunctionMap &mapping); |
84 | |
85 | /// After getting the initial partitions, adjust the partitions to minimize |
86 | /// communication and computation cost. |
87 | void partitionsAdjust(NodeToFunctionMap &partitions, |
88 | uint64_t availableMemory); |
89 | |
90 | /// Assign nodes to partitions grouped by \p backendName and return the |
91 | /// mapping. |
92 | NodeToFunctionMap selectPartitions(Function *F, uint64_t availableMemory, |
93 | llvm::StringRef backendName); |
94 | |
95 | /// Duplicates \p partitions in the module order to saturate the Host. |
96 | /// \p logicalDeviceCount is the number of logical devices used by the |
97 | /// current partitions. \p availableLogicalDevices is the total number of |
98 | /// devices to saturate (if zero than the number of found devices is used). |
99 | /// For example: If a network is partitioned into two parts (\p |
100 | /// logicalDeviceCount) and there are six devices this would duplicate |
101 | /// the network three times. If \p availableLogicalDevices is set to four, |
102 | /// the network would be duplicated only twice. |
103 | void saturateHost(unsigned logicalDeviceCount, const DAGListTy &partitions, |
104 | size_t availableLogicalDevices); |
105 | |
106 | /// Partition a function \p F based on backends \p backends. \returns the |
107 | /// final partition result(or an err) and a map between partitions and backend |
108 | /// names. \p cctx is used for functions optimization. |
109 | Expected<DAGListTy> |
110 | backendBasedPartition(FunctionToBackendNameMap &funcToBackend, Function *F, |
111 | std::vector<Backend *> &backends, |
112 | CompilationContext &cctx); |
113 | |
114 | /// If there is no need to do any partition, just generate the DAGNode based |
115 | /// on current functions in this module for backend \p backendName found in \p |
116 | /// backendMap. \p cctx is used for function optimization. \returns the |
117 | /// partition result or an error. |
118 | Expected<DAGListTy> |
119 | createDAGWithoutPartition(llvm::StringRef backendName, |
120 | std::map<std::string, BackendInfo> &backendMap, |
121 | CompilationContext &cctx); |
122 | |
123 | /// Create the map between the backend name and the concrete backend info |
124 | /// (e.g. backend pointer, mem, number) used in this partiton. If there are |
125 | /// backends need to be created, we use \p backendsHolder to hold them for |
126 | /// memory purpose. |
127 | void genBackendMap(std::map<std::string, BackendInfo> &backendMap, |
128 | std::vector<std::unique_ptr<Backend>> &backendsHolder, |
129 | std::vector<Backend *> &backends); |
130 | |
131 | /// Returns info for the default device of the backend. If multiple devices, |
132 | /// returns the first one. |
133 | const DeviceInfo &getDeviceInfoForBackend(llvm::StringRef backendName); |
134 | |
135 | public: |
136 | /// \p parent is the module which contains the functions need to be divided. |
137 | /// Here we assume that all the functions in one module belong to a same |
138 | /// "Function Family", that is, without considerting the "dynamic stuff" (i.e. |
139 | /// batch size, input/output shape of each op), all the functions are |
140 | /// identical. The required memory and computation cost for each op can be |
141 | /// found in Module. |
142 | /// The \p devices provides the cost model related to devices. |
143 | /// \p optimized is false by default, which means the functions in this module |
144 | /// are not optimized. \p partitionConfig contains the user defined partition |
145 | /// info. |
146 | Partitioner(Module *parent, const std::vector<DeviceInfo> &devices, |
147 | bool optimized = false, |
148 | PartitionConfig partitionConfig = PartitionConfig()); |
149 | |
150 | /// Users can create Mock Backends and pass their points to test Graph |
151 | /// Partitioning without actually register them in GLOW. |
152 | Partitioner(Module *parent, const std::vector<DeviceInfo> &devices, |
153 | const std::vector<Backend *> &backends, bool optimized = false); |
154 | |
155 | /// Set contextCount_ to provided /p count. |
156 | void setContextCount(unsigned count) { contextCount_ = count; } |
157 | |
158 | /// Based on \p partitionConfig passed into Partitioner, do user-defined |
159 | /// partition. |
160 | Expected<DAGListTy> |
161 | partitionFromConfig(const PartitionConfig &partitionConfig, |
162 | CompilationContext &cctx); |
163 | |
164 | /// Based on \p cctx, setup all data structures needed for a DAG. |
165 | /// cctx.prepartitionedConfig contains the Functions which are already |
166 | /// partitioned and connected via Placeholders. |
167 | Expected<DAGListTy> setupPrepartitionedModule(CompilationContext &cctx); |
168 | |
169 | /// This partition approach is used in Glow Quantization Profiling flow. The |
170 | /// backendBasedPartition is applied first in case there are heterogeneous |
171 | /// backends. Then each sub-function will be compiled and run in CPU backend |
172 | /// for profiling. \p cctx is used for function optimization. \returns the |
173 | /// partition result or an error. |
174 | Expected<DAGListTy> quantizationProfilingPartition(CompilationContext &cctx); |
175 | |
176 | /// This partition approch first do the partition based on backend types, and |
177 | /// then based on cost models(memory usage and performance). \p cctx is used |
178 | /// for function optimization. \returns the partition result or an error. |
179 | Expected<DAGListTy> heterogeneousPartition(CompilationContext &cctx); |
180 | |
181 | /// This partition approach is an experimental one. It tries to balance the |
182 | /// workloads of each accelerator/device in addition to respecting memory |
183 | /// constraints. \p numDevices is the minimal number of partition. That is, |
184 | /// after loadBalancedPartition, the network will be devided up into at lease |
185 | /// \p numDevices sub-networks. Now it is overwritten inside of |
186 | /// loadBalcnedPartition. But in the future, it can be manually defined by |
187 | /// users. |
188 | Expected<DAGListTy> loadBalancedPartition(CompilationContext &cctx, |
189 | size_t numDevices = 0); |
190 | |
191 | // This partition approach is meant for SparseNN models. The SLS tables are |
192 | // split across logical devices and the non-SLS nodes are assigned in a |
193 | // round-robin fashion to all logical devices. |
194 | Expected<DAGListTy> partitionSparseNN(CompilationContext &cctx); |
195 | |
196 | /// Decompose each function in a module. Given the parameters, this function |
197 | /// will choose different partition approches supported in this class: |
198 | /// heterogeneous partition, user-defined partition or quantization profiling. |
199 | /// \p cctx is used for function optimization. \returns the partition result |
200 | /// or an error. |
201 | Expected<DAGListTy> partition(CompilationContext &cctx) override; |
202 | }; |
203 | } // namespace glow |
204 | #endif // GLOW_PARTITIONER_PARTITIONER_H |
205 | |