1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef GLOW_PARTITIONER_PARTITIONER_H
17#define GLOW_PARTITIONER_PARTITIONER_H
18
19#include "glow/Partitioner/PartitionerBase.h"
20#include "glow/Support/Error.h"
21
22namespace glow {
23
24using namespace runtime;
25
26/// Given a module, partitions each of the its functions into multiple ones
27/// based on memory constraints and minimizes the communication cost.
class Partitioner final : public PartitionerBase {
  /// The module that needs to be decomposed.
  Module *module_;

  /// The representative function used for partition. We choose the function
  /// which has the largest memory size.
  Function *F_;

  /// True if there are more than 1 type of backends.
  bool multiBackendNames_;

  /// Number of copies of inputs/outputs to assume when calculating mem size.
  unsigned contextCount_{1};

  /// The cost model related to device.
  std::vector<DeviceInfo> deviceInfo_;

  /// The backends created in Partitioner. Used for function optimization.
  std::vector<std::unique_ptr<Backend>> backendHolder_;

  /// The raw backend pointers.
  std::vector<Backend *> backends_;

  /// The map between backend name and BackendInfo.
  std::map<std::string, BackendInfo> backendMap_;

  /// The map between partitions and the logicalDeviceID. The partitions with
  /// the same logicalDeviceID will be assigned into the same physical device.
  std::map<Function *, std::vector<DeviceIDTy>> logicalIDMap_;

  /// The number of logicalDevice IDs, i.e. the number of physical devices
  /// needed after partitions.
  DeviceIDTy logicalDeviceID_;

  /// Total memory (bytes) requested by one module.
  uint64_t memSize_;

  /// Flag to set if the functions in the module are already optimized. By
  /// default, the optimization should be done in Partitioner due to
  /// heterogeneous partition.
  bool optimized_;

  /// The struct contains user-defined partition info.
  PartitionConfig partitionConfig_;

  /// Get the representative function (the one with the largest input) and
  /// update the memSize.
  static Function *selectRepFunc(Module *parent, uint64_t &memSize);

  /// Initialization. Called in class constructor.
  void init();

  /// Verify the generated functions in module, and \returns error if any
  /// function is invalid. Dump partition logs from \p partitions and \p
  /// mapping.
  Error finalize(const DAGListTy &partitions, const NodeToFunctionMap &mapping);

  /// After getting the initial partitions, adjust the partitions to minimize
  /// communication and computation cost.
  void partitionsAdjust(NodeToFunctionMap &partitions,
                        uint64_t availableMemory);

  /// Assign nodes to partitions grouped by \p backendName and return the
  /// mapping.
  NodeToFunctionMap selectPartitions(Function *F, uint64_t availableMemory,
                                     llvm::StringRef backendName);

  /// Duplicates \p partitions in the module order to saturate the Host.
  /// \p logicalDeviceCount is the number of logical devices used by the
  /// current partitions. \p availableLogicalDevices is the total number of
  /// devices to saturate (if zero then the number of found devices is used).
  /// For example: If a network is partitioned into two parts (\p
  /// logicalDeviceCount) and there are six devices this would duplicate
  /// the network three times. If \p availableLogicalDevices is set to four,
  /// the network would be duplicated only twice.
  void saturateHost(unsigned logicalDeviceCount, const DAGListTy &partitions,
                    size_t availableLogicalDevices);

  /// Partition a function \p F based on backends \p backends. \returns the
  /// final partition result(or an err) and a map between partitions and backend
  /// names. \p cctx is used for functions optimization.
  Expected<DAGListTy>
  backendBasedPartition(FunctionToBackendNameMap &funcToBackend, Function *F,
                        std::vector<Backend *> &backends,
                        CompilationContext &cctx);

  /// If there is no need to do any partition, just generate the DAGNode based
  /// on current functions in this module for backend \p backendName found in \p
  /// backendMap. \p cctx is used for function optimization. \returns the
  /// partition result or an error.
  Expected<DAGListTy>
  createDAGWithoutPartition(llvm::StringRef backendName,
                            std::map<std::string, BackendInfo> &backendMap,
                            CompilationContext &cctx);

  /// Create the map between the backend name and the concrete backend info
  /// (e.g. backend pointer, mem, number) used in this partition. If there are
  /// backends that need to be created, we use \p backendsHolder to hold them
  /// for memory purpose.
  void genBackendMap(std::map<std::string, BackendInfo> &backendMap,
                     std::vector<std::unique_ptr<Backend>> &backendsHolder,
                     std::vector<Backend *> &backends);

  /// Returns info for the default device of the backend. If multiple devices,
  /// returns the first one.
  const DeviceInfo &getDeviceInfoForBackend(llvm::StringRef backendName);

public:
  /// \p parent is the module which contains the functions need to be divided.
  /// Here we assume that all the functions in one module belong to a same
  /// "Function Family", that is, without considering the "dynamic stuff" (i.e.
  /// batch size, input/output shape of each op), all the functions are
  /// identical. The required memory and computation cost for each op can be
  /// found in Module.
  /// The \p devices provides the cost model related to devices.
  /// \p optimized is false by default, which means the functions in this module
  /// are not optimized. \p partitionConfig contains the user defined partition
  /// info.
  Partitioner(Module *parent, const std::vector<DeviceInfo> &devices,
              bool optimized = false,
              PartitionConfig partitionConfig = PartitionConfig());

  /// Users can create Mock Backends and pass their pointers to test Graph
  /// Partitioning without actually registering them in GLOW.
  Partitioner(Module *parent, const std::vector<DeviceInfo> &devices,
              const std::vector<Backend *> &backends, bool optimized = false);

  /// Set contextCount_ to provided \p count.
  void setContextCount(unsigned count) { contextCount_ = count; }

  /// Based on \p partitionConfig passed into Partitioner, do user-defined
  /// partition.
  Expected<DAGListTy>
  partitionFromConfig(const PartitionConfig &partitionConfig,
                      CompilationContext &cctx);

  /// Based on \p cctx, setup all data structures needed for a DAG.
  /// cctx.prepartitionedConfig contains the Functions which are already
  /// partitioned and connected via Placeholders.
  Expected<DAGListTy> setupPrepartitionedModule(CompilationContext &cctx);

  /// This partition approach is used in Glow Quantization Profiling flow. The
  /// backendBasedPartition is applied first in case there are heterogeneous
  /// backends. Then each sub-function will be compiled and run in CPU backend
  /// for profiling. \p cctx is used for function optimization. \returns the
  /// partition result or an error.
  Expected<DAGListTy> quantizationProfilingPartition(CompilationContext &cctx);

  /// This partition approach first does the partition based on backend types,
  /// and then based on cost models(memory usage and performance). \p cctx is
  /// used for function optimization. \returns the partition result or an error.
  Expected<DAGListTy> heterogeneousPartition(CompilationContext &cctx);

  /// This partition approach is an experimental one. It tries to balance the
  /// workloads of each accelerator/device in addition to respecting memory
  /// constraints. \p numDevices is the minimal number of partition. That is,
  /// after loadBalancedPartition, the network will be divided up into at least
  /// \p numDevices sub-networks. Now it is overwritten inside of
  /// loadBalancedPartition. But in the future, it can be manually defined by
  /// users.
  Expected<DAGListTy> loadBalancedPartition(CompilationContext &cctx,
                                            size_t numDevices = 0);

  /// This partition approach is meant for SparseNN models. The SLS tables are
  /// split across logical devices and the non-SLS nodes are assigned in a
  /// round-robin fashion to all logical devices.
  Expected<DAGListTy> partitionSparseNN(CompilationContext &cctx);

  /// Decompose each function in a module. Given the parameters, this function
  /// will choose different partition approaches supported in this class:
  /// heterogeneous partition, user-defined partition or quantization profiling.
  /// \p cctx is used for function optimization. \returns the partition result
  /// or an error.
  Expected<DAGListTy> partition(CompilationContext &cctx) override;
};
203} // namespace glow
204#endif // GLOW_PARTITIONER_PARTITIONER_H
205