RuntimeTypes.h source code [glow/include/glow/Runtime/RuntimeTypes.h]

1	/**
2	* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	#ifndef GLOW_RUNTIME_RUNTIMETYPES_H
17	#define GLOW_RUNTIME_RUNTIMETYPES_H
18
#include "glow/Backend/Backend.h"
#include "glow/Backend/BackendUtils.h"
#include "glow/Backends/BackendOptions.h"
#include "glow/Graph/Graph.h"
#include "glow/Support/Error.h"

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
29
30	namespace glow {
31
32	class ExecutionContext;
33
34	namespace runtime {
35
class DeviceManager;

/// Integral identifier of a device; it is the key type of DeviceManagerMapTy.
using DeviceIDTy = std::size_t;
/// Integral identifier of a run; it is handed back to the caller as the first
/// argument of the result callback.
using RunIdentifierTy = std::size_t;

/// Ordered map that owns one DeviceManager per DeviceIDTy.
using DeviceManagerMapTy =
    std::map<DeviceIDTy, std::unique_ptr<DeviceManager>>;
42
43	/// Callback type used by HostManager and DeviceManager, used to pass results of
44	/// an inference request back to the caller.
45	using ResultCBTy = std::function<void(runtime::RunIdentifierTy, Error,
46	std::unique_ptr<ExecutionContext>)>;
47
/// Data structure that contains device constraint information for each device.
/// Used to communicate memory constraints and later costs to the Partitioner.
/// Every numeric member is zero-initialized so that a default-constructed
/// DeviceInfo never exposes indeterminate values.
struct DeviceInfo {
  /// Available global memory on device in bytes.
  uint64_t availableMemory{0};
  /// Backend Type.
  std::string backendName;
  /// Comma-separated list of node names (e.g. "Div,Add"). In the Partitioner,
  /// the listed nodes are treated as not supported by this backend.
  std::string nonSupportedNodes;
  /// Comma-separated list of node names (e.g. "Div,Add"). In the Partitioner,
  /// every node that is NOT in this list is treated as not supported by this
  /// backend.
  std::string supportedNodes;
  /// Available SRAM capacity in bytes.
  uint64_t sramCapacity{0};
  /// Available (software controlled) local/scratchpad/onchip memory on the
  /// device in bytes.
  uint64_t availableLocalMemory{0};
  /// Peak compute on device in ops/second. Assumes all ops are in int8.
  /// TODO: distinguish between data types with different peak flops.
  float peakCompute{0.0f};
  /// Peak memory bandwidth from DRAM on device in bytes/second.
  float peakDramBw{0.0f};
  /// Peak memory bandwidth from SRAM on device in bytes/second.
  float peakSramBw{0.0f};
  /// Peak ingress/egress PCI-E bandwidth from device in bytes/second.
  float peakPCIeBw{0.0f};
  /// Maximum amount of input resources; 0 (the default) means no limit.
  uint64_t inputCountMax{0};
};
80
/// Data structure that tracks how many outstanding work items remain for a
/// device and when we last used it.
struct DeviceRuntimeInfo {
  /// Starts with no outstanding work and stamps the device as used at
  /// construction time.
  DeviceRuntimeInfo() : lastUsedTimestamp(std::chrono::steady_clock::now()) {}

  /// Number of inference requests issued to the device that have not been
  /// marked finished yet.
  unsigned outstandingInferences{0};
  /// Monotonic-clock time at which the device was last selected.
  std::chrono::time_point<std::chrono::steady_clock> lastUsedTimestamp;
};
89
90	/// Individual Node in the DAG for a given network. This contains all the
91	/// information needed to run the sub-network at inference time.
92	/// NOTE: When adding members to this struct, if it's a compile-time member that
93	/// needs to be remembered when serializing the model, metadata-prop
94	/// serialization logic must be updated in ONNXModelImporter/ONNXModelWriter.
95	struct DAGNode {
96	/// The children of this node, these are nodes that depend on the current
97	/// node.
98	std::vector<DAGNode *> children;
99	/// Pointers to the parents of this node. This is used by the executor for
100	/// determining if a given node has all dependencies met.
101	std::vector<DAGNode *> parents;
102
103	/// Protects deviceRuntimeInfos;
104	std::mutex lock;
105	/// IDs of the deviceManagers that this network is assigned to.
106	std::map<DeviceIDTy, DeviceRuntimeInfo> deviceRuntimeInfos;
107
108	/// Map of deviceID to alternating state.
109	std::map<DeviceIDTy, unsigned> alternateFunction;
110
111	/// Count of duplications for network, this is the number of replications of
112	/// the network on a single card.
113	unsigned replicationCount{`1`};
114
115	/// Lock to protect against race conditions when getting the next duplicated
116	/// network name.
117	std::mutex nameLock;
118
119	/// Count of instances of this network created by saturateHost. This will be
120	/// copies across cards.
121	unsigned instanceCount{`1`};
122
123	/// Backend name for this network.
124	std::string backendName;
125	/// The logicalDevice is an output of the Partitioner to indicate that two
126	/// networks should be assigned to the same device. Multiple logical devices
127	/// indicates the network should be duplicated.
128	std::vector<DeviceIDTy> logicalDevices;
129	/// Index of the current deviceID in deviceIDs. This is used by the Executor
130	/// when picking a device to request a network run.
131	std::atomic<unsigned> currentDeviceIdx{`0`};
132	/// Name assigned to the sub-network, this is the id that will be passed to
133	/// the DeviceManager when requesting a run of the network.
134	std::string name;
135	/// Runtime bundle containing all the symbol information for this network at
136	/// runtime.
137	std::unique_ptr<RuntimeBundle> runtimeBundle;
138	/// Size of constants and placeholders used by the function.
139	uint64_t size{`0`};
140
141	/// Backend Hints object, this is populated by the Partitioner and is used
142	/// to communicated hints to the compiler, like SRAM pinning and resource
143	/// reservation.
144	BackendHints backendHints{};
145
146	/// Backend specific opts object, populated by the Partitioner.
147	BackendSpecificOptions backendSpecificOpts{};
148
149	/// Pointer to module the function came from. This is so the executor can
150	/// access the associated PHs for the function that are stored in the Module.
151	Module module{nullptr*};
152
153	/// Return the deviceId for the device that should execute the next request.
154	/// We select the device with the least amount of outstanding work on it. For
155	/// devices with the same amount of work remaining, we pick the one that's
156	/// least recently used as expect the work there will finish first.
157	DeviceIDTy getNextDevice() {
158	const std::lock_guard<std::mutex> g(lock);
159
160	auto selected = deviceRuntimeInfos.begin();
161	auto iter = deviceRuntimeInfos.begin();
162
163	for (++iter; iter != deviceRuntimeInfos.end(); ++iter) {
164	if (selected ->second.outstandingInferences >
165	iter ->second.outstandingInferences \|\|
166	(selected ->second.outstandingInferences ==
167	iter ->second.outstandingInferences &&
168	selected ->second.lastUsedTimestamp <
169	iter ->second.lastUsedTimestamp)) {
170	selected = iter;
171	}
172	}
173
174	selected ->second.outstandingInferences++;
175	selected ->second.lastUsedTimestamp = std::chrono::steady_clock::now();
176
177	return selected ->first;
178	}
179
180	void markFinished(DeviceIDTy deviceID) {
181	const auto iter = deviceRuntimeInfos.find(deviceID);
182	DCHECK(iter != deviceRuntimeInfos.end());
183	const std::lock_guard<std::mutex> g(lock);
184	iter ->second.outstandingInferences--;
185	}
186
187	void initAlternateState() {
188	std::lock_guard<std::mutex> g(nameLock);
189	for (auto dev : deviceRuntimeInfos) {
190	alternateFunction [dev.first] = `0`;
191	}
192	}
193
194	std::string getNextName(DeviceIDTy device) {
195	nameLock.lock();
196	auto currentNet = alternateFunction [device];
197	alternateFunction [device] = (currentNet + `1`) % replicationCount;
198	nameLock.unlock();
199
200	std::string newName = name;
201	if (currentNet) {
202	newName = name + "_replicated" + std::to_string(currentNet);
203	}
204	return newName;
205	}
206	};
207
208	/// This struct represents a DAG. The first element is the root of a DAG, and
209	/// the second one is a list of all rest nodes in this DAG.
210	using DAGNodePtr = std::unique_ptr<DAGNode>;
211	using DAGNodePtrVec = std::vector<std::unique_ptr<DAGNode>>;
212
213	struct DAG {
214	/// This is a root node it does not map directly to a loaded function. It
215	/// contains the name of the network, a list of children, and a reference to
216	/// the Module the function came from.
217	DAGNodePtr root;
218	/// This is a vector of all the DAGNodes. Structure is encoded in the DAGNodes
219	/// with pointers to parents and children.
220	DAGNodePtrVec nodes;
221	};
222
223	/// This list contains all the created DAGNodes from the Partitioner. The
224	/// contained DAGNodes can only refer to the DAGNodes from the same DAGListTy.
225	using DAGListTy = std::vector<DAG>;
226
227	/// This is the base class for DeviceManager configurations. Any specific
228	/// device can extend this class to contain information to identify
229	/// and configure the device manager. Additionally it needs to set it's backend
230	/// member variable to it's correct Backend.
231	struct DeviceConfig {
232	/// Backend used for this config. It is used in
233	/// checking the type of config before casting to a derived class.
234	const std::string backendName;
235	/// A human readable name to identify the device.
236	std::string name;
237	/// A runtime assigned id for the device. This is used for stats reporting.
238	unsigned deviceID{`0`};
239	/// Device memory size in bytes.
240	uint64_t deviceMemory = `0`;
241	/// A map of configuration parameters.
242	llvm::StringMap<std::string> parameters{};
243
244	DeviceConfig(llvm::StringRef backendName) : backendName (backendName) {}
245	DeviceConfig(llvm::StringRef backendName, llvm::StringRef name)
246	: backendName (backendName), name (name) {}
247
248	DeviceConfig(llvm::StringRef backendName, llvm::StringRef name,
249	llvm::StringMap<std::string> parameters)
250	: backendName (backendName), name (name), parameters (parameters) {}
251
252	bool hasName() const { return name != ""; }
253
254	void setDeviceMemory(uint64_t memSize) { deviceMemory = memSize; }
255
256	uint64_t getDeviceMemory() const { return deviceMemory; }
257
258	uint64_t getDeviceMemory(uint64_t defaultMemory) const {
259	return deviceMemory == `0` ? defaultMemory : deviceMemory;
260	}
261	};
262
/// Options configuring Host components of the Runtime, such as the Partitioner
/// and Executor.
struct HostConfig {
  /// Number of outstanding or concurrent networks before queueing.
  size_t maxActiveRequests{48};
  /// Number of requests to queue up before refusing further requests.
  size_t maxQueueSize{100};
  /// Number of threads to allocate to the Executor.
  size_t executorThreads{3};
};
273
274	/// This is struct for user defined partition.
275	struct PartitionConfig {
276	/// The name of the function to be partitioned.
277	std::string funcName;
278	/// The number of user defined partitions.
279	/// The partition ids are between 0 and numOfPartitions - 1, inclusive.
280	size_t numOfPartitions;
281	/// The backend for each partition. backendNames.size() == numOfPartitions.
282	std::vector<std::string> backendNames;
283	/// The name for each partition. partitionNames.size() == numOfPartitions.
284	std::vector<std::string> partitionNames;
285	/// The backend hints for each partition. backendNames.size() ==
286	/// numOfPartitions.
287	std::vector<BackendHints> backendHints;
288	/// The logical IDs to assign to the partitions.
289	std::vector<std::vector<unsigned>> logicalIDs;
290	/// The mapping between nodes' name to Partition ids. Assume there are n nodes
291	/// and m partitions. We have 2 types of valid mapping: 1. all nodes are
292	/// mapped to a partition. 2. For i-th (0 <= i < m) partition, the nodes
293	/// mapped to this partition id are not in this map, and the nodes mapped to
294	/// other partitions ids must be in this map. The node's name should be the
295	/// name in Glow function and may be different from the original name from
296	/// models. Since Glow will mangle names to make them unique.
297	llvm::StringMap<size_t> nodeToPartition;
298	/// A map containing desired number of replications for each partition. If a
299	/// count is not specified for a partition the default will be one copy of the
300	/// partition loaded [PartitionID, replicationCount].
301	std::map<unsigned, unsigned> replicationCount;
302
303	PartitionConfig() : numOfPartitions(`0`) {}
304	bool enabled() { return numOfPartitions > `0`; }
305	};
306
307	/// Struct for a pre-partitioned network already made up of multiple Functions.
308	struct PrePartitionedConfig {
309	/// The name of the root DAG node.
310	std::string funcName;
311	/// Functions from the module which are partitioned.
312	std::vector<Function *> funcs;
313	/// The logical IDs to assign to the partitions.
314	std::vector<std::vector<DeviceIDTy>> logicalIDs;
315	/// Backends that are used for each partition.
316	std::vector<std::string> backendNames;
317	/// BackendHints for each partition.
318	std::vector<BackendHints> backendHints;
319	/// Backend-specific options for each partition.
320	std::vector<BackendSpecificOptions> backendSpecificOpts;
321	/// Number of times to replicate each partition.
322	std::vector<unsigned> replicationCounts;
323
324	/// Resizes/reserves for all vectors in the struct to \p size. Resize is used
325	/// for those vectors which need to have their parameter constructed.
326	void resizeAndReserve(size_t size) {
327	funcs.reserve(size);
328	logicalIDs.resize(size);
329	backendNames.reserve(size);
330	backendHints.reserve(size);
331	backendSpecificOpts.resize(size);
332	replicationCounts.reserve(size);
333	}
334	};
335
336	/// A struct containing a mapping of ExecutionContext to a loaded network on a
337	/// device.
338	struct ContextBinding {
339	/// The context used for execution of the specified network.
340	ExecutionContext *context;
341	/// The device the network will be run on with this context.
342	DeviceManager *device;
343	/// The name of the network.
344	std::string networkName;
345	};
346
/// Signifiers for exporting and importing properties of Nodes.

/// Builds the prefix "partition_<idx>_" for the partition with index \p idx.
inline std::string getPartitionIdPrefix(int idx) {
  std::string prefix = "partition_";
  prefix += std::to_string(idx);
  prefix += "_";
  return prefix;
}
351
/// Signifier string "numLogicalDevices".
constexpr char numLogicalDevicesSignifier[] = "numLogicalDevices";

/// Builds the signifier "logicalDevice_<idx>" for the logical device with
/// index \p idx.
inline std::string getLogicalDeviceSignfier(int idx) {
  std::string signifier = "logicalDevice_";
  signifier += std::to_string(idx);
  return signifier;
}
356
/// Fixed signifier strings.
constexpr char nameSignifier[] = "name";
constexpr char backendNameSignifier[] = "backendName";
constexpr char executionUnitsSignifier[] = "BackendHint_executionUnits";
constexpr char sizeSignifier[] = "size";
constexpr char nodeOptSignifier[] = "NodeOpt";

/// Signifier string "numBackendSpecificOpts".
constexpr char numBackendSpecificOptsSignifier[] = "numBackendSpecificOpts";

/// Builds the signifier "backendSpecificOpts_key_<idx>" for the key of the
/// backend-specific option with index \p idx.
inline std::string getBackendSpecificOptKeySignifier(int idx) {
  std::string signifier = "backendSpecificOpts_key_";
  signifier += std::to_string(idx);
  return signifier;
}

/// Builds the signifier "backendSpecificOpts_val_<idx>" for the value of the
/// backend-specific option with index \p idx.
inline std::string getBackendSpecificOptValSignifier(int idx) {
  std::string signifier = "backendSpecificOpts_val_";
  signifier += std::to_string(idx);
  return signifier;
}

/// Signifier string "replicationCount".
constexpr char replicationCountSignifier[] = "replicationCount";
/// Empty signifier string.
/// NOTE(review): the purpose of this empty, generically named constant is not
/// evident from this file -- confirm it is still needed.
constexpr char Signifier[] = "";
373
374	} // namespace runtime
375	} // namespace glow
376	#endif // GLOW_RUNTIME_RUNTIMETYPES_H
377

Browse the source code of glow/include/glow/Runtime/RuntimeTypes.h