1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef GLOW_RUNTIME_RUNTIMETYPES_H
17#define GLOW_RUNTIME_RUNTIMETYPES_H
18
#include "glow/Backend/Backend.h"
#include "glow/Backend/BackendUtils.h"
#include "glow/Backends/BackendOptions.h"
#include "glow/Graph/Graph.h"
#include "glow/Support/Error.h"

#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
29
30namespace glow {
31
32class ExecutionContext;
33
34namespace runtime {
35
36class DeviceManager;
37using DeviceIDTy = size_t;
38using RunIdentifierTy = size_t;
39
40/// Map of DeviceIDTy -> DeviceManager.
41using DeviceManagerMapTy = std::map<DeviceIDTy, std::unique_ptr<DeviceManager>>;
42
43/// Callback type used by HostManager and DeviceManager, used to pass results of
44/// an inference request back to the caller.
45using ResultCBTy = std::function<void(runtime::RunIdentifierTy, Error,
46 std::unique_ptr<ExecutionContext>)>;
47
48/// Data structure that contains device constraint information for each device.
49/// Used to communicate memory constraints and later costs to the Partitioner.
struct DeviceInfo {
  // Every scalar member carries a default initializer so that a
  // default-constructed DeviceInfo never exposes indeterminate values.

  /// Available global memory on device in bytes.
  uint64_t availableMemory{0};
  /// Backend name.
  std::string backendName;
  /// A string containing the node names (e.g. Add, Div), separated by ",".
  /// E.g. "Div,Add". In Partitioner, those nodes won't be supported in this
  /// backend.
  std::string nonSupportedNodes;
  /// A string containing the node names (e.g. Add, Div), separated by ",".
  /// E.g. "Div,Add". In Partitioner, only those nodes are treated as
  /// supported in this backend; every node outside this set is not.
  std::string supportedNodes;
  /// Available SRAM capacity in bytes.
  uint64_t sramCapacity{0};
  /// Available (software controlled) local/scratchpad/onchip memory on the
  /// device in bytes.
  uint64_t availableLocalMemory{0};
  /// Peak compute on device in ops/second. Assumes all ops are in int8.
  /// TODO: distinguish between data types with different peak flops.
  float peakCompute{0.0f};
  /// Peak memory bandwidth from DRAM on device in bytes/second.
  float peakDramBw{0.0f};
  /// Peak memory bandwidth from SRAM on device in bytes/second.
  float peakSramBw{0.0f};
  /// Peak ingress/egress PCI-E bandwidth from device in bytes/second.
  float peakPCIeBw{0.0f};
  /// Maximum amount of input resources; defaults to 0 if there is no limit.
  uint64_t inputCountMax{0};
};
80
81/// Data structure that tracks how many outstanding work items remain for a
82/// device and when we last used it.
struct DeviceRuntimeInfo {
  /// Number of work items still outstanding on the device.
  unsigned outstandingInferences{0};
  /// Point in time the device was last used; starts out as the moment this
  /// object was constructed.
  std::chrono::steady_clock::time_point lastUsedTimestamp;

  /// Stamps the entry with the current steady-clock time.
  DeviceRuntimeInfo() : lastUsedTimestamp{std::chrono::steady_clock::now()} {}
};
89
90/// Individual Node in the DAG for a given network. This contains all the
91/// information needed to run the sub-network at inference time.
92/// NOTE: When adding members to this struct, if it's a compile-time member that
93/// needs to be remembered when serializing the model, metadata-prop
94/// serialization logic must be updated in ONNXModelImporter/ONNXModelWriter.
95struct DAGNode {
96 /// The children of this node, these are nodes that depend on the current
97 /// node.
98 std::vector<DAGNode *> children;
99 /// Pointers to the parents of this node. This is used by the executor for
100 /// determining if a given node has all dependencies met.
101 std::vector<DAGNode *> parents;
102
103 /// Protects deviceRuntimeInfos;
104 std::mutex lock;
105 /// IDs of the deviceManagers that this network is assigned to.
106 std::map<DeviceIDTy, DeviceRuntimeInfo> deviceRuntimeInfos;
107
108 /// Map of deviceID to alternating state.
109 std::map<DeviceIDTy, unsigned> alternateFunction;
110
111 /// Count of duplications for network, this is the number of replications of
112 /// the network on a single card.
113 unsigned replicationCount{1};
114
115 /// Lock to protect against race conditions when getting the next duplicated
116 /// network name.
117 std::mutex nameLock;
118
119 /// Count of instances of this network created by saturateHost. This will be
120 /// copies across cards.
121 unsigned instanceCount{1};
122
123 /// Backend name for this network.
124 std::string backendName;
125 /// The logicalDevice is an output of the Partitioner to indicate that two
126 /// networks should be assigned to the same device. Multiple logical devices
127 /// indicates the network should be duplicated.
128 std::vector<DeviceIDTy> logicalDevices;
129 /// Index of the current deviceID in deviceIDs. This is used by the Executor
130 /// when picking a device to request a network run.
131 std::atomic<unsigned> currentDeviceIdx{0};
132 /// Name assigned to the sub-network, this is the id that will be passed to
133 /// the DeviceManager when requesting a run of the network.
134 std::string name;
135 /// Runtime bundle containing all the symbol information for this network at
136 /// runtime.
137 std::unique_ptr<RuntimeBundle> runtimeBundle;
138 /// Size of constants and placeholders used by the function.
139 uint64_t size{0};
140
141 /// Backend Hints object, this is populated by the Partitioner and is used
142 /// to communicated hints to the compiler, like SRAM pinning and resource
143 /// reservation.
144 BackendHints backendHints{};
145
146 /// Backend specific opts object, populated by the Partitioner.
147 BackendSpecificOptions backendSpecificOpts{};
148
149 /// Pointer to module the function came from. This is so the executor can
150 /// access the associated PHs for the function that are stored in the Module.
151 Module *module{nullptr};
152
153 /// Return the deviceId for the device that should execute the next request.
154 /// We select the device with the least amount of outstanding work on it. For
155 /// devices with the same amount of work remaining, we pick the one that's
156 /// least recently used as expect the work there will finish first.
157 DeviceIDTy getNextDevice() {
158 const std::lock_guard<std::mutex> g(lock);
159
160 auto selected = deviceRuntimeInfos.begin();
161 auto iter = deviceRuntimeInfos.begin();
162
163 for (++iter; iter != deviceRuntimeInfos.end(); ++iter) {
164 if (selected->second.outstandingInferences >
165 iter->second.outstandingInferences ||
166 (selected->second.outstandingInferences ==
167 iter->second.outstandingInferences &&
168 selected->second.lastUsedTimestamp <
169 iter->second.lastUsedTimestamp)) {
170 selected = iter;
171 }
172 }
173
174 selected->second.outstandingInferences++;
175 selected->second.lastUsedTimestamp = std::chrono::steady_clock::now();
176
177 return selected->first;
178 }
179
180 void markFinished(DeviceIDTy deviceID) {
181 const auto iter = deviceRuntimeInfos.find(deviceID);
182 DCHECK(iter != deviceRuntimeInfos.end());
183 const std::lock_guard<std::mutex> g(lock);
184 iter->second.outstandingInferences--;
185 }
186
187 void initAlternateState() {
188 std::lock_guard<std::mutex> g(nameLock);
189 for (auto dev : deviceRuntimeInfos) {
190 alternateFunction[dev.first] = 0;
191 }
192 }
193
194 std::string getNextName(DeviceIDTy device) {
195 nameLock.lock();
196 auto currentNet = alternateFunction[device];
197 alternateFunction[device] = (currentNet + 1) % replicationCount;
198 nameLock.unlock();
199
200 std::string newName = name;
201 if (currentNet) {
202 newName = name + "_replicated" + std::to_string(currentNet);
203 }
204 return newName;
205 }
206};
207
208/// This struct represents a DAG. The first element is the root of a DAG, and
209/// the second one is a list of all rest nodes in this DAG.
210using DAGNodePtr = std::unique_ptr<DAGNode>;
211using DAGNodePtrVec = std::vector<std::unique_ptr<DAGNode>>;
212
213struct DAG {
214 /// This is a root node it does not map directly to a loaded function. It
215 /// contains the name of the network, a list of children, and a reference to
216 /// the Module the function came from.
217 DAGNodePtr root;
218 /// This is a vector of all the DAGNodes. Structure is encoded in the DAGNodes
219 /// with pointers to parents and children.
220 DAGNodePtrVec nodes;
221};
222
223/// This list contains all the created DAGNodes from the Partitioner. The
224/// contained DAGNodes can only refer to the DAGNodes from the same DAGListTy.
225using DAGListTy = std::vector<DAG>;
226
/// This is the base class for DeviceManager configurations. Any specific
/// device can extend this class to contain information to identify
/// and configure the device manager. Additionally it needs to set its backend
/// member variable to its correct Backend.
231struct DeviceConfig {
232 /// Backend used for this config. It is used in
233 /// checking the type of config before casting to a derived class.
234 const std::string backendName;
235 /// A human readable name to identify the device.
236 std::string name;
237 /// A runtime assigned id for the device. This is used for stats reporting.
238 unsigned deviceID{0};
239 /// Device memory size in bytes.
240 uint64_t deviceMemory = 0;
241 /// A map of configuration parameters.
242 llvm::StringMap<std::string> parameters{};
243
244 DeviceConfig(llvm::StringRef backendName) : backendName(backendName) {}
245 DeviceConfig(llvm::StringRef backendName, llvm::StringRef name)
246 : backendName(backendName), name(name) {}
247
248 DeviceConfig(llvm::StringRef backendName, llvm::StringRef name,
249 llvm::StringMap<std::string> parameters)
250 : backendName(backendName), name(name), parameters(parameters) {}
251
252 bool hasName() const { return name != ""; }
253
254 void setDeviceMemory(uint64_t memSize) { deviceMemory = memSize; }
255
256 uint64_t getDeviceMemory() const { return deviceMemory; }
257
258 uint64_t getDeviceMemory(uint64_t defaultMemory) const {
259 return deviceMemory == 0 ? defaultMemory : deviceMemory;
260 }
261};
262
263/// Options configuring Host components of the Runtime, such as the Partitioner
264/// and Executor.
struct HostConfig {
  /// How many networks may be outstanding/running concurrently before
  /// further requests are queued.
  size_t maxActiveRequests = 48;
  /// How many requests may wait in the queue before further requests are
  /// refused.
  size_t maxQueueSize = 100;
  /// How many threads are allocated to the Executor.
  size_t executorThreads = 3;
};
273
/// This is the struct describing a user-defined partitioning.
275struct PartitionConfig {
276 /// The name of the function to be partitioned.
277 std::string funcName;
278 /// The number of user defined partitions.
279 /// The partition ids are between 0 and numOfPartitions - 1, inclusive.
280 size_t numOfPartitions;
281 /// The backend for each partition. backendNames.size() == numOfPartitions.
282 std::vector<std::string> backendNames;
283 /// The name for each partition. partitionNames.size() == numOfPartitions.
284 std::vector<std::string> partitionNames;
285 /// The backend hints for each partition. backendNames.size() ==
286 /// numOfPartitions.
287 std::vector<BackendHints> backendHints;
288 /// The logical IDs to assign to the partitions.
289 std::vector<std::vector<unsigned>> logicalIDs;
290 /// The mapping between nodes' name to Partition ids. Assume there are n nodes
291 /// and m partitions. We have 2 types of valid mapping: 1. all nodes are
292 /// mapped to a partition. 2. For i-th (0 <= i < m) partition, the nodes
293 /// mapped to this partition id are not in this map, and the nodes mapped to
294 /// other partitions ids must be in this map. The node's name should be the
295 /// name in Glow function and may be different from the original name from
296 /// models. Since Glow will mangle names to make them unique.
297 llvm::StringMap<size_t> nodeToPartition;
298 /// A map containing desired number of replications for each partition. If a
299 /// count is not specified for a partition the default will be one copy of the
300 /// partition loaded [PartitionID, replicationCount].
301 std::map<unsigned, unsigned> replicationCount;
302
303 PartitionConfig() : numOfPartitions(0) {}
304 bool enabled() { return numOfPartitions > 0; }
305};
306
307/// Struct for a pre-partitioned network already made up of multiple Functions.
308struct PrePartitionedConfig {
309 /// The name of the root DAG node.
310 std::string funcName;
311 /// Functions from the module which are partitioned.
312 std::vector<Function *> funcs;
313 /// The logical IDs to assign to the partitions.
314 std::vector<std::vector<DeviceIDTy>> logicalIDs;
315 /// Backends that are used for each partition.
316 std::vector<std::string> backendNames;
317 /// BackendHints for each partition.
318 std::vector<BackendHints> backendHints;
319 /// Backend-specific options for each partition.
320 std::vector<BackendSpecificOptions> backendSpecificOpts;
321 /// Number of times to replicate each partition.
322 std::vector<unsigned> replicationCounts;
323
324 /// Resizes/reserves for all vectors in the struct to \p size. Resize is used
325 /// for those vectors which need to have their parameter constructed.
326 void resizeAndReserve(size_t size) {
327 funcs.reserve(size);
328 logicalIDs.resize(size);
329 backendNames.reserve(size);
330 backendHints.reserve(size);
331 backendSpecificOpts.resize(size);
332 replicationCounts.reserve(size);
333 }
334};
335
336/// A struct containing a mapping of ExecutionContext to a loaded network on a
337/// device.
338struct ContextBinding {
339 /// The context used for execution of the specified network.
340 ExecutionContext *context;
341 /// The device the network will be run on with this context.
342 DeviceManager *device;
343 /// The name of the network.
344 std::string networkName;
345};
346
347/// Signifiers for exporting and importing properties of Nodes.
/// \returns the prefix "partition_<idx>_" for the partition with index
/// \p idx.
inline std::string getPartitionIdPrefix(int idx) {
  std::string prefix = "partition_";
  prefix += std::to_string(idx);
  prefix += "_";
  return prefix;
}
351
/// Signifier string "numLogicalDevices".
constexpr char numLogicalDevicesSignifier[] = "numLogicalDevices";

/// \returns the signifier "logicalDevice_<idx>" for the logical device with
/// index \p idx.
inline std::string getLogicalDeviceSignfier(int idx) {
  std::string signifier = "logicalDevice_";
  signifier += std::to_string(idx);
  return signifier;
}
356
/// Signifier string "name".
constexpr char nameSignifier[] = "name";
/// Signifier string "backendName".
constexpr char backendNameSignifier[] = "backendName";
/// Signifier string "BackendHint_executionUnits".
constexpr char executionUnitsSignifier[] = "BackendHint_executionUnits";
/// Signifier string "size".
constexpr char sizeSignifier[] = "size";
/// Signifier string "NodeOpt".
constexpr char nodeOptSignifier[] = "NodeOpt";
362
/// Signifier string "numBackendSpecificOpts".
constexpr char numBackendSpecificOptsSignifier[] = "numBackendSpecificOpts";

/// \returns the signifier "backendSpecificOpts_key_<idx>" for the
/// backend-specific option with index \p idx.
inline std::string getBackendSpecificOptKeySignifier(int idx) {
  std::string signifier = "backendSpecificOpts_key_";
  signifier += std::to_string(idx);
  return signifier;
}
/// \returns the signifier "backendSpecificOpts_val_<idx>" for the
/// backend-specific option with index \p idx.
inline std::string getBackendSpecificOptValSignifier(int idx) {
  std::string signifier = "backendSpecificOpts_val_";
  signifier += std::to_string(idx);
  return signifier;
}
370
/// Signifier string "replicationCount".
constexpr char replicationCountSignifier[] = "replicationCount";
/// Empty signifier string.
/// NOTE(review): the unqualified name and empty value look like a leftover;
/// confirm whether anything still references it before removing.
constexpr char Signifier[] = "";
373
374} // namespace runtime
375} // namespace glow
376#endif // GLOW_RUNTIME_RUNTIMETYPES_H
377