1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef GLOW_RUNTIME_RUNTIMETYPES_H |
17 | #define GLOW_RUNTIME_RUNTIMETYPES_H |
18 | |
#include "glow/Backend/Backend.h"
#include "glow/Backend/BackendUtils.h"
#include "glow/Backends/BackendOptions.h"
#include "glow/Graph/Graph.h"
#include "glow/Support/Error.h"

#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
29 | |
30 | namespace glow { |
31 | |
32 | class ExecutionContext; |
33 | |
34 | namespace runtime { |
35 | |
36 | class DeviceManager; |
37 | using DeviceIDTy = size_t; |
38 | using RunIdentifierTy = size_t; |
39 | |
40 | /// Map of DeviceIDTy -> DeviceManager. |
41 | using DeviceManagerMapTy = std::map<DeviceIDTy, std::unique_ptr<DeviceManager>>; |
42 | |
43 | /// Callback type used by HostManager and DeviceManager, used to pass results of |
44 | /// an inference request back to the caller. |
45 | using ResultCBTy = std::function<void(runtime::RunIdentifierTy, Error, |
46 | std::unique_ptr<ExecutionContext>)>; |
47 | |
/// Data structure that contains device constraint information for each device.
/// Used to communicate memory constraints and later costs to the Partitioner.
///
/// Every numeric member carries a zero default so that a default-constructed
/// DeviceInfo never exposes indeterminate values.
struct DeviceInfo {
  /// Available global memory on device in bytes.
  uint64_t availableMemory{0};
  /// Backend Type.
  std::string backendName;
  /// A string containing node names (e.g. Add, Div) separated by ",".
  /// E.g. "Div,Add". In the Partitioner, those nodes won't be supported in
  /// this backend.
  std::string nonSupportedNodes;
  /// A string containing node names (e.g. Add, Div) separated by ",".
  /// E.g. "Div,Add". In the Partitioner, the complementary set of those nodes
  /// won't be supported in this backend.
  std::string supportedNodes;
  /// Available SRAM capacity in bytes.
  uint64_t sramCapacity{0};
  /// Available (software controlled) local/scratchpad/onchip memory on the
  /// device in bytes.
  uint64_t availableLocalMemory{0};
  /// Peak compute on device in ops/second. Assumes all ops are in int8.
  /// TODO: distinguish between data types with different peak flops.
  float peakCompute{0.0f};
  /// Peak memory bandwidth from DRAM on device in bytes/second.
  float peakDramBw{0.0f};
  /// Peak memory bandwidth from SRAM on device in bytes/second.
  float peakSramBw{0.0f};
  /// Peak ingress/egress PCI-E bandwidth from device in bytes/second.
  float peakPCIeBw{0.0f};
  /// Maximum amount of input resources; defaults to 0 if there is no limit.
  uint64_t inputCountMax{0};
};
80 | |
/// Per-device bookkeeping: how many work items are still outstanding on a
/// device and when the device was last used.
struct DeviceRuntimeInfo {
  /// Stamps the new entry with the current steady-clock time.
  DeviceRuntimeInfo() : lastUsedTimestamp{std::chrono::steady_clock::now()} {}

  /// Number of outstanding inferences; a fresh entry starts at zero.
  unsigned outstandingInferences = 0;
  /// Last-used time point, initialized to the moment of construction.
  std::chrono::steady_clock::time_point lastUsedTimestamp;
};
89 | |
90 | /// Individual Node in the DAG for a given network. This contains all the |
91 | /// information needed to run the sub-network at inference time. |
92 | /// NOTE: When adding members to this struct, if it's a compile-time member that |
93 | /// needs to be remembered when serializing the model, metadata-prop |
94 | /// serialization logic must be updated in ONNXModelImporter/ONNXModelWriter. |
95 | struct DAGNode { |
96 | /// The children of this node, these are nodes that depend on the current |
97 | /// node. |
98 | std::vector<DAGNode *> children; |
99 | /// Pointers to the parents of this node. This is used by the executor for |
100 | /// determining if a given node has all dependencies met. |
101 | std::vector<DAGNode *> parents; |
102 | |
103 | /// Protects deviceRuntimeInfos; |
104 | std::mutex lock; |
105 | /// IDs of the deviceManagers that this network is assigned to. |
106 | std::map<DeviceIDTy, DeviceRuntimeInfo> deviceRuntimeInfos; |
107 | |
108 | /// Map of deviceID to alternating state. |
109 | std::map<DeviceIDTy, unsigned> alternateFunction; |
110 | |
111 | /// Count of duplications for network, this is the number of replications of |
112 | /// the network on a single card. |
113 | unsigned replicationCount{1}; |
114 | |
115 | /// Lock to protect against race conditions when getting the next duplicated |
116 | /// network name. |
117 | std::mutex nameLock; |
118 | |
119 | /// Count of instances of this network created by saturateHost. This will be |
120 | /// copies across cards. |
121 | unsigned instanceCount{1}; |
122 | |
123 | /// Backend name for this network. |
124 | std::string backendName; |
125 | /// The logicalDevice is an output of the Partitioner to indicate that two |
126 | /// networks should be assigned to the same device. Multiple logical devices |
127 | /// indicates the network should be duplicated. |
128 | std::vector<DeviceIDTy> logicalDevices; |
129 | /// Index of the current deviceID in deviceIDs. This is used by the Executor |
130 | /// when picking a device to request a network run. |
131 | std::atomic<unsigned> currentDeviceIdx{0}; |
132 | /// Name assigned to the sub-network, this is the id that will be passed to |
133 | /// the DeviceManager when requesting a run of the network. |
134 | std::string name; |
135 | /// Runtime bundle containing all the symbol information for this network at |
136 | /// runtime. |
137 | std::unique_ptr<RuntimeBundle> runtimeBundle; |
138 | /// Size of constants and placeholders used by the function. |
139 | uint64_t size{0}; |
140 | |
141 | /// Backend Hints object, this is populated by the Partitioner and is used |
142 | /// to communicated hints to the compiler, like SRAM pinning and resource |
143 | /// reservation. |
144 | BackendHints backendHints{}; |
145 | |
146 | /// Backend specific opts object, populated by the Partitioner. |
147 | BackendSpecificOptions backendSpecificOpts{}; |
148 | |
149 | /// Pointer to module the function came from. This is so the executor can |
150 | /// access the associated PHs for the function that are stored in the Module. |
151 | Module *module{nullptr}; |
152 | |
153 | /// Return the deviceId for the device that should execute the next request. |
154 | /// We select the device with the least amount of outstanding work on it. For |
155 | /// devices with the same amount of work remaining, we pick the one that's |
156 | /// least recently used as expect the work there will finish first. |
157 | DeviceIDTy getNextDevice() { |
158 | const std::lock_guard<std::mutex> g(lock); |
159 | |
160 | auto selected = deviceRuntimeInfos.begin(); |
161 | auto iter = deviceRuntimeInfos.begin(); |
162 | |
163 | for (++iter; iter != deviceRuntimeInfos.end(); ++iter) { |
164 | if (selected->second.outstandingInferences > |
165 | iter->second.outstandingInferences || |
166 | (selected->second.outstandingInferences == |
167 | iter->second.outstandingInferences && |
168 | selected->second.lastUsedTimestamp < |
169 | iter->second.lastUsedTimestamp)) { |
170 | selected = iter; |
171 | } |
172 | } |
173 | |
174 | selected->second.outstandingInferences++; |
175 | selected->second.lastUsedTimestamp = std::chrono::steady_clock::now(); |
176 | |
177 | return selected->first; |
178 | } |
179 | |
180 | void markFinished(DeviceIDTy deviceID) { |
181 | const auto iter = deviceRuntimeInfos.find(deviceID); |
182 | DCHECK(iter != deviceRuntimeInfos.end()); |
183 | const std::lock_guard<std::mutex> g(lock); |
184 | iter->second.outstandingInferences--; |
185 | } |
186 | |
187 | void initAlternateState() { |
188 | std::lock_guard<std::mutex> g(nameLock); |
189 | for (auto dev : deviceRuntimeInfos) { |
190 | alternateFunction[dev.first] = 0; |
191 | } |
192 | } |
193 | |
194 | std::string getNextName(DeviceIDTy device) { |
195 | nameLock.lock(); |
196 | auto currentNet = alternateFunction[device]; |
197 | alternateFunction[device] = (currentNet + 1) % replicationCount; |
198 | nameLock.unlock(); |
199 | |
200 | std::string newName = name; |
201 | if (currentNet) { |
202 | newName = name + "_replicated" + std::to_string(currentNet); |
203 | } |
204 | return newName; |
205 | } |
206 | }; |
207 | |
208 | /// This struct represents a DAG. The first element is the root of a DAG, and |
209 | /// the second one is a list of all rest nodes in this DAG. |
210 | using DAGNodePtr = std::unique_ptr<DAGNode>; |
211 | using DAGNodePtrVec = std::vector<std::unique_ptr<DAGNode>>; |
212 | |
213 | struct DAG { |
214 | /// This is a root node it does not map directly to a loaded function. It |
215 | /// contains the name of the network, a list of children, and a reference to |
216 | /// the Module the function came from. |
217 | DAGNodePtr root; |
218 | /// This is a vector of all the DAGNodes. Structure is encoded in the DAGNodes |
219 | /// with pointers to parents and children. |
220 | DAGNodePtrVec nodes; |
221 | }; |
222 | |
223 | /// This list contains all the created DAGNodes from the Partitioner. The |
224 | /// contained DAGNodes can only refer to the DAGNodes from the same DAGListTy. |
225 | using DAGListTy = std::vector<DAG>; |
226 | |
227 | /// This is the base class for DeviceManager configurations. Any specific |
228 | /// device can extend this class to contain information to identify |
229 | /// and configure the device manager. Additionally it needs to set it's backend |
230 | /// member variable to it's correct Backend. |
231 | struct DeviceConfig { |
232 | /// Backend used for this config. It is used in |
233 | /// checking the type of config before casting to a derived class. |
234 | const std::string backendName; |
235 | /// A human readable name to identify the device. |
236 | std::string name; |
237 | /// A runtime assigned id for the device. This is used for stats reporting. |
238 | unsigned deviceID{0}; |
239 | /// Device memory size in bytes. |
240 | uint64_t deviceMemory = 0; |
241 | /// A map of configuration parameters. |
242 | llvm::StringMap<std::string> parameters{}; |
243 | |
244 | DeviceConfig(llvm::StringRef backendName) : backendName(backendName) {} |
245 | DeviceConfig(llvm::StringRef backendName, llvm::StringRef name) |
246 | : backendName(backendName), name(name) {} |
247 | |
248 | DeviceConfig(llvm::StringRef backendName, llvm::StringRef name, |
249 | llvm::StringMap<std::string> parameters) |
250 | : backendName(backendName), name(name), parameters(parameters) {} |
251 | |
252 | bool hasName() const { return name != "" ; } |
253 | |
254 | void setDeviceMemory(uint64_t memSize) { deviceMemory = memSize; } |
255 | |
256 | uint64_t getDeviceMemory() const { return deviceMemory; } |
257 | |
258 | uint64_t getDeviceMemory(uint64_t defaultMemory) const { |
259 | return deviceMemory == 0 ? defaultMemory : deviceMemory; |
260 | } |
261 | }; |
262 | |
/// Options configuring Host components of the Runtime, such as the Partitioner
/// and Executor.
struct HostConfig {
  /// Number of outstanding or concurrent networks before queueing
  /// (default 48).
  size_t maxActiveRequests = 48;
  /// Number of requests to queue up before refusing further requests
  /// (default 100).
  size_t maxQueueSize = 100;
  /// Number of threads to allocate to the Executor (default 3).
  size_t executorThreads = 3;
};
273 | |
274 | /// This is struct for user defined partition. |
275 | struct PartitionConfig { |
276 | /// The name of the function to be partitioned. |
277 | std::string funcName; |
278 | /// The number of user defined partitions. |
279 | /// The partition ids are between 0 and numOfPartitions - 1, inclusive. |
280 | size_t numOfPartitions; |
281 | /// The backend for each partition. backendNames.size() == numOfPartitions. |
282 | std::vector<std::string> backendNames; |
283 | /// The name for each partition. partitionNames.size() == numOfPartitions. |
284 | std::vector<std::string> partitionNames; |
285 | /// The backend hints for each partition. backendNames.size() == |
286 | /// numOfPartitions. |
287 | std::vector<BackendHints> backendHints; |
288 | /// The logical IDs to assign to the partitions. |
289 | std::vector<std::vector<unsigned>> logicalIDs; |
290 | /// The mapping between nodes' name to Partition ids. Assume there are n nodes |
291 | /// and m partitions. We have 2 types of valid mapping: 1. all nodes are |
292 | /// mapped to a partition. 2. For i-th (0 <= i < m) partition, the nodes |
293 | /// mapped to this partition id are not in this map, and the nodes mapped to |
294 | /// other partitions ids must be in this map. The node's name should be the |
295 | /// name in Glow function and may be different from the original name from |
296 | /// models. Since Glow will mangle names to make them unique. |
297 | llvm::StringMap<size_t> nodeToPartition; |
298 | /// A map containing desired number of replications for each partition. If a |
299 | /// count is not specified for a partition the default will be one copy of the |
300 | /// partition loaded [PartitionID, replicationCount]. |
301 | std::map<unsigned, unsigned> replicationCount; |
302 | |
303 | PartitionConfig() : numOfPartitions(0) {} |
304 | bool enabled() { return numOfPartitions > 0; } |
305 | }; |
306 | |
307 | /// Struct for a pre-partitioned network already made up of multiple Functions. |
308 | struct PrePartitionedConfig { |
309 | /// The name of the root DAG node. |
310 | std::string funcName; |
311 | /// Functions from the module which are partitioned. |
312 | std::vector<Function *> funcs; |
313 | /// The logical IDs to assign to the partitions. |
314 | std::vector<std::vector<DeviceIDTy>> logicalIDs; |
315 | /// Backends that are used for each partition. |
316 | std::vector<std::string> backendNames; |
317 | /// BackendHints for each partition. |
318 | std::vector<BackendHints> backendHints; |
319 | /// Backend-specific options for each partition. |
320 | std::vector<BackendSpecificOptions> backendSpecificOpts; |
321 | /// Number of times to replicate each partition. |
322 | std::vector<unsigned> replicationCounts; |
323 | |
324 | /// Resizes/reserves for all vectors in the struct to \p size. Resize is used |
325 | /// for those vectors which need to have their parameter constructed. |
326 | void resizeAndReserve(size_t size) { |
327 | funcs.reserve(size); |
328 | logicalIDs.resize(size); |
329 | backendNames.reserve(size); |
330 | backendHints.reserve(size); |
331 | backendSpecificOpts.resize(size); |
332 | replicationCounts.reserve(size); |
333 | } |
334 | }; |
335 | |
336 | /// A struct containing a mapping of ExecutionContext to a loaded network on a |
337 | /// device. |
338 | struct ContextBinding { |
339 | /// The context used for execution of the specified network. |
340 | ExecutionContext *context; |
341 | /// The device the network will be run on with this context. |
342 | DeviceManager *device; |
343 | /// The name of the network. |
344 | std::string networkName; |
345 | }; |
346 | |
/// Signifiers for exporting and importing properties of Nodes.

/// \returns the string "partition_<idx>_" for the given index \p idx.
inline std::string getPartitionIdPrefix(int idx) {
  std::string prefix("partition_");
  prefix += std::to_string(idx);
  prefix += "_";
  return prefix;
}
351 | |
/// Signifier string "numLogicalDevices".
/// NOTE(review): presumably the key for the number of logical devices of a
/// node -- confirm against the importer/writer that consume it.
constexpr char numLogicalDevicesSignifier[] = "numLogicalDevices";
/// \returns the string "logicalDevice_<idx>" for the given index \p idx.
inline std::string getLogicalDeviceSignfier(int idx) {
  std::string signifier("logicalDevice_");
  signifier += std::to_string(idx);
  return signifier;
}
356 | |
/// Signifier string "name".
constexpr char nameSignifier[] = "name";
/// Signifier string "backendName".
constexpr char backendNameSignifier[] = "backendName";
/// Signifier string "BackendHint_executionUnits".
constexpr char executionUnitsSignifier[] = "BackendHint_executionUnits";
/// Signifier string "size".
constexpr char sizeSignifier[] = "size";
/// Signifier string "NodeOpt".
constexpr char nodeOptSignifier[] = "NodeOpt";
362 | |
/// Signifier string "numBackendSpecificOpts".
/// NOTE(review): presumably the key for the number of backend-specific
/// options -- confirm against the importer/writer that consume it.
constexpr char numBackendSpecificOptsSignifier[] = "numBackendSpecificOpts";
/// \returns the string "backendSpecificOpts_key_<idx>" for the given index
/// \p idx.
inline std::string getBackendSpecificOptKeySignifier(int idx) {
  std::string signifier("backendSpecificOpts_key_");
  signifier += std::to_string(idx);
  return signifier;
}
/// \returns the string "backendSpecificOpts_val_<idx>" for the given index
/// \p idx.
inline std::string getBackendSpecificOptValSignifier(int idx) {
  std::string signifier("backendSpecificOpts_val_");
  signifier += std::to_string(idx);
  return signifier;
}
370 | |
/// Signifier string "replicationCount".
constexpr char replicationCountSignifier[] = "replicationCount";
/// Empty signifier string.
/// NOTE(review): the generic name and empty value give no hint of the intended
/// use; confirm whether this constant is still needed.
constexpr char Signifier[] = "";
373 | |
374 | } // namespace runtime |
375 | } // namespace glow |
376 | #endif // GLOW_RUNTIME_RUNTIMETYPES_H |
377 | |