1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file tvm/target/virtual_device.h |
22 | * \brief A compile time representation for where data is to be stored at runtime, and how to |
23 | * compile code to compute it. |
24 | */ |
25 | |
26 | #ifndef TVM_TARGET_VIRTUAL_DEVICE_H_ |
27 | #define TVM_TARGET_VIRTUAL_DEVICE_H_ |
28 | |
29 | #include <tvm/ir/transform.h> |
30 | #include <tvm/target/target.h> |
31 | |
32 | #include <string> |
33 | #include <unordered_set> |
34 | #include <utility> |
35 | |
36 | namespace tvm { |
37 | |
38 | /*! |
39 | * Abstract label for an area of memory. |
40 | * |
41 | * Currently uninterpreted and arbitrary. Likely to be replaced by a structured representation |
42 | * of a memory pool in the future. Please try to use this alias instead of String to aid future |
43 | * code migration. |
44 | */ |
45 | using MemoryScope = String; |
46 | |
47 | /*! |
48 | * \brief Describes at compile time the constraints on where data is to be stored at runtime |
49 | * down to the (virtual) device and memory scope level, and how to compile code to compute that |
50 | * data. Used by the \p PlanDevices pass to collect and solve (virtual) device constraints for |
51 | * the whole Relay program. |
52 | * |
53 | * Is a quadruple of: |
54 | * - A \p device_type (\p DLDeviceType). May be \p kInvalidDeviceType if unconstrained. |
55 | * - A \p virtual_device_id (\p int). This allows us to distinguish distinct devices |
56 | * with the same \p Target, for example in a multi-GPU system. May be -1 if unconstrained. |
57 | * See "Virtual Devices" below. |
58 | * - A \p target (\p Target) describing how to compile code for the intended device. May be null |
59 | * if unconstrained. |
60 | * - A \p memory_scope (\p MemoryScope, which is currently just \p String) describing which memory |
61 | * area is to be used to hold data. May be "" if unconstrained. See "Memory Scopes and Devices" |
62 | * below. |
63 | * |
64 | * Some or all of these fields may be unconstrained, signaling that device planning is free to |
65 | * choose a value consistent with the whole program. However if a \p target is given then the \p |
66 | * device_type must equal \p target->GetTargetDeviceType(). |
67 | * |
68 | * Note that currently we assume if a function returns its result on a particular (virtual) device |
69 | * then the function body is also executed on that device. See the overview comment in |
70 | * src/relay/transforms/device_planner.cc for more details. |
71 | * |
72 | * By 'data' we include both tensors and additional supporting datastructures such as shapes, |
73 | * Relay ADT items (including tuples), Relay references, and Relay closures. Typically non-tensor |
74 | * data must reside on a 'CPU'-like host device with good support for scalars. |
75 | * |
76 | * By 'execution' we include both (fused) primitive operators, and all the Relay expressions |
77 | * surrounding them which coordinates data and control flow. Again, typically non-primitive |
78 | * operators must be executed on a 'CPU'-like device with good support for control flow. |
79 | * |
80 | * Since TVM targets such a wide range of systems it is not possible for \p VirtualDevice to impose |
81 | * much semantics on these fields, particularly for \p virtual_device_id and \p memory_scope. |
82 | * Instead we assume downstream passes and codegen will interpret and validate these fields |
83 | * appropriately. |
84 | * |
85 | * Targets vs Devices |
86 | * ------------------ |
87 | * Generally \p Targets (a compile-time only datastructure) describe compiler options for a specific |
88 | * microarchitecture and toolchain, while \p Devices (a runtime datastructure also available at |
89 | * compile time) describe a physical device on the target system. Obviously the target must agree |
90 | * with the device's microarchitecture, but we otherwise don't impose any constraints between them: |
91 | * - It's ok to use different \p Targets for the same \p Device, eg to squeeze some extra perf |
92 | * out of a particular primitive using particular compiler flags. |
93 | * - It's ok to use the same \p Target for multiple \p Devices, eg if we have multiple CPUs. |
94 | * |
95 | * Traditionally TVM assumes at most one \p Target per \p DLDeviceType. We are moving away from that |
96 | * assumption. |
97 | * |
98 | * Virtual vs Physical Devices |
99 | * --------------------------- |
100 | * The \p virtual_device_id may be used by downstream passes or the runtime to help decide which |
101 | * \p device_id to use for a particular physical runtime \p Device. For example: |
102 | * - Some runtimes may support passing in an array of actual `device` specifications, and the |
103 | * \p virtual_device_id can be used at runtime as an index into that array. |
104 | * - Some runtimes may support dynamically allocating computations to physical devices. On these |
105 | * systems a large space of \p virtual_device_ids could be used at compile time, even though |
106 | * at runtime only a few physical devices will be present. |
107 | * |
108 | * The \p virtual_device_id may also be left unconstrained if not needed. |
109 | * |
110 | * Memory Scopes and Devices |
111 | * ------------------------- |
112 | * Multi-device systems can have complex memory hierarchies. For example |
113 | * \code |
114 | * (kDLCPU, 0, "llvm", "global") |
115 | * \endcode |
116 | * and |
117 | * \code |
118 | * (kDLCPU, 1, "llvm", "global") |
119 | * \endcode |
120 | * could denote: |
121 | * - The same memory area accessible from two separate CPUs without any CPU affinity; |
122 | * - Distinct memory areas in a NUMA architecture for which cross-device access is handled |
123 | * by the memory system; |
124 | * - Outright distinct memory areas, where one device cannot directly address the memory of |
125 | * another. |
126 | * |
127 | * Similarly: |
128 | * \code |
129 | * (kDLCPU, 0, "llvm", "global") |
130 | * \endcode |
131 | * and |
132 | * \code |
133 | * (kDLCUDA, 0, "cuda", "host") |
134 | * \endcode |
135 | * could denote the same memory area, but with very different access costs. |
136 | * |
137 | * Furthermore, not all memory scopes are accessible to all devices, and it is possible for |
138 | * a memory scope to only be accessible to a device when code is compiled with particular |
139 | * \p Target options. |
140 | * |
141 | * \p VirtualDevices themselves have no system-level understanding. Currently the \p PlanDevices |
142 | * pass will simply insert "device_copy" operators wherever \p VirtualDevices are not exactly |
143 | * pointwise equal. We may revisit this in the future as the work on memory pools matures. |
144 | * |
145 | * Joining and Defaulting |
146 | * ---------------------- |
147 | * It is possible to 'join' two \p VirtualDevices to yield the most constrained \p VirtualDevice |
148 | * which agrees with both join arguments. Eg: |
149 | * \code |
150 | * Join((kDLCPU, -1, "llvm", ""), (kInvalidDeviceType, 3, null, "global")) |
151 | * => (kDLCPU, 3, "llvm", "global") |
152 | * Join((kDLCPU, -1, "llvm", "global"), (kInvalidDeviceType, 3, null, "local")) |
153 | * => null (no join possible) |
154 | * \endcode |
155 | * |
156 | * Related to 'join' is 'default', which only takes constrained fields from the rhs when the |
157 | * lhs is unconstrained: |
158 | * \code |
159 | * Default((kDLCPU, -1, "llvm", "local"), (kDLCPU, 3, null, "global")) |
160 | * => (kDLCPU, 3, "llvm", "local") |
161 | * \endcode |
162 | * |
163 | * These operations are needed during device planning. |
164 | */ |
165 | |
166 | class VirtualDeviceNode : public AttrsNode<VirtualDeviceNode> { |
167 | private: |
168 | /*! |
169 | * \brief The \p DLDeviceType (represented as an int) of the virtual device. If \p target is |
170 | * known then this will be equal to \p target->GetTargetDeviceType(). If \p target is null then |
171 | * the target is to be determined later. |
172 | * |
173 | * This is needed to support the legacy "on_device" and "device_copy" calls which only allow |
174 | * a \p DLDeviceTypes (as an integer) to be given. |
175 | * |
176 | * kInvalidDeviceType denotes unconstrained. An int since the DLDeviceType enum representation |
177 | * is not fixed. Private to discourage further int vs DLDeviceType confusion. |
178 | */ |
179 | int /* actually DLDeviceType */ device_type_int; |
180 | |
181 | public: |
182 | DLDeviceType device_type() const { return static_cast<DLDeviceType>(device_type_int); } |
183 | |
184 | /*! |
185 | * \brief The device identifier for the virtual device. This must be resolved to a physical |
186 | * device identifier either during compilation or at runtime. |
187 | * |
188 | * -1 denotes unconstrained. |
189 | */ |
190 | int virtual_device_id; |
191 | |
192 | /*! |
193 | * \brief The \p Target describing how to compile for the virtual device. |
194 | * |
195 | * Null denotes unconstrained. Note that if a target later becomes known for this \p VirtualDevice |
196 | * then it must be consistent with the \p device_type if already known. This is enforced by the |
197 | * Join and Default methods. |
198 | */ |
199 | Target target; |
200 | |
201 | /*! |
202 | * \brief The scope of memory w.r.t. the virtual device which holds data. |
203 | * |
204 | * Empty denotes unconstrained. |
205 | */ |
206 | MemoryScope memory_scope; |
207 | |
208 | /*! |
209 | * \brief Returns true if virtual device is 'fully unconstrained', ie no target/device type, |
210 | * device id or memory scope is specified. |
211 | */ |
212 | bool IsFullyUnconstrained() const { |
213 | return !target.defined() && device_type() == kInvalidDeviceType && virtual_device_id == -1 && |
214 | memory_scope.empty(); |
215 | } |
216 | |
217 | /*! |
218 | * \brief Returns true if virtual device is 'fully constrained', ie target, device id and memory |
219 | * scope are all specified. |
220 | */ |
221 | bool IsFullyConstrained() const { |
222 | return target.defined() && virtual_device_id != -1 && !memory_scope.empty(); |
223 | } |
224 | |
225 | /*! |
226 | * \brief Returns the (virtual) \p Device implied by this \p VirtualDevice. Both the \p |
227 | * device_type and \p virtual_device_must be constrained. The returned \p Device may not |
228 | * correspond to any physical device available at compile time or even runtime: see "Virtual vs |
229 | * Physical Devices" above. |
230 | */ |
231 | Device ToDevice() const { |
232 | ICHECK(device_type() != kInvalidDeviceType); |
233 | ICHECK(virtual_device_id != -1); |
234 | Device device; |
235 | device.device_type = device_type(); |
236 | device.device_id = virtual_device_id; |
237 | return device; |
238 | } |
239 | |
240 | TVM_DECLARE_ATTRS(VirtualDeviceNode, "VirtualDevice" ) { |
241 | TVM_ATTR_FIELD(device_type_int) |
242 | .describe("The type of the virtual device." ) |
243 | .set_default(kInvalidDeviceType); |
244 | TVM_ATTR_FIELD(virtual_device_id) |
245 | .describe("The device id of the virtual device." ) |
246 | .set_default(-1); |
247 | TVM_ATTR_FIELD(target) |
248 | .describe("The target describing how to compile for the virtual device." ) |
249 | .set_default(Target()); |
250 | TVM_ATTR_FIELD(memory_scope) |
251 | .describe("The area of memory w.r.t. the virtual device where data is stored." ) |
252 | .set_default("" ); |
253 | } |
254 | |
255 | friend class VirtualDevice; |
256 | }; |
257 | |
258 | /*! |
259 | * \brief Managed reference class to \p VirtualDeviceNode. |
260 | */ |
261 | class VirtualDevice : public ObjectRef { |
262 | public: |
263 | /*! |
264 | * \brief Construct a virtual device. |
265 | * \param device_type The device type for the virtual device, or \p kInvalidDeviceType if |
266 | * unconstrained. If \p target is defined then must match its \p target->GetTargetDeviceType(). |
267 | * \param virtual_device_id The device id for the virtual device, or -1 if unconstrained. |
268 | * \param target The target describing how to compile for the virtual device, or null if |
269 | * unconstrained. |
270 | * \param memory_scope The memory scope w.r.t. the virtual device which holds data, or "" if |
271 | * unconstrained. |
272 | * \return The virtual device. |
273 | */ |
274 | explicit VirtualDevice(DLDeviceType device_type = kInvalidDeviceType, int virtual_device_id = -1, |
275 | Target target = {}, MemoryScope memory_scope = {}); |
276 | |
277 | /*! \brief Returns the unique fully unconstrained \p VirtualDevice. */ |
278 | static VirtualDevice FullyUnconstrained(); |
279 | |
280 | /*! |
281 | * \brief Returns the \p VirtualDevice for \p device_type and (if not -1) \p virtual_device_id. |
282 | * The target and memory scope will be unconstrained. |
283 | */ |
284 | static VirtualDevice ForDeviceType(DLDeviceType device_type, int virtual_device_id = -1) { |
285 | ICHECK_GT(device_type, 0); |
286 | return VirtualDevice(device_type, virtual_device_id); |
287 | } |
288 | static VirtualDevice ForDeviceType(int device_type, int virtual_device_id = -1) { |
289 | return ForDeviceType(static_cast<DLDeviceType>(device_type), virtual_device_id); |
290 | } |
291 | static VirtualDevice ForDeviceType(const Integer& device_type, int virtual_device_id = -1) { |
292 | return ForDeviceType(static_cast<int>(device_type->value), virtual_device_id); |
293 | } |
294 | |
295 | /*! \brief Returns the \p VirtualDevice for \p device. */ |
296 | static VirtualDevice ForDevice(const Device& device) { |
297 | return ForDeviceType(device.device_type, device.device_id); |
298 | } |
299 | |
300 | /*! \brief Returns the \p VirtualDevice for \p device and \p target. */ |
301 | static VirtualDevice ForDeviceAndTarget(const Device& device, Target target) { |
302 | return VirtualDevice(device.device_type, device.device_id, std::move(target)); |
303 | } |
304 | |
305 | /*! \brief Returns the \p VirtualDevice for \p target. */ |
306 | static VirtualDevice ForTarget(Target target) { |
307 | DLDeviceType device_type = static_cast<DLDeviceType>(target->GetTargetDeviceType()); |
308 | return VirtualDevice(device_type, /*virtual_device_id=*/0, std::move(target)); |
309 | } |
310 | |
311 | /*! \brief Returns the \p VirtualDevice for \p memory_scope alone. */ |
312 | static VirtualDevice ForMemoryScope(MemoryScope memory_scope) { |
313 | return VirtualDevice(kInvalidDeviceType, -1, {}, std::move(memory_scope)); |
314 | } |
315 | |
316 | /*! \brief Returns the \p VirtualDevice for \p device, \p target and \p memory_scope. */ |
317 | TVM_DLL static VirtualDevice ForDeviceTargetAndMemoryScope(const Device& device, Target target, |
318 | MemoryScope memory_scope) { |
319 | return VirtualDevice(device.device_type, device.device_id, std::move(target), |
320 | std::move(memory_scope)); |
321 | } |
322 | |
323 | /*! |
324 | * \brief Returns the 'join' of \p lhs and \p rhs. The result will agree pointwise with |
325 | * \p lhs and \p rhs on all their constrained fields. Returns the null optional if no such |
326 | * join exists, ie there's disagreement on at least one constrained field. |
327 | */ |
328 | static Optional<VirtualDevice> Join(const VirtualDevice& lhs, const VirtualDevice& rhs); |
329 | |
330 | /*! |
331 | * \brief Returns the 'default' of \p lhs and \p rhs. The result will be \p lhs, except any |
332 | * unconstrained fields in \p lhs will take their value from \p rhs. Always well-defined. |
333 | */ |
334 | static VirtualDevice Default(const VirtualDevice& lhs, const VirtualDevice& rhs); |
335 | |
336 | TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(VirtualDevice, ObjectRef, VirtualDeviceNode); |
337 | |
338 | friend class VirtualDeviceCache; // Private implementation helper. |
339 | }; |
340 | |
341 | /*! |
342 | * \brief A cache of \p VirtualDevices. This can be used: |
343 | * - To avoid ending up with lots of identical instances, since the space of VirtualDevices for any |
344 | * one compilation is very small but the number of points they need to be constructed can |
345 | * be very large (eg during device planning). |
346 | * - So we can assume \p VirtualDevices are pointer equal if and only if they are structurally |
347 | * equal. This simplifies the unification of 'device domains' which are built on \p VirtualDevices. |
348 | */ |
349 | class VirtualDeviceCache { |
350 | public: |
351 | /*! \brief Returns the unique \p VirtualDevice representing given fields. */ |
352 | VirtualDevice Make(DLDeviceType device_type = kInvalidDeviceType, int virtual_device_id = -1, |
353 | Target target = {}, MemoryScope memory_scope = {}); |
354 | |
355 | /*! |
356 | * \brief Returns the unique \p VirtualDevice structurally equal to the given \p virtual_device. |
357 | */ |
358 | VirtualDevice Unique(const VirtualDevice& virtual_device); |
359 | |
360 | private: |
361 | /*! \brief Already constructed VirtualDevices. */ |
362 | std::unordered_set<VirtualDevice, StructuralHash, StructuralEqual> cache_; |
363 | }; |
364 | |
/*!
 * \brief The attribute key for the virtual device. This key will be promoted to first class on
 * functions. For use in the parser and printer only.
 *
 * Type: VirtualDevice
 */
constexpr const char* kVirtualDevice = "virtual_device";
371 | |
372 | } // namespace tvm |
373 | |
374 | #endif // TVM_TARGET_VIRTUAL_DEVICE_H_ |
375 | |