1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/*!
21 * \file tvm/target/virtual_device.h
22 * \brief A compile time representation for where data is to be stored at runtime, and how to
23 * compile code to compute it.
24 */
25
26#ifndef TVM_TARGET_VIRTUAL_DEVICE_H_
27#define TVM_TARGET_VIRTUAL_DEVICE_H_
28
29#include <tvm/ir/transform.h>
30#include <tvm/target/target.h>
31
32#include <string>
33#include <unordered_set>
34#include <utility>
35
36namespace tvm {
37
/*!
 * \brief Abstract label for an area of memory.
 *
 * Currently uninterpreted and arbitrary. Likely to be replaced by a structured representation
 * of a memory pool in the future. Please try to use this alias instead of String to aid future
 * code migration.
 */
using MemoryScope = String;
46
47/*!
48 * \brief Describes at compile time the constraints on where data is to be stored at runtime
49 * down to the (virtual) device and memory scope level, and how to compile code to compute that
50 * data. Used by the \p PlanDevices pass to collect and solve (virtual) device constraints for
51 * the whole Relay program.
52 *
53 * Is a quadruple of:
54 * - A \p device_type (\p DLDeviceType). May be \p kInvalidDeviceType if unconstrained.
55 * - A \p virtual_device_id (\p int). This allows us to distinguish distinct devices
56 * with the same \p Target, for example in a multi-GPU system. May be -1 if unconstrained.
57 * See "Virtual Devices" below.
58 * - A \p target (\p Target) describing how to compile code for the intended device. May be null
59 * if unconstrained.
60 * - A \p memory_scope (\p MemoryScope, which is currently just \p String) describing which memory
61 * area is to be used to hold data. May be "" if unconstrained. See "Memory Scopes and Devices"
62 * below.
63 *
64 * Some or all of these fields may be unconstrained, signaling that device planning is free to
65 * choose a value consistent with the whole program. However if a \p target is given then the \p
66 * device_type must equal \p target->GetTargetDeviceType().
67 *
68 * Note that currently we assume if a function returns its result on a particular (virtual) device
69 * then the function body is also executed on that device. See the overview comment in
70 * src/relay/transforms/device_planner.cc for more details.
71 *
72 * By 'data' we include both tensors and additional supporting datastructures such as shapes,
73 * Relay ADT items (including tuples), Relay references, and Relay closures. Typically non-tensor
74 * data must reside on a 'CPU'-like host device with good support for scalars.
75 *
76 * By 'execution' we include both (fused) primitive operators, and all the Relay expressions
 * surrounding them which coordinate data and control flow. Again, typically non-primitive
78 * operators must be executed on a 'CPU'-like device with good support for control flow.
79 *
80 * Since TVM targets such a wide range of systems it is not possible for \p VirtualDevice to impose
81 * much semantics on these fields, particularly for \p virtual_device_id and \p memory_scope.
 * Instead we assume downstream passes and codegen will interpret and validate these fields
83 * appropriately.
84 *
85 * Targets vs Devices
86 * ------------------
 * Generally \p Targets (a compile-time only datastructure) describe compiler options for a specific
88 * microarchitecture and toolchain, while \p Devices (a runtime datastructure also available at
89 * compile time) describe a physical device on the target system. Obviously the target must agree
90 * with the device's microarchitecture, but we otherwise don't impose any constraints between them:
91 * - It's ok to use different \p Targets for the same \p Device, eg to squeeze some extra perf
92 * out of a particular primitive using particular compiler flags.
93 * - It's ok to use the same \p Target for multiple \p Devices, eg if we have multiple CPUs.
94 *
95 * Traditionally TVM assumes at most one \p Target per \p DLDeviceType. We are moving away from that
96 * assumption.
97 *
98 * Virtual vs Physical Devices
99 * ---------------------------
100 * The \p virtual_device_id may be used by downstream passes or the runtime to help decide which
101 * \p device_id to use for a particular physical runtime \p Device. For example:
102 * - Some runtimes may support passing in an array of actual `device` specifications, and the
103 * \p virtual_device_id can be used at runtime as an index into that array.
104 * - Some runtimes may support dynamically allocating computations to physical devices. On these
105 * systems a large space of \p virtual_device_ids could be used at compile time, even though
106 * at runtime only a few physical devices will be present.
107 *
108 * The \p virtual_device_id may also be left unconstrained if not needed.
109 *
110 * Memory Scopes and Devices
111 * -------------------------
112 * Multi-device systems can have complex memory hierarchies. For example
113 * \code
114 * (kDLCPU, 0, "llvm", "global")
115 * \endcode
116 * and
117 * \code
118 * (kDLCPU, 1, "llvm", "global")
119 * \endcode
120 * could denote:
121 * - The same memory area accessible from two separate CPUs without any CPU affinity;
122 * - Distinct memory areas in a NUMA architecture for which cross-device access is handled
123 * by the memory system;
124 * - Outright distinct memory areas, where one device cannot directly address the memory of
125 * another.
126 *
127 * Similarly:
128 * \code
129 * (kDLCPU, 0, "llvm", "global")
130 * \endcode
131 * and
132 * \code
133 * (kDLCUDA, 0, "cuda", "host")
134 * \endcode
135 * could denote the same memory area, but with very different access costs.
136 *
137 * Furthermore, not all memory scopes are accessible to all devices, and it is possible for
138 * a memory scope to only be accessible to a device when code is compiled with particular
139 * \p Target options.
140 *
141 * \p VirtualDevices themselves have no system-level understanding. Currently the \p PlanDevices
142 * pass will simply insert "device_copy" operators wherever \p VirtualDevices are not exactly
143 * pointwise equal. We may revisit this in the future as the work on memory pools matures.
144 *
145 * Joining and Defaulting
146 * ----------------------
147 * It is possible to 'join' two \p VirtualDevices to yield the most constrained \p VirtualDevice
148 * which agrees with both join arguments. Eg:
149 * \code
 *   Join((kDLCPU, -1, "llvm", ""), (kInvalidDeviceType, 3, null, "global"))
 *     => (kDLCPU, 3, "llvm", "global")
 *   Join((kDLCPU, -1, "llvm", "global"), (kInvalidDeviceType, 3, null, "local"))
 *     => null (no join possible: the memory scopes disagree)
154 * \endcode
155 *
156 * Related to 'join' is 'default', which only takes constrained fields from the rhs when the
157 * lhs is unconstrained:
158 * \code
 *   Default((kDLCPU, -1, "llvm", "local"), (kDLCPU, 3, null, "global"))
160 * => (kDLCPU, 3, "llvm", "local")
161 * \endcode
162 *
163 * These operations are needed during device planning.
164 */
165
166class VirtualDeviceNode : public AttrsNode<VirtualDeviceNode> {
167 private:
168 /*!
169 * \brief The \p DLDeviceType (represented as an int) of the virtual device. If \p target is
170 * known then this will be equal to \p target->GetTargetDeviceType(). If \p target is null then
171 * the target is to be determined later.
172 *
173 * This is needed to support the legacy "on_device" and "device_copy" calls which only allow
174 * a \p DLDeviceTypes (as an integer) to be given.
175 *
176 * kInvalidDeviceType denotes unconstrained. An int since the DLDeviceType enum representation
177 * is not fixed. Private to discourage further int vs DLDeviceType confusion.
178 */
179 int /* actually DLDeviceType */ device_type_int;
180
181 public:
182 DLDeviceType device_type() const { return static_cast<DLDeviceType>(device_type_int); }
183
184 /*!
185 * \brief The device identifier for the virtual device. This must be resolved to a physical
186 * device identifier either during compilation or at runtime.
187 *
188 * -1 denotes unconstrained.
189 */
190 int virtual_device_id;
191
192 /*!
193 * \brief The \p Target describing how to compile for the virtual device.
194 *
195 * Null denotes unconstrained. Note that if a target later becomes known for this \p VirtualDevice
196 * then it must be consistent with the \p device_type if already known. This is enforced by the
197 * Join and Default methods.
198 */
199 Target target;
200
201 /*!
202 * \brief The scope of memory w.r.t. the virtual device which holds data.
203 *
204 * Empty denotes unconstrained.
205 */
206 MemoryScope memory_scope;
207
208 /*!
209 * \brief Returns true if virtual device is 'fully unconstrained', ie no target/device type,
210 * device id or memory scope is specified.
211 */
212 bool IsFullyUnconstrained() const {
213 return !target.defined() && device_type() == kInvalidDeviceType && virtual_device_id == -1 &&
214 memory_scope.empty();
215 }
216
217 /*!
218 * \brief Returns true if virtual device is 'fully constrained', ie target, device id and memory
219 * scope are all specified.
220 */
221 bool IsFullyConstrained() const {
222 return target.defined() && virtual_device_id != -1 && !memory_scope.empty();
223 }
224
225 /*!
226 * \brief Returns the (virtual) \p Device implied by this \p VirtualDevice. Both the \p
227 * device_type and \p virtual_device_must be constrained. The returned \p Device may not
228 * correspond to any physical device available at compile time or even runtime: see "Virtual vs
229 * Physical Devices" above.
230 */
231 Device ToDevice() const {
232 ICHECK(device_type() != kInvalidDeviceType);
233 ICHECK(virtual_device_id != -1);
234 Device device;
235 device.device_type = device_type();
236 device.device_id = virtual_device_id;
237 return device;
238 }
239
240 TVM_DECLARE_ATTRS(VirtualDeviceNode, "VirtualDevice") {
241 TVM_ATTR_FIELD(device_type_int)
242 .describe("The type of the virtual device.")
243 .set_default(kInvalidDeviceType);
244 TVM_ATTR_FIELD(virtual_device_id)
245 .describe("The device id of the virtual device.")
246 .set_default(-1);
247 TVM_ATTR_FIELD(target)
248 .describe("The target describing how to compile for the virtual device.")
249 .set_default(Target());
250 TVM_ATTR_FIELD(memory_scope)
251 .describe("The area of memory w.r.t. the virtual device where data is stored.")
252 .set_default("");
253 }
254
255 friend class VirtualDevice;
256};
257
258/*!
259 * \brief Managed reference class to \p VirtualDeviceNode.
260 */
261class VirtualDevice : public ObjectRef {
262 public:
263 /*!
264 * \brief Construct a virtual device.
265 * \param device_type The device type for the virtual device, or \p kInvalidDeviceType if
266 * unconstrained. If \p target is defined then must match its \p target->GetTargetDeviceType().
267 * \param virtual_device_id The device id for the virtual device, or -1 if unconstrained.
268 * \param target The target describing how to compile for the virtual device, or null if
269 * unconstrained.
270 * \param memory_scope The memory scope w.r.t. the virtual device which holds data, or "" if
271 * unconstrained.
272 * \return The virtual device.
273 */
274 explicit VirtualDevice(DLDeviceType device_type = kInvalidDeviceType, int virtual_device_id = -1,
275 Target target = {}, MemoryScope memory_scope = {});
276
277 /*! \brief Returns the unique fully unconstrained \p VirtualDevice. */
278 static VirtualDevice FullyUnconstrained();
279
280 /*!
281 * \brief Returns the \p VirtualDevice for \p device_type and (if not -1) \p virtual_device_id.
282 * The target and memory scope will be unconstrained.
283 */
284 static VirtualDevice ForDeviceType(DLDeviceType device_type, int virtual_device_id = -1) {
285 ICHECK_GT(device_type, 0);
286 return VirtualDevice(device_type, virtual_device_id);
287 }
288 static VirtualDevice ForDeviceType(int device_type, int virtual_device_id = -1) {
289 return ForDeviceType(static_cast<DLDeviceType>(device_type), virtual_device_id);
290 }
291 static VirtualDevice ForDeviceType(const Integer& device_type, int virtual_device_id = -1) {
292 return ForDeviceType(static_cast<int>(device_type->value), virtual_device_id);
293 }
294
295 /*! \brief Returns the \p VirtualDevice for \p device. */
296 static VirtualDevice ForDevice(const Device& device) {
297 return ForDeviceType(device.device_type, device.device_id);
298 }
299
300 /*! \brief Returns the \p VirtualDevice for \p device and \p target. */
301 static VirtualDevice ForDeviceAndTarget(const Device& device, Target target) {
302 return VirtualDevice(device.device_type, device.device_id, std::move(target));
303 }
304
305 /*! \brief Returns the \p VirtualDevice for \p target. */
306 static VirtualDevice ForTarget(Target target) {
307 DLDeviceType device_type = static_cast<DLDeviceType>(target->GetTargetDeviceType());
308 return VirtualDevice(device_type, /*virtual_device_id=*/0, std::move(target));
309 }
310
311 /*! \brief Returns the \p VirtualDevice for \p memory_scope alone. */
312 static VirtualDevice ForMemoryScope(MemoryScope memory_scope) {
313 return VirtualDevice(kInvalidDeviceType, -1, {}, std::move(memory_scope));
314 }
315
316 /*! \brief Returns the \p VirtualDevice for \p device, \p target and \p memory_scope. */
317 TVM_DLL static VirtualDevice ForDeviceTargetAndMemoryScope(const Device& device, Target target,
318 MemoryScope memory_scope) {
319 return VirtualDevice(device.device_type, device.device_id, std::move(target),
320 std::move(memory_scope));
321 }
322
323 /*!
324 * \brief Returns the 'join' of \p lhs and \p rhs. The result will agree pointwise with
325 * \p lhs and \p rhs on all their constrained fields. Returns the null optional if no such
326 * join exists, ie there's disagreement on at least one constrained field.
327 */
328 static Optional<VirtualDevice> Join(const VirtualDevice& lhs, const VirtualDevice& rhs);
329
330 /*!
331 * \brief Returns the 'default' of \p lhs and \p rhs. The result will be \p lhs, except any
332 * unconstrained fields in \p lhs will take their value from \p rhs. Always well-defined.
333 */
334 static VirtualDevice Default(const VirtualDevice& lhs, const VirtualDevice& rhs);
335
336 TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(VirtualDevice, ObjectRef, VirtualDeviceNode);
337
338 friend class VirtualDeviceCache; // Private implementation helper.
339};
340
341/*!
342 * \brief A cache of \p VirtualDevices. This can be used:
343 * - To avoid ending up with lots of identical instances, since the space of VirtualDevices for any
344 * one compilation is very small but the number of points they need to be constructed can
345 * be very large (eg during device planning).
346 * - So we can assume \p VirtualDevices are pointer equal if and only if they are structurally
347 * equal. This simplifies the unification of 'device domains' which are built on \p VirtualDevices.
348 */
349class VirtualDeviceCache {
350 public:
351 /*! \brief Returns the unique \p VirtualDevice representing given fields. */
352 VirtualDevice Make(DLDeviceType device_type = kInvalidDeviceType, int virtual_device_id = -1,
353 Target target = {}, MemoryScope memory_scope = {});
354
355 /*!
356 * \brief Returns the unique \p VirtualDevice structurally equal to the given \p virtual_device.
357 */
358 VirtualDevice Unique(const VirtualDevice& virtual_device);
359
360 private:
361 /*! \brief Already constructed VirtualDevices. */
362 std::unordered_set<VirtualDevice, StructuralHash, StructuralEqual> cache_;
363};
364
/*!
 * \brief The attribute key for the virtual device. This key will be promoted to first class on
 * functions. For use in the parser and printer only.
 *
 * Type: VirtualDevice
 */
constexpr const char* kVirtualDevice = "virtual_device";
371
372} // namespace tvm
373
374#endif // TVM_TARGET_VIRTUAL_DEVICE_H_
375