1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/*!
21 * \file tvm/runtime/vm/vm.h
22 * \brief The Relay virtual machine runtime.
23 */
24#ifndef TVM_RUNTIME_VM_VM_H_
25#define TVM_RUNTIME_VM_VM_H_
26
27#include <tvm/runtime/container/closure.h>
28#include <tvm/runtime/module.h>
29#include <tvm/runtime/object.h>
30#include <tvm/runtime/packed_func.h>
31#include <tvm/runtime/registry.h>
32#include <tvm/runtime/vm/bytecode.h>
33#include <tvm/runtime/vm/executable.h>
34#include <tvm/runtime/vm/memory_manager.h>
35
36#include <memory>
37#include <string>
38#include <unordered_map>
39#include <utility>
40#include <vector>
41
42namespace tvm {
43namespace runtime {
44namespace vm {
45
46/*!
47 * \brief An object representing a vm closure.
48 */
49class VMClosureObj : public ClosureObj {
50 public:
51 /*!
52 * \brief The index into the function list. The function could be any
53 * function object that is compatible to the VM runtime.
54 */
55 size_t func_index;
56 /*! \brief The free variables of the closure. */
57 std::vector<ObjectRef> free_vars;
58
59 static constexpr const uint32_t _type_index = TypeIndex::kDynamic;
60 static constexpr const char* _type_key = "vm.Closure";
61 TVM_DECLARE_FINAL_OBJECT_INFO(VMClosureObj, ClosureObj);
62};
63
64/*! \brief reference to closure. */
65class VMClosure : public Closure {
66 public:
67 VMClosure(size_t func_index, std::vector<ObjectRef> free_vars);
68 TVM_DEFINE_OBJECT_REF_METHODS(VMClosure, Closure, VMClosureObj);
69};
70
71/*!
72 * \brief A representation of a Relay function in the VM.
73 *
74 * Contains metadata about the compiled function, as
75 * well as the compiled VM instructions.
76 */
77struct VMFunction {
78 /*! \brief The function's name. */
79 std::string name;
80 /*! \brief The function parameter names. */
81 std::vector<std::string> params;
82 /*! \brief The instructions representing the function. */
83 std::vector<Instruction> instructions;
84 /*! \brief The size of the frame for this function */
85 Index register_file_size = 0;
86 /*! \brief The indexes for the device holding each function parameter. */
87 std::vector<Index> param_device_indexes;
88
89 VMFunction(std::string name, std::vector<std::string> params,
90 std::vector<Instruction> instructions, Index register_file_size,
91 std::vector<Index> param_device_indexes)
92 : name(std::move(name)),
93 params(std::move(params)),
94 instructions(std::move(instructions)),
95 register_file_size(register_file_size),
96 param_device_indexes(std::move(param_device_indexes)) {
97 ICHECK_EQ(this->params.size(), this->param_device_indexes.size());
98 }
99
100 VMFunction() = default;
101
102 friend std::ostream& operator<<(std::ostream& os, const VMFunction&);
103};
104
105/*!
106 * \brief A representation of a stack frame.
107 *
108 * A stack frame is a record containing the information needed
109 * to restore the caller's virtual machine state after returning
110 * from a function call.
111 */
112struct VMFrame {
113 /*! \brief The return program counter. */
114 Index pc;
115 /*! \brief The index into the function table, points to the caller. */
116 Index func_index;
117 /*! \brief The number of arguments. */
118 Index args;
119 /*! \brief A pointer into the caller function's instructions. */
120 const Instruction* code;
121
122 /*! \brief Statically allocated space for objects */
123 std::vector<ObjectRef> register_file;
124
125 /*! \brief Register in caller's frame to put return value */
126 RegName caller_return_register;
127
128 VMFrame(Index pc, Index func_index, Index args, const Instruction* code, Index register_file_size)
129 : pc(pc),
130 func_index(func_index),
131 args(args),
132 code(code),
133 register_file(register_file_size),
134 caller_return_register(0) {}
135};
136
137/*!
138 * \brief The virtual machine.
139 *
140 * The virtual machine contains all the current execution state,
141 * as well as the executable.
142 *
143 * The goal is to have a single self-contained object,
144 * enabling one to easily pass around VMs, execute them on
145 * multiple threads, or serialize them to disk or over the
146 * wire.
147 */
148class TVM_DLL VirtualMachine : public runtime::ModuleNode {
149 public:
150 /*!
151 * \brief Get a PackedFunc from module.
152 *
153 * The PackedFunc may not be fully initialized,
154 * there might still be first time running overhead when
155 * executing the function on certain devices.
156 * For benchmarking, use prepare to eliminate
157 *
158 * \param name the name of the function.
159 * \param sptr_to_self The shared_ptr that points to this module node.
160 *
161 * \return PackedFunc(nullptr) when it is not available.
162 *
163 * \note The function will always remain valid.
164 * If the function needs resource from the module(e.g. late linking),
165 * it should capture sptr_to_self.
166 */
167 virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self);
168
169 virtual ~VirtualMachine() {}
170
171 const char* type_key() const final { return "VirtualMachine"; }
172
173 VirtualMachine() : frames_(), func_index_(0), code_(nullptr), pc_(0), exec_(nullptr) {}
174
175 /*!
176 * \brief load the executable for the virtual machine.
177 * \param exec The executable.
178 */
179 virtual void LoadExecutable(const ObjectPtr<Executable>& exec);
180
181 protected:
182 /*! \brief Push a call frame on to the call stack. */
183 void PushFrame(Index arg_count, Index ret_pc, const VMFunction& vm_func);
184
185 /*!
186 * \brief Pop a frame off the call stack.
187 * \return The number of frames left.
188 */
189 Index PopFrame();
190
191 /*!
192 * \brief Write to a VM register.
193 * \param reg The register to write to.
194 * \param obj The object to write to.
195 */
196 inline void WriteRegister(RegName reg, const ObjectRef& obj);
197
198 /*!
199 * \brief Read a VM register.
200 * \param reg The register to read from.
201 * \return The read object.
202 */
203 ObjectRef ReadRegister(RegName reg) const;
204
205 /*!
206 * \brief Read a VM register and cast it to int32_t
207 * \param reg The register to read from.
208 * \return The read scalar.
209 */
210 int64_t LoadScalarInt(RegName reg) const;
211
212 /*!
213 * \brief Invoke a VM function.
214 * \param func The function.
215 * \param args The arguments to the function.
216 * \return The object representing the result.
217 */
218 ObjectRef Invoke(const VMFunction& func, const std::vector<ObjectRef>& args);
219
220 // TODO(@jroesch): I really would like this to be a global variable.
221 /*!
222 * \brief Invoke a VM function by name.
223 * \param name The function's name.
224 * \param args The arguments to the function.
225 * \return The object representing the result.
226 */
227 ObjectRef Invoke(const std::string& name, const std::vector<ObjectRef>& args);
228
229 /*!
230 * \brief Invoke a VM function.
231 * \param func The function.
232 * \param input_args The input arguments to the function.
233 * \param output_args The pre-allocated output arguments of the function.
234 * \return The object(s) representing the result.
235 */
236 ObjectRef Invoke(const VMFunction& func, const std::vector<ObjectRef>& input_args,
237 const std::vector<ObjectRef>& output_args);
238
239 /*!
240 * \brief Invoke a PackedFunction
241 *
242 * \param packed_index The offset of the PackedFunction in all functions.
243 * \param func The PackedFunction to be invoked.
244 * \param arg_count The number of arguments to the PackedFunction.
245 * \param output_size The number of outputs of the PackedFunction.
246 * \param args Arguments to the PackedFunction.
247 *
248 * \note The return value will be stored in the last output_size slots of args.
249 */
250 virtual void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count,
251 Index output_size, const std::vector<ObjectRef>& args);
252
253 /*!
254 * \brief Initialize the virtual machine for a set of (physical) devices.
255 * \param physical_devices The set of TVM devices.
256 * \param alloc_types The allocator types for each device.
257 */
258 void Init(const std::vector<Device>& physical_devices,
259 const std::vector<AllocatorType>& alloc_types);
260
261 /*! \brief Run VM dispatch loop. */
262 void RunLoop(const std::vector<Index>& output_tensor_reg_indices = {});
263
264 /*! \brief Get device from the device list based on a given device index. */
265 Device GetDevice(Index device_index) const;
266 Allocator* GetAllocator(Index device_index) const;
267
268 /*!
269 * \brief Invoke a global setting up the VM state to execute.
270 *
271 * This does not begin execution of the VM.
272 */
273 void InvokeGlobal(const VMFunction& func, const std::vector<ObjectRef>& args);
274
275 /*!
276 * \brief Set inputs to a function.
277 * \param name The function name
278 * \param args args[offset:] are arguments to the
279 * function. If the arguments are not of the correct device for the function,
280 * they will be copied to the device.
281 * \param offset Starting offset of the arguments in `args`.
282 */
283 void SetInput(std::string name, TVMArgs args, int offset);
284
285 /*!
286 * \brief Set one input tensor with index or name to a function.
287 * \param name The function name.
288 * \param tag index or name of the input tensor .
289 * \param tensor the input tensor. If the tensor is not of the correct device for the function,
290 * they will be copied to the device.
291 */
292 void SetOneInput(std::string name, const TVMArgValue& tag, const TVMArgValue& tensor);
293
294 /*!
295 * \brief Set pre-allocated output tensors to a function.
296 * It is native implementation of 'set_outputs' python method.
297 * It is used in scenario when output tensors are allocated outside each invocation.
298 * Note: it sets set_outputs_enabled_[name] true and fill outputs_[name]
299 * but after invocation the first is switched off and the second is cleared
300 * \param name The function name
301 * \param args outputs to the function.
302 */
303 void SetOutputs(std::string name, TVMArgs args);
304
305 /*!
306 * \brief Preparation part of Invoke method before RunLoop.
307 * \param func the function.
308 * \param args input args
309 */
310 void PrintInfoAndSetInputArgs(const VMFunction& func, const std::vector<ObjectRef>& args);
311
312 /*!
313 * \brief Set pre-allocated outputs to register for specified function.
314 * \param func_name The function's name.
315 * \param outputs set of output tensors.
316 */
317 void SetOutputTensorsToRegister(const std::string& func_name,
318 const std::vector<ObjectRef>& outputs);
319
320 /*!
321 * \brief Internal hook for profiling the start of an op.
322 *
323 * This hook is only called on certain ops that are likely to take a
324 * significant amount of runtime (normally because they alloc or transfer to
325 * device).
326 *
327 * \param instr Instruction that will be executed after this hook fires
328 */
329 virtual void OpStartHook(Instruction instr);
330
331 /*!
332 * \brief Internal hook for profiling the end of an op.
333 */
334 virtual void OpStopHook();
335
336 private:
337 /*!
338 * \brief Get index of input tensor from its name.
339 * \param func_name The function's name.
340 * \param input_name The input tensor name.
341 * \return The input tensor index.
342 */
343 int64_t GetInputIndexFromVMFunction(const std::string& func_name,
344 const std::string& input_name) const;
345
346 /*!
347 * \brief Get index of input tensor from its name.
348 * \param params parameter names.
349 * \param input_name The input tensor name.
350 * \return The input tensor index.
351 */
352 int64_t GetInputIndexFromName(const std::vector<std::string>& params,
353 const std::string& input_name) const;
354
355 /*!
356 * \brief Check executable exists and get VM function from it.
357 * \param func_name The function's name.
358 * \return VM function.
359 */
360 const VMFunction& CheckAndGetVMFunction(const std::string& func_name) const;
361
362 /*!
363 * \brief Creats inputs_ field, if it exists check its size.
364 * \param func_name The function's name.
365 * \param size inputs_ field size.
366 * \return VM function.
367 */
368 void CreateInputsOrCheckSize(const std::string& func_name, size_t size);
369
370 /*!
371 * \brief Set one input tensor with given index to set of input tensors if need copy to given
372 * device. \param tensors the input tensors set (destination) \param tensor some tensor (not
373 * necessary DLTensor). \param index The input tensor index. \param dev device to copy if need.
374 */
375 void SetInputTensorWithIndex(std::vector<ObjectRef>& tensors, // NOLINT(*)
376 const TVMArgValue& tensor, int index, Device dev);
377
378 /*!
379 * \brief Convert tensor from TVMArgValue to ObjectRef.
380 * DLTensor and NDArray types are supported.
381 * \param tensor given arg value containing tensor.
382 * \return tensor in ObjectRef format
383 */
384 ObjectRef TensorFromTVMArgValueToObjectRef(const TVMArgValue& tensor) const;
385
386 /*!
387 * \brief Get index of outputs in register_file from func code
388 * \return result register index
389 */
390 Index GetResultRegisterIndex() const;
391
392 /*!
393 * \brief Calculate the index of operation which destination is result
394 * \param res_index is the index of op returning result
395 */
396 void CalculatePreResultOpIndex(Index res_index);
397
398 /*!
399 * \brief Get indices from register_file for output tensors.
400 * It helps to replace output tensors allocated in RunLoop by
401 * tensors pre-allocated outside. Scenario is when `set_output` is used
402 * \return indices from register_file for output tensors.
403 */
404 std::vector<Index> GetOutputTensorRegIndices();
405
406 /*!
407 * \brief Write new allocated tensor to register_file of frame.
408 * \param instr current instruction containing shape and storage info.
409 */
410 void WriteAllocatedTensor(const Instruction& instr);
411
412 /*!
413 * \brief 'set_outputs_enabled' is assumed true for using this method.
414 * It is expected that result register has already contained tensor from outside,
415 * new memory is not allocated and write, but expected shape and data type are checked.
416 * For other register WriteAllocatedTensor method is used.
417 * \param instr current instruction containing shape and storage info.
418 */
419 void WriteAllocatedTensorFromOutside(const Instruction& instr);
420
421 bool FindIndex(const std::vector<Index>& indices, Index val) const;
422
423 protected:
424 /*! \brief The virtual machine's packed function table. */
425 std::vector<PackedFunc> packed_funcs_;
426 /*! \brief The current stack of call frames. */
427 std::vector<VMFrame> frames_;
428 /*! \brief The fuction table index of the current function. */
429 Index func_index_;
430 /*! \brief The current pointer to the code section. */
431 const Instruction* code_;
432 /*! \brief The virtual machine PC. */
433 Index pc_;
434 /*! \brief The special return register. */
435 ObjectRef return_register_;
436 /*! \brief The executable the VM will operate on. */
437 ObjectPtr<Executable> exec_;
438 /*! \brief The function name to inputs mapping. */
439 std::unordered_map<std::string, std::vector<ObjectRef>> inputs_;
440 /*! \brief The function name to flag enabling scenario with set outputs. */
441 std::unordered_map<std::string, bool> set_outputs_enabled_;
442 /*! \brief The index of operation which destination is result. */
443 Index preresult_op_index_ = -1;
444 /*! \brief The function name to indices of output tensors in register file. */
445 std::unordered_map<std::string, std::vector<Index>> output_tensor_reg_indices_;
446 /*! \brief The function name to pre-allocated outputs mapping. */
447 std::unordered_map<std::string, std::vector<ObjectRef>> outputs_;
448 /*!
449 * \brief The "physical" devices the VM can execute primitives on. All "device indexes"
450 * are w.r.t. this vector. Each entry in this vector must match the corresponding entry
451 * in the executable's "virtual" devices vector.
452 */
453 std::vector<Device> devices_;
454 /*! \brief The cached memory allocators, one per device. */
455 std::vector<Allocator*> allocators_;
456 /*!
457 * \brief The constant pool for runtime. It caches the device dependent
458 * object to avoid rellocation of constants during inference.
459 */
460 std::vector<ObjectRef> const_pool_;
461};
462
463} // namespace vm
464} // namespace runtime
465} // namespace tvm
466
467#endif // TVM_RUNTIME_VM_VM_H_
468