1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file tvm/runtime/vm/vm.h |
22 | * \brief The Relay virtual machine runtime. |
23 | */ |
24 | #ifndef TVM_RUNTIME_VM_VM_H_ |
25 | #define TVM_RUNTIME_VM_VM_H_ |
26 | |
27 | #include <tvm/runtime/container/closure.h> |
28 | #include <tvm/runtime/module.h> |
29 | #include <tvm/runtime/object.h> |
30 | #include <tvm/runtime/packed_func.h> |
31 | #include <tvm/runtime/registry.h> |
32 | #include <tvm/runtime/vm/bytecode.h> |
33 | #include <tvm/runtime/vm/executable.h> |
34 | #include <tvm/runtime/vm/memory_manager.h> |
35 | |
36 | #include <memory> |
37 | #include <string> |
38 | #include <unordered_map> |
39 | #include <utility> |
40 | #include <vector> |
41 | |
42 | namespace tvm { |
43 | namespace runtime { |
44 | namespace vm { |
45 | |
46 | /*! |
47 | * \brief Object node for a VM closure: an index into the function list plus the closure's free variables. |
48 | */ |
49 | class VMClosureObj : public ClosureObj { |
50 | public: |
51 | /*! |
52 | * \brief The index into the function list. The function could be any |
53 | * function object that is compatible with the VM runtime. |
54 | */ |
55 | size_t func_index; |
56 | /*! \brief The free variables of the closure. */ |
57 | std::vector<ObjectRef> free_vars; |
58 | |
59 | static constexpr const uint32_t _type_index = TypeIndex::kDynamic; |
60 | static constexpr const char* _type_key = "vm.Closure" ; |
61 | TVM_DECLARE_FINAL_OBJECT_INFO(VMClosureObj, ClosureObj); |
62 | }; |
63 | |
64 | /*! \brief Reference class for VMClosureObj (see TVM_DEFINE_OBJECT_REF_METHODS below). */ |
65 | class VMClosure : public Closure { |
66 | public: /*! \brief Construct a closure from a function index and its free variables; presumably they fill the VMClosureObj fields of the same names (definition not in this header). */ |
67 | VMClosure(size_t func_index, std::vector<ObjectRef> free_vars); |
68 | TVM_DEFINE_OBJECT_REF_METHODS(VMClosure, Closure, VMClosureObj); |
69 | }; |
70 | |
71 | /*! |
72 | * \brief A representation of a Relay function in the VM. |
73 | * |
74 | * Contains metadata about the compiled function, as |
75 | * well as the compiled VM instructions. |
76 | */ |
77 | struct VMFunction { |
78 | /*! \brief The function's name. */ |
79 | std::string name; |
80 | /*! \brief The function parameter names. */ |
81 | std::vector<std::string> params; |
82 | /*! \brief The instructions representing the function. */ |
83 | std::vector<Instruction> instructions; |
84 | /*! \brief The size of the frame for this function; 0 unless set. */ |
85 | Index register_file_size = 0; |
86 | /*! \brief The indexes for the device holding each function parameter; the constructor checks there is one per parameter. */ |
87 | std::vector<Index> param_device_indexes; |
88 | /*! \brief Build a function from its parts, moving the by-value arguments into the members; ICHECK_EQ requires params and param_device_indexes to have equal size. */ |
89 | VMFunction(std::string name, std::vector<std::string> params, |
90 | std::vector<Instruction> instructions, Index register_file_size, |
91 | std::vector<Index> param_device_indexes) |
92 | : name(std::move(name)), |
93 | params(std::move(params)), |
94 | instructions(std::move(instructions)), |
95 | register_file_size(register_file_size), |
96 | param_device_indexes(std::move(param_device_indexes)) { |
97 | /* this-> selects the members; the same-named constructor parameters were moved from above. */ ICHECK_EQ(this->params.size(), this->param_device_indexes.size()); |
98 | } |
99 | /*! \brief Create an empty function whose register_file_size is 0. */ |
100 | VMFunction() = default; |
101 | /*! \brief Stream output for a VMFunction; only declared here, so the output format is defined elsewhere. */ |
102 | friend std::ostream& operator<<(std::ostream& os, const VMFunction&); |
103 | }; |
104 | |
105 | /*! |
106 | * \brief A representation of a stack frame. |
107 | * |
108 | * A stack frame is a record containing the information needed |
109 | * to restore the caller's virtual machine state after returning |
110 | * from a function call. |
111 | */ |
112 | struct VMFrame { |
113 | /*! \brief The return program counter. */ |
114 | Index pc; |
115 | /*! \brief The index into the function table, points to the caller. */ |
116 | Index func_index; |
117 | /*! \brief The number of arguments. */ |
118 | Index args; |
119 | /*! \brief A pointer into the caller function's instructions. */ |
120 | const Instruction* code; |
121 | |
122 | /*! \brief The frame's registers; the constructor sizes this vector to register_file_size entries. */ |
123 | std::vector<ObjectRef> register_file; |
124 | |
125 | /*! \brief Register in the caller's frame to put the return value; the constructor sets it to 0. */ |
126 | RegName caller_return_register; |
127 | /*! \brief Build a frame from the saved caller state (pc, func_index, args, code) and create register_file_size registers. */ |
128 | VMFrame(Index pc, Index func_index, Index args, const Instruction* code, Index register_file_size) |
129 | : pc(pc), |
130 | func_index(func_index), |
131 | args(args), |
132 | code(code), |
133 | register_file(register_file_size), |
134 | caller_return_register(0) {} |
135 | }; |
136 | |
137 | /*! |
138 | * \brief The virtual machine. |
139 | * |
140 | * The virtual machine contains all the current execution state, |
141 | * as well as the executable. |
142 | * |
143 | * The goal is to have a single self-contained object, |
144 | * enabling one to easily pass around VMs, execute them on |
145 | * multiple threads, or serialize them to disk or over the |
146 | * wire. |
147 | */ |
148 | class TVM_DLL VirtualMachine : public runtime::ModuleNode { |
149 | public: |
150 | /*! |
151 | * \brief Get a PackedFunc from module. |
152 | * |
153 | * The PackedFunc may not be fully initialized, |
154 | * there might still be first time running overhead when |
155 | * executing the function on certain devices. |
156 | * For benchmarking, use prepare to eliminate that first-run overhead. |
157 | * |
158 | * \param name the name of the function. |
159 | * \param sptr_to_self The shared_ptr that points to this module node. |
160 | * |
161 | * \return PackedFunc(nullptr) when it is not available. |
162 | * |
163 | * \note The function will always remain valid. |
164 | * If the function needs resource from the module(e.g. late linking), |
165 | * it should capture sptr_to_self. |
166 | */ |
167 | virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self); |
168 | |
169 | virtual ~VirtualMachine() {} |
170 | /*! \brief Type key of this module node: the literal string VirtualMachine. */ |
171 | const char* type_key() const final { return "VirtualMachine" ; } |
172 | /*! \brief Construct a VM with an empty frame stack, function index and pc at 0, and no code or executable attached. */ |
173 | VirtualMachine() : frames_(), func_index_(0), code_(nullptr), pc_(0), exec_(nullptr) {} |
174 | |
175 | /*! |
176 | * \brief Load the executable for the virtual machine. |
177 | * \param exec The executable. |
178 | */ |
179 | virtual void LoadExecutable(const ObjectPtr<Executable>& exec); |
180 | |
181 | protected: |
182 | /*! \brief Push a call frame onto the call stack. */ |
183 | void PushFrame(Index arg_count, Index ret_pc, const VMFunction& vm_func); |
184 | |
185 | /*! |
186 | * \brief Pop a frame off the call stack. |
187 | * \return The number of frames left. |
188 | */ |
189 | Index PopFrame(); |
190 | |
191 | /*! |
192 | * \brief Write to a VM register. |
193 | * \param reg The register to write to. |
194 | * \param obj The object to write. |
195 | * \note NOTE(review): declared inline with no definition in this header; confirm every calling translation unit can see one. */ |
196 | inline void WriteRegister(RegName reg, const ObjectRef& obj); |
197 | |
198 | /*! |
199 | * \brief Read a VM register. |
200 | * \param reg The register to read from. |
201 | * \return The read object. |
202 | */ |
203 | ObjectRef ReadRegister(RegName reg) const; |
204 | |
205 | /*! |
206 | * \brief Read a VM register as an integer scalar, returned as int64_t. |
207 | * \param reg The register to read from. |
208 | * \return The read scalar. |
209 | */ |
210 | int64_t LoadScalarInt(RegName reg) const; |
211 | |
212 | /*! |
213 | * \brief Invoke a VM function. |
214 | * \param func The function. |
215 | * \param args The arguments to the function. |
216 | * \return The object representing the result. |
217 | */ |
218 | ObjectRef Invoke(const VMFunction& func, const std::vector<ObjectRef>& args); |
219 | |
220 | // TODO(@jroesch): I really would like this to be a global variable. |
221 | /*! |
222 | * \brief Invoke a VM function by name. |
223 | * \param name The function's name. |
224 | * \param args The arguments to the function. |
225 | * \return The object representing the result. |
226 | */ |
227 | ObjectRef Invoke(const std::string& name, const std::vector<ObjectRef>& args); |
228 | |
229 | /*! |
230 | * \brief Invoke a VM function. |
231 | * \param func The function. |
232 | * \param input_args The input arguments to the function. |
233 | * \param output_args The pre-allocated output arguments of the function. |
234 | * \return The object(s) representing the result. |
235 | */ |
236 | ObjectRef Invoke(const VMFunction& func, const std::vector<ObjectRef>& input_args, |
237 | const std::vector<ObjectRef>& output_args); |
238 | |
239 | /*! |
240 | * \brief Invoke a PackedFunction |
241 | * |
242 | * \param packed_index The offset of the PackedFunction in all functions. |
243 | * \param func The PackedFunction to be invoked. |
244 | * \param arg_count The number of arguments to the PackedFunction. |
245 | * \param output_size The number of outputs of the PackedFunction. |
246 | * \param args Arguments to the PackedFunction. |
247 | * |
248 | * \note The return value will be stored in the last output_size slots of args. |
249 | */ |
250 | virtual void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count, |
251 | Index output_size, const std::vector<ObjectRef>& args); |
252 | |
253 | /*! |
254 | * \brief Initialize the virtual machine for a set of (physical) devices. |
255 | * \param physical_devices The set of TVM devices. |
256 | * \param alloc_types The allocator types for each device. |
257 | */ |
258 | void Init(const std::vector<Device>& physical_devices, |
259 | const std::vector<AllocatorType>& alloc_types); |
260 | |
261 | /*! \brief Run VM dispatch loop. \param output_tensor_reg_indices Defaults to empty; presumably the register indices of pre-allocated output tensors (confirm in the implementation). */ |
262 | void RunLoop(const std::vector<Index>& output_tensor_reg_indices = {}); |
263 | |
264 | /*! \brief Get device from the device list based on a given device index. */ |
265 | Device GetDevice(Index device_index) const; |
266 | /*! \brief Get the allocator for a given device index; presumably an entry of allocators_ (confirm in the implementation). */ Allocator* GetAllocator(Index device_index) const; |
267 | |
268 | /*! |
269 | * \brief Invoke a global function by setting up the VM state to execute it. |
270 | * |
271 | * This does not begin execution of the VM. |
272 | */ |
273 | void InvokeGlobal(const VMFunction& func, const std::vector<ObjectRef>& args); |
274 | |
275 | /*! |
276 | * \brief Set inputs to a function. |
277 | * \param name The function name |
278 | * \param args args[offset:] are arguments to the |
279 | * function. If the arguments are not of the correct device for the function, |
280 | * they will be copied to the device. |
281 | * \param offset Starting offset of the arguments in `args`. |
282 | */ |
283 | void SetInput(std::string name, TVMArgs args, int offset); |
284 | |
285 | /*! |
286 | * \brief Set one input tensor with index or name to a function. |
287 | * \param name The function name. |
288 | * \param tag The index or name of the input tensor. |
289 | * \param tensor The input tensor. If the tensor is not on the correct device for the function, |
290 | * it will be copied to the device. |
291 | */ |
292 | void SetOneInput(std::string name, const TVMArgValue& tag, const TVMArgValue& tensor); |
293 | |
294 | /*! |
295 | * \brief Set pre-allocated output tensors to a function. |
296 | * It is the native implementation of the 'set_outputs' Python method. |
297 | * It is used in the scenario where output tensors are allocated outside each invocation. |
298 | * Note: it sets set_outputs_enabled_[name] to true and fills outputs_[name], |
299 | * but after invocation the former is switched off and the latter is cleared. |
300 | * \param name The function name |
301 | * \param args outputs to the function. |
302 | */ |
303 | void SetOutputs(std::string name, TVMArgs args); |
304 | |
305 | /*! |
306 | * \brief Preparation part of Invoke method before RunLoop. |
307 | * \param func the function. |
308 | * \param args input args |
309 | */ |
310 | void PrintInfoAndSetInputArgs(const VMFunction& func, const std::vector<ObjectRef>& args); |
311 | |
312 | /*! |
313 | * \brief Set pre-allocated outputs to register for specified function. |
314 | * \param func_name The function's name. |
315 | * \param outputs set of output tensors. |
316 | */ |
317 | void SetOutputTensorsToRegister(const std::string& func_name, |
318 | const std::vector<ObjectRef>& outputs); |
319 | |
320 | /*! |
321 | * \brief Internal hook for profiling the start of an op. |
322 | * |
323 | * This hook is only called on certain ops that are likely to take a |
324 | * significant amount of runtime (normally because they alloc or transfer to |
325 | * device). |
326 | * |
327 | * \param instr Instruction that will be executed after this hook fires |
328 | */ |
329 | virtual void OpStartHook(Instruction instr); |
330 | |
331 | /*! |
332 | * \brief Internal hook for profiling the end of an op. |
333 | */ |
334 | virtual void OpStopHook(); |
335 | |
336 | private: |
337 | /*! |
338 | * \brief Get index of input tensor from its name. |
339 | * \param func_name The function's name. |
340 | * \param input_name The input tensor name. |
341 | * \return The input tensor index. |
342 | */ |
343 | int64_t GetInputIndexFromVMFunction(const std::string& func_name, |
344 | const std::string& input_name) const; |
345 | |
346 | /*! |
347 | * \brief Get index of input tensor from its name. |
348 | * \param params parameter names. |
349 | * \param input_name The input tensor name. |
350 | * \return The input tensor index. |
351 | */ |
352 | int64_t GetInputIndexFromName(const std::vector<std::string>& params, |
353 | const std::string& input_name) const; |
354 | |
355 | /*! |
356 | * \brief Check executable exists and get VM function from it. |
357 | * \param func_name The function's name. |
358 | * \return VM function. |
359 | */ |
360 | const VMFunction& CheckAndGetVMFunction(const std::string& func_name) const; |
361 | |
362 | /*! |
363 | * \brief Create the inputs_ entry for a function or, if it already exists, |
364 | * check its size. |
365 | * \param func_name The function's name. |
366 | * \param size The size of the inputs_ entry. |
367 | */ |
368 | void CreateInputsOrCheckSize(const std::string& func_name, size_t size); |
369 | |
370 | /*! \brief Set one input tensor at the given index in a set of input tensors, copying it to the given device if needed. |
371 | * \param tensors The input tensor set (destination). |
372 | * \param tensor Some tensor (not necessarily a DLTensor). |
373 | * \param index The input tensor index. |
374 | * \param dev The device to copy to if needed. */ |
375 | void SetInputTensorWithIndex(std::vector<ObjectRef>& tensors, // NOLINT(*) |
376 | const TVMArgValue& tensor, int index, Device dev); |
377 | |
378 | /*! |
379 | * \brief Convert tensor from TVMArgValue to ObjectRef. |
380 | * DLTensor and NDArray types are supported. |
381 | * \param tensor given arg value containing tensor. |
382 | * \return tensor in ObjectRef format |
383 | */ |
384 | ObjectRef TensorFromTVMArgValueToObjectRef(const TVMArgValue& tensor) const; |
385 | |
386 | /*! |
387 | * \brief Get index of outputs in register_file from func code |
388 | * \return result register index |
389 | */ |
390 | Index GetResultRegisterIndex() const; |
391 | |
392 | /*! |
393 | * \brief Calculate the index of the operation whose destination is the result. |
394 | * \param res_index The index of the op returning the result. |
395 | */ |
396 | void CalculatePreResultOpIndex(Index res_index); |
397 | |
398 | /*! |
399 | * \brief Get indices from register_file for output tensors. |
400 | * It helps to replace output tensors allocated in RunLoop by |
401 | * tensors pre-allocated outside. Scenario is when `set_output` is used |
402 | * \return indices from register_file for output tensors. |
403 | */ |
404 | std::vector<Index> GetOutputTensorRegIndices(); |
405 | |
406 | /*! |
407 | * \brief Write new allocated tensor to register_file of frame. |
408 | * \param instr current instruction containing shape and storage info. |
409 | */ |
410 | void WriteAllocatedTensor(const Instruction& instr); |
411 | |
412 | /*! |
413 | * \brief 'set_outputs_enabled' is assumed to be true when this method is used. |
414 | * The result register is expected to already contain a tensor supplied from outside; |
415 | * new memory is not allocated and written, but the expected shape and data type are checked. |
416 | * For other registers the WriteAllocatedTensor method is used. |
417 | * \param instr current instruction containing shape and storage info. |
418 | */ |
419 | void WriteAllocatedTensorFromOutside(const Instruction& instr); |
420 | /*! \brief Report whether val occurs in indices; presumably a plain membership test (confirm in the implementation). */ |
421 | bool FindIndex(const std::vector<Index>& indices, Index val) const; |
422 | |
423 | protected: |
424 | /*! \brief The virtual machine's packed function table. */ |
425 | std::vector<PackedFunc> packed_funcs_; |
426 | /*! \brief The current stack of call frames. */ |
427 | std::vector<VMFrame> frames_; |
428 | /*! \brief The function table index of the current function. */ |
429 | Index func_index_; |
430 | /*! \brief The current pointer to the code section. */ |
431 | const Instruction* code_; |
432 | /*! \brief The virtual machine PC. */ |
433 | Index pc_; |
434 | /*! \brief The special return register. */ |
435 | ObjectRef return_register_; |
436 | /*! \brief The executable the VM will operate on. */ |
437 | ObjectPtr<Executable> exec_; |
438 | /*! \brief The function name to inputs mapping. */ |
439 | std::unordered_map<std::string, std::vector<ObjectRef>> inputs_; |
440 | /*! \brief The function name to flag enabling scenario with set outputs. */ |
441 | std::unordered_map<std::string, bool> set_outputs_enabled_; |
442 | /*! \brief The index of the operation whose destination is the result; -1 until set. */ |
443 | Index preresult_op_index_ = -1; |
444 | /*! \brief The function name to indices of output tensors in register file. */ |
445 | std::unordered_map<std::string, std::vector<Index>> output_tensor_reg_indices_; |
446 | /*! \brief The function name to pre-allocated outputs mapping. */ |
447 | std::unordered_map<std::string, std::vector<ObjectRef>> outputs_; |
448 | /*! |
449 | * \brief The "physical" devices the VM can execute primitives on. All "device indexes" |
450 | * are w.r.t. this vector. Each entry in this vector must match the corresponding entry |
451 | * in the executable's "virtual" devices vector. |
452 | */ |
453 | std::vector<Device> devices_; |
454 | /*! \brief The cached memory allocators, one per device. */ |
455 | std::vector<Allocator*> allocators_; |
456 | /*! |
457 | * \brief The constant pool for runtime. It caches the device dependent |
458 | * object to avoid reallocation of constants during inference. |
459 | */ |
460 | std::vector<ObjectRef> const_pool_; |
461 | }; |
462 | |
463 | } // namespace vm |
464 | } // namespace runtime |
465 | } // namespace tvm |
466 | |
467 | #endif // TVM_RUNTIME_VM_VM_H_ |
468 | |