1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file tvm/runtime/ndarray.h |
22 | * \brief A device-independent managed NDArray abstraction. |
23 | */ |
24 | #ifndef TVM_RUNTIME_NDARRAY_H_ |
25 | #define TVM_RUNTIME_NDARRAY_H_ |
26 | |
27 | #include <tvm/runtime/c_runtime_api.h> |
28 | #include <tvm/runtime/container/optional.h> |
29 | #include <tvm/runtime/container/shape_tuple.h> |
30 | #include <tvm/runtime/container/string.h> |
31 | #include <tvm/runtime/data_type.h> |
32 | #include <tvm/runtime/object.h> |
33 | #include <tvm/runtime/serializer.h> |
34 | |
35 | #include <atomic> |
36 | #include <functional> |
37 | #include <utility> |
38 | #include <vector> |
39 | |
40 | namespace tvm { |
41 | |
// Alias DLDevice as the TVM-wide device type.
using Device = DLDevice;

// A 'null' device type, does not correspond to any DLDeviceType enum.
// TODO(mbs): This is to help us as we transition away from representing the 'homogenous' case
// as a singleton target map indexed by the invalid DLDeviceType '0'.
constexpr DLDeviceType kNullDeviceType = static_cast<DLDeviceType>(0);

// An 'invalid' device type, does not correspond to any DLDeviceType enum.
constexpr DLDeviceType kInvalidDeviceType = static_cast<DLDeviceType>(-1);
52 | |
53 | namespace runtime { |
54 | |
/*!
 * \brief Managed NDArray.
 *  The array is backed by reference counted blocks.
 */
class NDArray : public ObjectRef {
 public:
  /*! \brief ContainerBase used to back the TVMArrayHandle */
  class ContainerBase;
  /*! \brief NDArray internal container type */
  class Container;
  /*! \brief Container type for Object system. */
  using ContainerType = Container;
  /*! \brief default constructor */
  NDArray() {}
  /*!
   * \brief constructor.
   * \param data ObjectPtr to the data container.
   */
  explicit NDArray(ObjectPtr<Object> data) : ObjectRef(data) {}

  /*! \brief reset the content of NDArray to be nullptr */
  inline void reset();
  /*!
   * \return the reference counter
   * \note this number is approximate in multi-threaded setting.
   */
  inline int use_count() const;
  /*! \return Pointer to content of DLTensor */
  inline const DLTensor* operator->() const;
  /*! \return Whether the tensor is contiguous */
  inline bool IsContiguous() const;
  /*!
   * \brief Copy data content from another array.
   * \param other The source array to be copied from.
   * \note The copy may happen asynchronously if it involves a GPU context.
   *       TVMSynchronize is necessary.
   */
  inline void CopyFrom(const DLTensor* other);
  inline void CopyFrom(const NDArray& other);
  /*!
   * \brief Copy data content from a byte buffer.
   * \param data The source bytes to be copied from.
   * \param nbytes The size of the buffer in bytes
   *        Must be equal to the size of the NDArray.
   * \note The copy always triggers a TVMSynchronize.
   */
  TVM_DLL void CopyFromBytes(const void* data, size_t nbytes);
  /*!
   * \brief Copy data content into another array.
   * \param other The destination array to be copied to.
   * \note The copy may happen asynchronously if it involves a GPU context.
   *       TVMSynchronize is necessary.
   */
  inline void CopyTo(DLTensor* other) const;
  inline void CopyTo(const NDArray& other) const;
  /*!
   * \brief Copy data content into a byte buffer.
   * \param data The destination bytes to be copied to.
   * \param nbytes The size of the data buffer.
   *        Must be equal to the size of the NDArray.
   * \note The copy always triggers a TVMSynchronize.
   */
  TVM_DLL void CopyToBytes(void* data, size_t nbytes) const;
  /*!
   * \brief Copy the data to another device.
   * \param dev The target device.
   * \return The array under another device.
   */
  inline NDArray CopyTo(const Device& dev) const;
  /*!
   * \brief Load NDArray from stream
   * \param stream The input data stream
   * \return Whether load is successful
   */
  inline bool Load(dmlc::Stream* stream);
  /*!
   * \brief Save NDArray to stream
   * \param stream The output data stream
   */
  inline void Save(dmlc::Stream* stream) const;
  /*!
   * \brief Create a NDArray that shares the data memory with the current one.
   * \param shape The shape of the new array.
   * \param dtype The data type of the new array.
   * \note The memory size of new array must be smaller than the current one.
   */
  TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype);
  /*!
   * \brief Create a reference view of NDArray that
   *  represents as DLManagedTensor.
   * \return A DLManagedTensor
   */
  TVM_DLL DLManagedTensor* ToDLPack() const;
  /*!
   * \brief Create an empty NDArray.
   * \param shape The shape of the new array.
   * \param dtype The data type of the new array.
   * \param dev The device of the array.
   * \param mem_scope The memory scope of the array.
   * \return The created Array
   */
  TVM_DLL static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
                               Optional<String> mem_scope = NullOpt);
  /*!
   * \brief Create a NDArray backed by an external DLTensor without memory copying.
   *
   * If DLTensor is not contiguous or has bad aligned data, It fails.
   * This allows us to create a NDArray using the memory
   * allocated by an external source. Responsibility for memory
   * retaining lies with the external source.
   * \param dl_tensor The DLTensor for NDArray base.
   * \return The created NDArray view.
   */
  TVM_DLL static NDArray FromExternalDLTensor(const DLTensor& dl_tensor);
  /*!
   * \brief Create new NDArray, data is copied from DLTensor.
   *
   * \param dl_tensor The DLTensor to copy from.
   * \param dev device location of the created NDArray.
   * \return The created NDArray view.
   */
  TVM_DLL static NDArray NewFromDLTensor(DLTensor* dl_tensor, const Device& dev);
  /*!
   * \brief Create a NDArray backed by a dlpack tensor.
   *
   * This allows us to create a NDArray using the memory
   * allocated by an external deep learning framework
   * that is DLPack compatible.
   *
   * The memory is retained until the NDArray went out of scope.
   * \param tensor The DLPack tensor to copy from.
   * \return The created NDArray view.
   */
  TVM_DLL static NDArray FromDLPack(DLManagedTensor* tensor);
  /*!
   * \brief Function to copy data from one array to another.
   * \param from The source array.
   * \param to The target array.
   * \param stream The stream used in copy.
   */
  TVM_DLL static void CopyFromTo(const DLTensor* from, DLTensor* to,
                                 TVMStreamHandle stream = nullptr);

  /*! \return The shape of the current array. */
  TVM_DLL ShapeTuple Shape() const;
  /*! \return The data type of the current array. */
  TVM_DLL runtime::DataType DataType() const;
  /*!
   * \brief Check conditions for construction NDArray over DLTensor without copying.
   *  There are three conditions to check:
   *  1. Destination device is the same as DLTensor device
   *  2. Destination device id is the same as DLTensor device id
   *  3. Memory in DLTensor is aligned as expected for NDArray
   * \param tensor the DLTensor.
   * \param dev destination device.
   * \return true if all conditions are satisfied.
   */
  TVM_DLL static bool AbilityOfZeroCopyForDLTensor(DLTensor* tensor, const Device& dev);
  // internal namespace
  struct Internal;

 private:
  TVM_DLL static bool IsAligned(const DLTensor& tensor);

 protected:
  friend class TVMPODValue_;
  friend class TVMRetValue;
  friend class TVMArgsSetter;
  /*!
   * \brief Get mutable internal container pointer.
   * \return a mutable container pointer.
   */
  inline Container* get_mutable() const;
  // Helper functions for FFI handling.
  /*!
   * \brief Construct NDArray's Data field from array handle in FFI.
   * \param handle The array handle.
   * \return The corresponding ObjectPtr to the constructed container object.
   *
   * \note We keep a special calling convention for NDArray by passing
   *       ContainerBase pointer in FFI.
   *       As a result, the argument is compatible to DLTensor*.
   */
  inline static ObjectPtr<Object> FFIDataFromHandle(TVMArrayHandle handle);
  /*!
   * \brief DecRef resource managed by an FFI array handle.
   * \param handle The array handle.
   */
  inline static void FFIDecRef(TVMArrayHandle handle);
  /*!
   * \brief Get FFI Array handle from ndarray.
   * \param nd The object with ndarray type.
   * \return The result array handle.
   */
  inline static TVMArrayHandle FFIGetHandle(const ObjectRef& nd);
};
249 | |
/*!
 * \brief Save a DLTensor to stream.
 * \param strm The output stream.
 * \param tensor The tensor to be saved.
 * \return true on completion (failures abort through ICHECK in the definition below).
 */
inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor);
256 | |
/*!
 * \brief The container base structure
 *  contains all the fields except for the Object header.
 *
 * \note We explicitly declare this structure in order to pass
 *       PackedFunc argument using ContainerBase*.
 */
class NDArray::ContainerBase {
 public:
  /*!
   * \brief The corresponding dl_tensor field.
   * \note it is important that the first field is DLTensor
   *  So that this data structure is DLTensor compatible.
   *  The head ptr of this struct can be viewed as DLTensor*.
   */
  DLTensor dl_tensor;

  /*!
   * \brief additional context, reserved for recycling
   * \note We can attach additional content here
   *  which the current container depend on
   *  (e.g. reference to original memory when creating views).
   */
  void* manager_ctx{nullptr};

 protected:
  /*!
   * \brief The shape container,
   *  can be used for shape data.
   */
  ShapeTuple shape_;
};
289 | |
290 | /*! |
291 | * \brief Object container class that backs NDArray. |
292 | * \note do not use this function directly, use NDArray. |
293 | */ |
294 | class NDArray::Container : public Object, public NDArray::ContainerBase { |
295 | public: |
296 | /*! \brief default constructor */ |
297 | Container() { |
298 | // Initialize the type index. |
299 | type_index_ = Container::RuntimeTypeIndex(); |
300 | dl_tensor.data = nullptr; |
301 | dl_tensor.ndim = 0; |
302 | dl_tensor.shape = nullptr; |
303 | dl_tensor.strides = nullptr; |
304 | dl_tensor.byte_offset = 0; |
305 | } |
306 | |
307 | Container(void* data, ShapeTuple shape, DLDataType dtype, Device dev) { |
308 | // Initialize the type index. |
309 | type_index_ = Container::RuntimeTypeIndex(); |
310 | dl_tensor.data = data; |
311 | shape_ = std::move(shape); |
312 | dl_tensor.ndim = static_cast<int>(shape_.size()); |
313 | dl_tensor.shape = const_cast<ShapeTuple::index_type*>(shape_.data()); |
314 | dl_tensor.dtype = dtype; |
315 | dl_tensor.strides = nullptr; |
316 | dl_tensor.byte_offset = 0; |
317 | dl_tensor.device = dev; |
318 | } |
319 | /*! |
320 | * \brief Set the deleter field. |
321 | * \param deleter The deleter. |
322 | */ |
323 | void SetDeleter(FDeleter deleter) { deleter_ = deleter; } |
324 | |
325 | // Expose DecRef and IncRef as public function |
326 | // NOTE: they are only for developer purposes only. |
327 | using Object::DecRef; |
328 | using Object::IncRef; |
329 | |
330 | // Information for object protocol. |
331 | static constexpr const uint32_t _type_index = TypeIndex::kRuntimeNDArray; |
332 | static constexpr const uint32_t _type_child_slots = 0; |
333 | static constexpr const uint32_t _type_child_slots_can_overflow = true; |
334 | static constexpr const char* _type_key = "runtime.NDArray" ; |
335 | TVM_DECLARE_BASE_OBJECT_INFO(NDArray::Container, Object); |
336 | |
337 | protected: |
338 | friend class RPCWrappedFunc; |
339 | friend class NDArray; |
340 | }; |
341 | |
342 | // implementations of inline functions |
343 | /*! |
344 | * \brief return the size of data the DLTensor hold, in term of number of bytes |
345 | * |
346 | * \param arr the input DLTensor |
347 | * \return number of bytes of data in the DLTensor. |
348 | */ |
349 | inline size_t GetDataSize(const DLTensor& arr) { |
350 | size_t size = 1; |
351 | for (tvm_index_t i = 0; i < arr.ndim; ++i) { |
352 | size *= static_cast<size_t>(arr.shape[i]); |
353 | } |
354 | size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8; |
355 | return size; |
356 | } |
357 | |
358 | /*! |
359 | * \brief check if a DLTensor is contiguous. |
360 | * \param arr The input DLTensor. |
361 | * \return The check result. |
362 | */ |
363 | static inline bool IsContiguous(const DLTensor& arr) { |
364 | if (arr.strides == nullptr) return true; |
365 | int64_t expected_stride = 1; |
366 | for (int32_t i = arr.ndim; i != 0; --i) { |
367 | int32_t k = i - 1; |
368 | if (arr.shape[k] == 1) { |
369 | // Skip stride check if shape[k] is 1, where the dimension is contiguous |
370 | // regardless of the value of stride. |
371 | // |
372 | // For example, PyTorch will normalize stride to 1 if shape is 1 when exporting |
373 | // to DLPack. |
374 | // More context: https://github.com/pytorch/pytorch/pull/83158 |
375 | continue; |
376 | } |
377 | if (arr.strides[k] != expected_stride) return false; |
378 | expected_stride *= arr.shape[k]; |
379 | } |
380 | return true; |
381 | } |
382 | |
383 | inline bool NDArray::IsContiguous() const { |
384 | return ::tvm::runtime::IsContiguous(get_mutable()->dl_tensor); |
385 | } |
386 | |
387 | inline void NDArray::CopyFrom(const DLTensor* other) { |
388 | ICHECK(data_ != nullptr); |
389 | CopyFromTo(other, &(get_mutable()->dl_tensor)); |
390 | } |
391 | |
392 | inline void NDArray::CopyFrom(const NDArray& other) { |
393 | ICHECK(data_ != nullptr); |
394 | ICHECK(other.data_ != nullptr); |
395 | CopyFromTo(&(other.get_mutable()->dl_tensor), &(get_mutable()->dl_tensor)); |
396 | } |
397 | |
398 | inline void NDArray::CopyTo(DLTensor* other) const { |
399 | ICHECK(data_ != nullptr); |
400 | CopyFromTo(&(get_mutable()->dl_tensor), other); |
401 | } |
402 | |
403 | inline void NDArray::CopyTo(const NDArray& other) const { |
404 | ICHECK(data_ != nullptr); |
405 | ICHECK(other.data_ != nullptr); |
406 | CopyFromTo(&(get_mutable()->dl_tensor), &(other.get_mutable()->dl_tensor)); |
407 | } |
408 | |
409 | inline NDArray NDArray::CopyTo(const Device& dev) const { |
410 | ICHECK(data_ != nullptr); |
411 | const DLTensor* dptr = operator->(); |
412 | NDArray ret = Empty(ShapeTuple(dptr->shape, dptr->shape + dptr->ndim), dptr->dtype, dev); |
413 | this->CopyTo(ret); |
414 | return ret; |
415 | } |
416 | |
// Reference count of the backing container; approximate under concurrency.
inline int NDArray::use_count() const { return data_.use_count(); }
418 | |
// Read-only access to the underlying DLTensor stored inside the container.
inline const DLTensor* NDArray::operator->() const { return &(get_mutable()->dl_tensor); }
420 | |
// Downcast the stored Object pointer to the concrete NDArray::Container.
inline NDArray::Container* NDArray::get_mutable() const {
  return static_cast<NDArray::Container*>(data_.get());
}
424 | |
425 | inline ObjectPtr<Object> NDArray::FFIDataFromHandle(TVMArrayHandle handle) { |
426 | return GetObjectPtr<Object>( |
427 | static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle))); |
428 | } |
429 | |
430 | inline TVMArrayHandle NDArray::FFIGetHandle(const ObjectRef& nd) { |
431 | // NOTE: it is necessary to cast to container then to base |
432 | // so that the FFI handle uses the ContainerBase address. |
433 | auto ptr = reinterpret_cast<TVMArrayHandle>(static_cast<NDArray::ContainerBase*>( |
434 | static_cast<NDArray::Container*>(const_cast<Object*>(nd.get())))); |
435 | return ptr; |
436 | } |
437 | |
438 | inline void NDArray::FFIDecRef(TVMArrayHandle handle) { |
439 | static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle))->DecRef(); |
440 | } |
441 | |
442 | inline Object* TVMArrayHandleToObjectHandle(TVMArrayHandle handle) { |
443 | return static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle)); |
444 | } |
445 | |
/*! \brief Magic number written/validated at the head of an NDArray file */
constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;
448 | |
449 | inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor) { |
450 | uint64_t = kTVMNDArrayMagic, reserved = 0; |
451 | strm->Write(header); |
452 | strm->Write(reserved); |
453 | // Always save data as CPU context |
454 | // |
455 | // Parameters that get serialized should be in CPU by default. |
456 | // So even the array's context is GPU, it will be stored as CPU array. |
457 | // This is used to prevent case when another user loads the parameters |
458 | // back on machine that do not have GPU or related context. |
459 | // |
460 | // We can always do array.CopyTo(target_dev) to get a corresponding |
461 | // array in the target context. |
462 | Device cpu_dev; |
463 | cpu_dev.device_type = kDLCPU; |
464 | cpu_dev.device_id = 0; |
465 | strm->Write(cpu_dev); |
466 | strm->Write(tensor->ndim); |
467 | strm->Write(tensor->dtype); |
468 | int ndim = tensor->ndim; |
469 | strm->WriteArray(tensor->shape, ndim); |
470 | int type_bytes = (tensor->dtype.bits + 7) / 8; |
471 | int64_t num_elems = 1; |
472 | for (int i = 0; i < ndim; ++i) { |
473 | num_elems *= tensor->shape[i]; |
474 | } |
475 | int64_t data_byte_size = type_bytes * num_elems; |
476 | strm->Write(data_byte_size); |
477 | |
478 | if (DMLC_IO_NO_ENDIAN_SWAP && tensor->device.device_type == kDLCPU && |
479 | tensor->strides == nullptr && tensor->byte_offset == 0) { |
480 | // quick path |
481 | strm->Write(tensor->data, data_byte_size); |
482 | } else { |
483 | std::vector<uint8_t> bytes(data_byte_size); |
484 | ICHECK_EQ( |
485 | TVMArrayCopyToBytes(const_cast<DLTensor*>(tensor), dmlc::BeginPtr(bytes), data_byte_size), |
486 | 0) |
487 | << TVMGetLastError(); |
488 | if (!DMLC_IO_NO_ENDIAN_SWAP) { |
489 | dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems); |
490 | } |
491 | strm->Write(dmlc::BeginPtr(bytes), data_byte_size); |
492 | } |
493 | return true; |
494 | } |
495 | |
// Serialization delegates to SaveDLTensor on the underlying DLTensor.
inline void NDArray::Save(dmlc::Stream* strm) const { SaveDLTensor(strm, operator->()); }
497 | |
498 | inline bool NDArray::Load(dmlc::Stream* strm) { |
499 | uint64_t , reserved; |
500 | ICHECK(strm->Read(&header)) << "Invalid DLTensor file format" ; |
501 | ICHECK(strm->Read(&reserved)) << "Invalid DLTensor file format" ; |
502 | ICHECK(header == kTVMNDArrayMagic) << "Invalid DLTensor file format" ; |
503 | Device dev; |
504 | int ndim; |
505 | DLDataType dtype; |
506 | ICHECK(strm->Read(&dev)) << "Invalid DLTensor file format" ; |
507 | ICHECK(strm->Read(&ndim)) << "Invalid DLTensor file format" ; |
508 | ICHECK(strm->Read(&dtype)) << "Invalid DLTensor file format" ; |
509 | ICHECK_EQ(dev.device_type, kDLCPU) << "Invalid DLTensor device: can only save as CPU tensor" ; |
510 | std::vector<int64_t> shape(ndim); |
511 | if (ndim != 0) { |
512 | ICHECK(strm->ReadArray(&shape[0], ndim)) << "Invalid DLTensor file format" ; |
513 | } |
514 | NDArray ret = NDArray::Empty(ShapeTuple(shape), dtype, dev); |
515 | int64_t num_elems = 1; |
516 | int elem_bytes = (ret->dtype.bits + 7) / 8; |
517 | for (int i = 0; i < ret->ndim; ++i) { |
518 | num_elems *= ret->shape[i]; |
519 | } |
520 | int64_t data_byte_size; |
521 | ICHECK(strm->Read(&data_byte_size)) << "Invalid DLTensor file format" ; |
522 | ICHECK(data_byte_size == num_elems * elem_bytes) << "Invalid DLTensor file format" ; |
523 | auto read_ret = strm->Read(ret->data, data_byte_size); |
524 | // Only check non-empty data |
525 | if (ndim > 0 && shape[0] != 0) { |
526 | ICHECK(read_ret) << "Invalid DLTensor file format" ; |
527 | } |
528 | if (!DMLC_IO_NO_ENDIAN_SWAP) { |
529 | dmlc::ByteSwap(ret->data, elem_bytes, num_elems); |
530 | } |
531 | *this = ret; |
532 | return true; |
533 | } |
534 | |
535 | } // namespace runtime |
536 | } // namespace tvm |
537 | |
538 | namespace std { |
539 | template <> |
540 | struct hash<tvm::Device> { |
541 | std::size_t operator()(const tvm::Device& dev) const { |
542 | return ((dev.device_id << 8) | dev.device_type); |
543 | } |
544 | }; |
545 | |
546 | template <> |
547 | struct equal_to<tvm::Device> { |
548 | bool operator()(const tvm::Device& lhs, const tvm::Device& rhs) const { |
549 | return (lhs.device_type == rhs.device_type && lhs.device_id == rhs.device_id); |
550 | } |
551 | }; |
552 | } // namespace std |
553 | |
554 | #endif // TVM_RUNTIME_NDARRAY_H_ |
555 | |