/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file tvm/runtime/ndarray.h
 * \brief A device-independent managed NDArray abstraction.
 */
#ifndef TVM_RUNTIME_NDARRAY_H_
#define TVM_RUNTIME_NDARRAY_H_

#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/container/optional.h>
#include <tvm/runtime/container/shape_tuple.h>
#include <tvm/runtime/container/string.h>
#include <tvm/runtime/data_type.h>
#include <tvm/runtime/object.h>
#include <tvm/runtime/serializer.h>

#include <atomic>
#include <functional>
#include <utility>
#include <vector>

namespace tvm {

// alias DLDevice
using Device = DLDevice;

// A 'null' device type that does not correspond to any DLDeviceType enum.
// TODO(mbs): This is to help us as we transition away from representing the 'homogeneous' case
// as a singleton target map indexed by the invalid DLDeviceType '0'.
constexpr DLDeviceType kNullDeviceType = static_cast<DLDeviceType>(0);

// An 'invalid' device type that does not correspond to any DLDeviceType enum.
constexpr DLDeviceType kInvalidDeviceType = static_cast<DLDeviceType>(-1);

namespace runtime {

/*!
 * \brief Managed NDArray.
 *  The array is backed by reference counted blocks.
 */
class NDArray : public ObjectRef {
 public:
  /*! \brief ContainerBase used to back the TVMArrayHandle */
  class ContainerBase;
  /*! \brief NDArray internal container type */
  class Container;
  /*! \brief Container type for Object system. */
  using ContainerType = Container;
  /*! \brief default constructor */
  NDArray() {}
  /*!
   * \brief constructor.
   * \param data ObjectPtr to the data container.
   */
  explicit NDArray(ObjectPtr<Object> data) : ObjectRef(data) {}

  /*! \brief reset the content of NDArray to be nullptr */
  inline void reset();
  /*!
   * \return the reference counter
   * \note this number is approximate in a multi-threaded setting.
   */
  inline int use_count() const;
  /*! \return Pointer to content of DLTensor */
  inline const DLTensor* operator->() const;
  /*! \return Whether the tensor is contiguous */
  inline bool IsContiguous() const;
  /*!
   * \brief Copy data content from another array.
   * \param other The source array to be copied from.
   * \note The copy may happen asynchronously if it involves a GPU context.
   *       Call TVMSynchronize to wait for the copy to finish.
   */
  inline void CopyFrom(const DLTensor* other);
  inline void CopyFrom(const NDArray& other);
  /*!
   * \brief Copy data content from a byte buffer.
   * \param data The source bytes to be copied from.
   * \param nbytes The size of the buffer in bytes.
   *        Must be equal to the size of the NDArray.
   * \note The copy always triggers a TVMSynchronize.
   */
  TVM_DLL void CopyFromBytes(const void* data, size_t nbytes);
  /*!
   * \brief Copy data content into another array.
   * \param other The target array to be copied to.
   * \note The copy may happen asynchronously if it involves a GPU context.
   *       Call TVMSynchronize to wait for the copy to finish.
   */
  inline void CopyTo(DLTensor* other) const;
  inline void CopyTo(const NDArray& other) const;
  /*!
   * \brief Copy the data content into a byte buffer.
   * \param data The destination buffer the data is copied to.
   * \param nbytes The size of the data buffer.
   *        Must be equal to the size of the NDArray.
   * \note The copy always triggers a TVMSynchronize.
   */
  TVM_DLL void CopyToBytes(void* data, size_t nbytes) const;
  /*!
   * \brief Copy the data to another device.
   * \param dev The target device.
   * \return The array under another device.
   */
  inline NDArray CopyTo(const Device& dev) const;
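  //
  // A minimal usage sketch of the copy APIs (illustrative only; assumes a
  // CUDA-enabled runtime build and uses hypothetical local variable names):
  //
  //   std::vector<float> host(6, 1.0f);
  //   NDArray cpu_arr = NDArray::Empty(ShapeTuple({2, 3}), DataType::Float(32), Device{kDLCPU, 0});
  //   cpu_arr.CopyFromBytes(host.data(), host.size() * sizeof(float));
  //   NDArray gpu_arr = cpu_arr.CopyTo(Device{kDLCUDA, 0});
  //   // Device copies may be asynchronous; wait before reading the result.
  //   TVMSynchronize(kDLCUDA, 0, nullptr);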
  /*!
   * \brief Load NDArray from stream
   * \param stream The input data stream
   * \return Whether load is successful
   */
  inline bool Load(dmlc::Stream* stream);
  /*!
   * \brief Save NDArray to stream
   * \param stream The output data stream
   */
  inline void Save(dmlc::Stream* stream) const;
  /*!
   * \brief Create an NDArray that shares the data memory with the current one.
   * \param shape The shape of the new array.
   * \param dtype The data type of the new array.
   * \note The memory size of the new array must not exceed that of the current one.
   */
  TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype);
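  //
  // Illustrative sketch: reinterpreting the same storage with a new shape
  // (hypothetical variable names; the view must not need more bytes than the source):
  //
  //   NDArray mat = NDArray::Empty(ShapeTuple({2, 3}), DataType::Float(32), Device{kDLCPU, 0});
  //   NDArray flat = mat.CreateView(ShapeTuple({6}), mat->dtype);  // shares the same memory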
  /*!
   * \brief Create a reference view of the NDArray,
   *  represented as a DLManagedTensor.
   * \return A DLManagedTensor
   */
  TVM_DLL DLManagedTensor* ToDLPack() const;
  /*!
   * \brief Create an empty NDArray.
   * \param shape The shape of the new array.
   * \param dtype The data type of the new array.
   * \param dev The device of the array.
   * \param mem_scope The memory scope of the array.
   * \return The created Array
   */
  TVM_DLL static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
                               Optional<String> mem_scope = NullOpt);
  /*!
   * \brief Create an NDArray backed by an external DLTensor without memory copying.
   *
   * If the DLTensor is not contiguous or its data is not properly aligned, the call fails.
   * This allows us to create an NDArray using the memory
   * allocated by an external source. Responsibility for retaining
   * the memory lies with the external source.
   * \param dl_tensor The DLTensor for the NDArray base.
   * \return The created NDArray view.
   */
  TVM_DLL static NDArray FromExternalDLTensor(const DLTensor& dl_tensor);
  /*!
   * \brief Create a new NDArray, with data copied from the DLTensor.
   *
   * \param dl_tensor The DLTensor to copy from.
   * \param dev device location of the created NDArray.
   * \return The created NDArray.
   */
  TVM_DLL static NDArray NewFromDLTensor(DLTensor* dl_tensor, const Device& dev);
  /*!
   * \brief Create an NDArray backed by a dlpack tensor.
   *
   * This allows us to create an NDArray using the memory
   * allocated by an external deep learning framework
   * that is DLPack compatible.
   *
   * The memory is retained until the NDArray goes out of scope.
   * \param tensor The DLPack tensor to wrap. The NDArray takes ownership of it.
   * \return The created NDArray view.
   */
  TVM_DLL static NDArray FromDLPack(DLManagedTensor* tensor);
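  //
  // Illustrative DLPack interop sketch (hypothetical variable names):
  //
  //   NDArray arr = NDArray::Empty(ShapeTuple({4}), DataType::Int(32), Device{kDLCPU, 0});
  //   DLManagedTensor* dlm = arr.ToDLPack();      // shares memory, bumps the refcount
  //   NDArray again = NDArray::FromDLPack(dlm);   // takes ownership of dlm, still no copy
  //   // If dlm is handed to another framework instead, that framework is responsible
  //   // for eventually calling dlm->deleter(dlm).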
  /*!
   * \brief Function to copy data from one array to another.
   * \param from The source array.
   * \param to The target array.
   * \param stream The stream used in copy.
   */
  TVM_DLL static void CopyFromTo(const DLTensor* from, DLTensor* to,
                                 TVMStreamHandle stream = nullptr);

  TVM_DLL ShapeTuple Shape() const;
  TVM_DLL runtime::DataType DataType() const;
  /*!
   * \brief Check the conditions for constructing an NDArray over a DLTensor without copying.
   * There are three conditions to check:
   * 1. The destination device type is the same as the DLTensor device type
   * 2. The destination device id is the same as the DLTensor device id
   * 3. The memory in the DLTensor is aligned as expected for NDArray
   * \param tensor the DLTensor.
   * \param dev destination device.
   * \return true if all conditions are satisfied.
   */
  TVM_DLL static bool AbilityOfZeroCopyForDLTensor(DLTensor* tensor, const Device& dev);
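  //
  // Illustrative zero-copy sketch (hypothetical names; `ext` is a DLTensor owned by
  // external code and must outlive the resulting NDArray):
  //
  //   Device cpu{kDLCPU, 0};
  //   if (NDArray::AbilityOfZeroCopyForDLTensor(&ext, cpu)) {
  //     NDArray view = NDArray::FromExternalDLTensor(ext);   // no copy, borrows ext.data
  //   } else {
  //     NDArray copy = NDArray::NewFromDLTensor(&ext, cpu);  // falls back to a copy
  //   }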
  // internal namespace
  struct Internal;

 private:
  TVM_DLL static bool IsAligned(const DLTensor& tensor);

 protected:
  friend class TVMPODValue_;
  friend class TVMRetValue;
  friend class TVMArgsSetter;
  /*!
   * \brief Get mutable internal container pointer.
   * \return a mutable container pointer.
   */
  inline Container* get_mutable() const;
  // Helper functions for FFI handling.
  /*!
   * \brief Construct NDArray's Data field from array handle in FFI.
   * \param handle The array handle.
   * \return The corresponding ObjectPtr to the constructed container object.
   *
   * \note We keep a special calling convention for NDArray by passing a
   *       ContainerBase pointer in FFI.
   *       As a result, the argument is compatible with DLTensor*.
   */
  inline static ObjectPtr<Object> FFIDataFromHandle(TVMArrayHandle handle);
  /*!
   * \brief DecRef resource managed by an FFI array handle.
   * \param handle The array handle.
   */
  inline static void FFIDecRef(TVMArrayHandle handle);
  /*!
   * \brief Get FFI Array handle from ndarray.
   * \param nd The object with ndarray type.
   * \return The result array handle.
   */
  inline static TVMArrayHandle FFIGetHandle(const ObjectRef& nd);
};

/*!
 * \brief Save a DLTensor to stream
 * \param strm The output stream
 * \param tensor The tensor to be saved.
 * \return Whether the save is successful.
 */
inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor);

/*!
 * \brief The container base structure
 *  contains all the fields except for the Object header.
 *
 * \note We explicitly declare this structure in order to pass
 *       PackedFunc arguments using ContainerBase*.
 */
class NDArray::ContainerBase {
 public:
  /*!
   * \brief The corresponding dl_tensor field.
   * \note it is important that the first field is DLTensor
   *  so that this data structure is DLTensor compatible.
   *  The head ptr of this struct can be viewed as DLTensor*.
   */
  DLTensor dl_tensor;

  /*!
   * \brief additional context, reserved for recycling
   * \note We can attach additional content here
   *  which the current container depends on
   *  (e.g. reference to original memory when creating views).
   */
  void* manager_ctx{nullptr};

 protected:
  /*!
   * \brief The shape container,
   *  can be used for shape data.
   */
  ShapeTuple shape_;
};

/*!
 * \brief Object container class that backs NDArray.
 * \note do not use this class directly, use NDArray instead.
 */
class NDArray::Container : public Object, public NDArray::ContainerBase {
 public:
  /*! \brief default constructor */
  Container() {
    // Initialize the type index.
    type_index_ = Container::RuntimeTypeIndex();
    dl_tensor.data = nullptr;
    dl_tensor.ndim = 0;
    dl_tensor.shape = nullptr;
    dl_tensor.strides = nullptr;
    dl_tensor.byte_offset = 0;
  }

  Container(void* data, ShapeTuple shape, DLDataType dtype, Device dev) {
    // Initialize the type index.
    type_index_ = Container::RuntimeTypeIndex();
    dl_tensor.data = data;
    shape_ = std::move(shape);
    dl_tensor.ndim = static_cast<int>(shape_.size());
    dl_tensor.shape = const_cast<ShapeTuple::index_type*>(shape_.data());
    dl_tensor.dtype = dtype;
    dl_tensor.strides = nullptr;
    dl_tensor.byte_offset = 0;
    dl_tensor.device = dev;
  }
  /*!
   * \brief Set the deleter field.
   * \param deleter The deleter.
   */
  void SetDeleter(FDeleter deleter) { deleter_ = deleter; }

  // Expose DecRef and IncRef as public functions.
  // NOTE: they are for developer purposes only.
  using Object::DecRef;
  using Object::IncRef;

  // Information for object protocol.
  static constexpr const uint32_t _type_index = TypeIndex::kRuntimeNDArray;
  static constexpr const uint32_t _type_child_slots = 0;
  static constexpr const uint32_t _type_child_slots_can_overflow = true;
  static constexpr const char* _type_key = "runtime.NDArray";
  TVM_DECLARE_BASE_OBJECT_INFO(NDArray::Container, Object);

 protected:
  friend class RPCWrappedFunc;
  friend class NDArray;
};

// implementations of inline functions
/*!
 * \brief return the size of data the DLTensor holds, in terms of number of bytes
 *
 * \param arr the input DLTensor
 * \return number of bytes of data in the DLTensor.
 */
inline size_t GetDataSize(const DLTensor& arr) {
  size_t size = 1;
  for (tvm_index_t i = 0; i < arr.ndim; ++i) {
    size *= static_cast<size_t>(arr.shape[i]);
  }
  size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8;
  return size;
}
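
// Worked example (illustrative): for a float32 tensor with shape {2, 3}
// (bits = 32, lanes = 1), GetDataSize returns 2 * 3 * (32 * 1 + 7) / 8 = 24 bytes.
// Sub-byte types round up per element under this formula, e.g. an int4 tensor with
// shape {3} takes 3 * (4 * 1 + 7) / 8 = 3 bytes.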

/*!
 * \brief check if a DLTensor is contiguous.
 * \param arr The input DLTensor.
 * \return The check result.
 */
static inline bool IsContiguous(const DLTensor& arr) {
  if (arr.strides == nullptr) return true;
  int64_t expected_stride = 1;
  for (int32_t i = arr.ndim; i != 0; --i) {
    int32_t k = i - 1;
    if (arr.shape[k] == 1) {
      // Skip stride check if shape[k] is 1, where the dimension is contiguous
      // regardless of the value of stride.
      //
      // For example, PyTorch will normalize stride to 1 if shape is 1 when exporting
      // to DLPack.
      // More context: https://github.com/pytorch/pytorch/pull/83158
      continue;
    }
    if (arr.strides[k] != expected_stride) return false;
    expected_stride *= arr.shape[k];
  }
  return true;
}
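
// Illustrative examples of the check above (hypothetical tensors):
//   shape = {2, 3}, strides = {3, 1}  -> contiguous (row-major, innermost stride 1)
//   shape = {2, 3}, strides = {1, 2}  -> not contiguous (column-major layout)
//   shape = {1, 3}, strides = {7, 1}  -> contiguous (stride of a size-1 axis is ignored)
//   strides == nullptr                -> treated as compact row-major, hence contiguous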

inline bool NDArray::IsContiguous() const {
  return ::tvm::runtime::IsContiguous(get_mutable()->dl_tensor);
}

inline void NDArray::CopyFrom(const DLTensor* other) {
  ICHECK(data_ != nullptr);
  CopyFromTo(other, &(get_mutable()->dl_tensor));
}

inline void NDArray::CopyFrom(const NDArray& other) {
  ICHECK(data_ != nullptr);
  ICHECK(other.data_ != nullptr);
  CopyFromTo(&(other.get_mutable()->dl_tensor), &(get_mutable()->dl_tensor));
}

inline void NDArray::CopyTo(DLTensor* other) const {
  ICHECK(data_ != nullptr);
  CopyFromTo(&(get_mutable()->dl_tensor), other);
}

inline void NDArray::CopyTo(const NDArray& other) const {
  ICHECK(data_ != nullptr);
  ICHECK(other.data_ != nullptr);
  CopyFromTo(&(get_mutable()->dl_tensor), &(other.get_mutable()->dl_tensor));
}

inline NDArray NDArray::CopyTo(const Device& dev) const {
  ICHECK(data_ != nullptr);
  const DLTensor* dptr = operator->();
  NDArray ret = Empty(ShapeTuple(dptr->shape, dptr->shape + dptr->ndim), dptr->dtype, dev);
  this->CopyTo(ret);
  return ret;
}

inline int NDArray::use_count() const { return data_.use_count(); }

inline const DLTensor* NDArray::operator->() const { return &(get_mutable()->dl_tensor); }

inline NDArray::Container* NDArray::get_mutable() const {
  return static_cast<NDArray::Container*>(data_.get());
}

inline ObjectPtr<Object> NDArray::FFIDataFromHandle(TVMArrayHandle handle) {
  return GetObjectPtr<Object>(
      static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle)));
}

inline TVMArrayHandle NDArray::FFIGetHandle(const ObjectRef& nd) {
  // NOTE: it is necessary to cast to container then to base
  // so that the FFI handle uses the ContainerBase address.
  auto ptr = reinterpret_cast<TVMArrayHandle>(static_cast<NDArray::ContainerBase*>(
      static_cast<NDArray::Container*>(const_cast<Object*>(nd.get()))));
  return ptr;
}

inline void NDArray::FFIDecRef(TVMArrayHandle handle) {
  static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle))->DecRef();
}

inline Object* TVMArrayHandleToObjectHandle(TVMArrayHandle handle) {
  return static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle));
}

/*! \brief Magic number for NDArray file */
constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;

inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor) {
  uint64_t header = kTVMNDArrayMagic, reserved = 0;
  strm->Write(header);
  strm->Write(reserved);
  // Always save data in the CPU context.
  //
  // Parameters that get serialized should be in CPU by default.
  // So even if the array's context is GPU, it will be stored as a CPU array.
  // This prevents the case where another user loads the parameters back
  // on a machine that does not have a GPU or the related context.
  //
  // We can always do array.CopyTo(target_dev) to get a corresponding
  // array in the target context.
  Device cpu_dev;
  cpu_dev.device_type = kDLCPU;
  cpu_dev.device_id = 0;
  strm->Write(cpu_dev);
  strm->Write(tensor->ndim);
  strm->Write(tensor->dtype);
  int ndim = tensor->ndim;
  strm->WriteArray(tensor->shape, ndim);
  int type_bytes = (tensor->dtype.bits + 7) / 8;
  int64_t num_elems = 1;
  for (int i = 0; i < ndim; ++i) {
    num_elems *= tensor->shape[i];
  }
  int64_t data_byte_size = type_bytes * num_elems;
  strm->Write(data_byte_size);

  if (DMLC_IO_NO_ENDIAN_SWAP && tensor->device.device_type == kDLCPU &&
      tensor->strides == nullptr && tensor->byte_offset == 0) {
    // quick path
    strm->Write(tensor->data, data_byte_size);
  } else {
    std::vector<uint8_t> bytes(data_byte_size);
    ICHECK_EQ(
        TVMArrayCopyToBytes(const_cast<DLTensor*>(tensor), dmlc::BeginPtr(bytes), data_byte_size),
        0)
        << TVMGetLastError();
    if (!DMLC_IO_NO_ENDIAN_SWAP) {
      dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems);
    }
    strm->Write(dmlc::BeginPtr(bytes), data_byte_size);
  }
  return true;
}

inline void NDArray::Save(dmlc::Stream* strm) const { SaveDLTensor(strm, operator->()); }

inline bool NDArray::Load(dmlc::Stream* strm) {
  uint64_t header, reserved;
  ICHECK(strm->Read(&header)) << "Invalid DLTensor file format";
  ICHECK(strm->Read(&reserved)) << "Invalid DLTensor file format";
  ICHECK(header == kTVMNDArrayMagic) << "Invalid DLTensor file format";
  Device dev;
  int ndim;
  DLDataType dtype;
  ICHECK(strm->Read(&dev)) << "Invalid DLTensor file format";
  ICHECK(strm->Read(&ndim)) << "Invalid DLTensor file format";
  ICHECK(strm->Read(&dtype)) << "Invalid DLTensor file format";
  ICHECK_EQ(dev.device_type, kDLCPU) << "Invalid DLTensor device: can only save as CPU tensor";
  std::vector<int64_t> shape(ndim);
  if (ndim != 0) {
    ICHECK(strm->ReadArray(&shape[0], ndim)) << "Invalid DLTensor file format";
  }
  NDArray ret = NDArray::Empty(ShapeTuple(shape), dtype, dev);
  int64_t num_elems = 1;
  int elem_bytes = (ret->dtype.bits + 7) / 8;
  for (int i = 0; i < ret->ndim; ++i) {
    num_elems *= ret->shape[i];
  }
  int64_t data_byte_size;
  ICHECK(strm->Read(&data_byte_size)) << "Invalid DLTensor file format";
  ICHECK(data_byte_size == num_elems * elem_bytes) << "Invalid DLTensor file format";
  auto read_ret = strm->Read(ret->data, data_byte_size);
  // Only check non-empty data
  if (ndim > 0 && shape[0] != 0) {
    ICHECK(read_ret) << "Invalid DLTensor file format";
  }
  if (!DMLC_IO_NO_ENDIAN_SWAP) {
    dmlc::ByteSwap(ret->data, elem_bytes, num_elems);
  }
  *this = ret;
  return true;
}
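
// Illustrative save/load round-trip sketch (assumes dmlc/memory_io.h is available;
// variable names are hypothetical and not part of this header):
//
//   std::string blob;
//   dmlc::MemoryStringStream writer(&blob);
//   arr.Save(&writer);                          // serialized as a CPU tensor
//
//   dmlc::MemoryStringStream reader(&blob);
//   NDArray restored;
//   ICHECK(restored.Load(&reader));             // restored always lives on the CPU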

}  // namespace runtime
}  // namespace tvm

namespace std {
template <>
struct hash<tvm::Device> {
  std::size_t operator()(const tvm::Device& dev) const {
    return ((dev.device_id << 8) | dev.device_type);
  }
};

template <>
struct equal_to<tvm::Device> {
  bool operator()(const tvm::Device& lhs, const tvm::Device& rhs) const {
    return (lhs.device_type == rhs.device_type && lhs.device_id == rhs.device_id);
  }
};
}  // namespace std
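
// These specializations let tvm::Device be used directly as a key in standard
// containers. An illustrative sketch (hypothetical variable names):
//
//   std::unordered_map<tvm::Device, std::string> device_names;
//   device_names[tvm::Device{kDLCPU, 0}] = "host";
//   device_names[tvm::Device{kDLCUDA, 0}] = "gpu0";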

#endif  // TVM_RUNTIME_NDARRAY_H_