/*!
 * Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

/**
 * \brief Compatibility with C++
 */
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*! \brief The current version of dlpack */
#define DLPACK_VERSION 70

/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1

/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
/*!
 * \brief The device type in DLDevice.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm device for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used to quickly test an extension device.
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the device
   * type, the USM allocation type and the sycl context it is bound to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for next generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
} DLDeviceType;

/*!
 * \brief A Device for Tensor and operator.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;
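
/*
 * Illustrative only (not part of the DLPack API): a minimal sketch of how a
 * DLDevice is typically filled in for plain CPU memory and for the first
 * CUDA GPU. The variable names are hypothetical.
 *
 * \code{.c}
 * DLDevice cpu_dev  = {kDLCPU, 0};   // vanilla CPU memory, device_id is 0
 * DLDevice cuda_dev = {kDLCUDA, 0};  // CUDA GPU with device index 0
 * \endcode
 */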

/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
} DLDataTypeCode;
130
131/*!
132 * \brief The data type the tensor can hold. The data type is assumed to follow the
133 * native endian-ness. An explicit error message should be raised when attempting to
134 * export an array with non-native endianness
135 *
136 * Examples
137 * - float: type_code = 2, bits = 32, lanes=1
138 * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
139 * - int8: type_code = 0, bits = 8, lanes=1
140 * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
141 */
142typedef struct {
143 /*!
144 * \brief Type code of base types.
145 * We keep it uint8_t instead of DLDataTypeCode for minimal memory
146 * footprint, but the value should be one of DLDataTypeCode enum values.
147 * */
148 uint8_t code;
149 /*!
150 * \brief Number of bits, common choices are 8, 16, 32.
151 */
152 uint8_t bits;
153 /*! \brief Number of lanes in the type, used for vector types. */
154 uint16_t lanes;
155} DLDataType;
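
/*
 * Illustrative only (not part of the DLPack API): the DLDataType values that
 * correspond to the examples listed above. The variable names are hypothetical.
 *
 * \code{.c}
 * DLDataType f32   = {kDLFloat, 32, 1};    // float
 * DLDataType f32x4 = {kDLFloat, 32, 4};    // float4 (4-lane vector of float)
 * DLDataType i8    = {kDLInt, 8, 1};       // int8
 * DLDataType c64   = {kDLComplex, 64, 1};  // std::complex<float>
 * \endcode
 */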
156
157/*!
158 * \brief Plain C Tensor object, does not manage memory.
159 */
160typedef struct {
161 /*!
162 * \brief The data pointer points to the allocated data. This will be CUDA
163 * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
164 * types. This pointer is always aligned to 256 bytes as in CUDA. The
165 * `byte_offset` field should be used to point to the beginning of the data.
166 *
167 * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
168 * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
169 * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
170 * (after which this note will be updated); at the moment it is recommended
171 * to not rely on the data pointer being correctly aligned.
172 *
173 * For given DLTensor, the size of memory required to store the contents of
174 * data is calculated as follows:
175 *
176 * \code{.c}
177 * static inline size_t GetDataSize(const DLTensor* t) {
178 * size_t size = 1;
179 * for (tvm_index_t i = 0; i < t->ndim; ++i) {
180 * size *= t->shape[i];
181 * }
182 * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
183 * return size;
184 * }
185 * \endcode
186 */
187 void* data;
188 /*! \brief The device of the tensor */
189 DLDevice device;
190 /*! \brief Number of dimensions */
191 int32_t ndim;
192 /*! \brief The data type of the pointer*/
193 DLDataType dtype;
194 /*! \brief The shape of the tensor */
195 int64_t* shape;
196 /*!
197 * \brief strides of the tensor (in number of elements, not bytes)
198 * can be NULL, indicating tensor is compact and row-majored.
199 */
200 int64_t* strides;
201 /*! \brief The offset in bytes to the beginning pointer to data */
202 uint64_t byte_offset;
203} DLTensor;
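
/*
 * Illustrative only (not part of the DLPack API): a minimal sketch of wrapping
 * an existing C array as a compact 1-D CPU DLTensor. The variable names are
 * hypothetical; the shape array must outlive the DLTensor that points to it.
 *
 * \code{.c}
 * float buffer[4] = {1.0f, 2.0f, 3.0f, 4.0f};
 * int64_t shape[1] = {4};
 *
 * DLTensor t;
 * t.data = buffer;
 * t.device.device_type = kDLCPU;
 * t.device.device_id = 0;
 * t.ndim = 1;
 * t.dtype.code = kDLFloat;
 * t.dtype.bits = 32;
 * t.dtype.lanes = 1;
 * t.shape = shape;
 * t.strides = NULL;   // NULL means compact, row-major layout
 * t.byte_offset = 0;
 * \endcode
 */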

/*!
 * \brief C Tensor object that manages the memory of a DLTensor. This data
 * structure is intended to facilitate the borrowing of a DLTensor by another
 * framework. It is not meant to transfer the tensor. When the borrowing
 * framework no longer needs the tensor, it should call the deleter to notify
 * the host that the resource is no longer needed.
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief The context of the original host framework in which the
   * DLManagedTensor is used. It can also be NULL.
   */
  void * manager_ctx;
  /*! \brief Destructor signature void (*)(void*) - this should be called
   * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
   * if there is no way for the caller to provide a reasonable destructor.
   * The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
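
/*
 * Illustrative only (not part of the DLPack API): a borrowing framework that
 * has finished with a DLManagedTensor releases it by invoking the deleter,
 * when one was provided. The function name is hypothetical.
 *
 * \code{.c}
 * static void release_managed_tensor(DLManagedTensor* managed) {
 *   if (managed != NULL && managed->deleter != NULL) {
 *     managed->deleter(managed);  // frees manager_ctx and the managed tensor itself
 *   }
 * }
 * \endcode
 */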
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_