1 | /*! |
2 | * Copyright (c) 2017 by Contributors |
3 | * \file dlpack.h |
4 | * \brief The common header of DLPack. |
5 | */ |
6 | #ifndef DLPACK_DLPACK_H_ |
7 | #define DLPACK_DLPACK_H_ |
8 | |
/*! \brief Linkage prefix: expands to extern "C" under C++ and to nothing under C. */
#if defined(__cplusplus)
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif
14 | |
/*!
 * \brief Version number of this DLPack header.
 */
#define DLPACK_VERSION 60
17 | |
/*!
 * \brief DLPACK_DLL symbol prefix for Windows builds.
 *
 * On _WIN32 it expands to __declspec(dllexport) when DLPACK_EXPORTS is
 * defined and to __declspec(dllimport) otherwise. On every other platform
 * it expands to nothing.
 */
#if !defined(_WIN32)
#define DLPACK_DLL
#elif defined(DLPACK_EXPORTS)
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
28 | |
29 | #include <stddef.h> |
30 | #include <stdint.h> |
31 | |
32 | #ifdef __cplusplus |
33 | extern "C" { |
34 | #endif |
/*!
 * \brief Device type tag stored in DLDevice.
 */
typedef enum {
  /*! \brief CPU device. */
  kDLCPU = 1,
  /*! \brief CUDA GPU device. */
  kDLCUDA = 2,
  /*! \brief Pinned CUDA CPU memory allocated by cudaMallocHost. */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer. */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs. */
  kDLROCM = 10,
  /*! \brief Pinned ROCm CPU memory allocated by hipMallocHost. */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type, used for quickly testing an
   *  extension device. The semantics can differ depending on the
   *  implementation.
   */
  kDLExtDev = 12,
  /*! \brief CUDA managed/unified memory allocated by cudaMallocManaged. */
  kDLCUDAManaged = 13,
} DLDeviceType;
72 | |
73 | /*! |
74 | * \brief A Device for Tensor and operator. |
75 | */ |
76 | typedef struct { |
77 | /*! \brief The device type used in the device. */ |
78 | DLDeviceType device_type; |
79 | /*! |
80 | * \brief The device index. |
81 | * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. |
82 | */ |
83 | int device_id; |
84 | } DLDevice; |
85 | |
/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
  /*! \brief Signed integer. */
  kDLInt = 0U,
  /*! \brief Unsigned integer. */
  kDLUInt = 1U,
  /*! \brief IEEE floating point. */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   *  Frameworks need to agree on the handle data type for the exchange to
   *  be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16. */
  kDLBfloat = 4U,
  /*!
   * \brief Complex number
   *  (C/C++/Python layout: compact struct per complex number).
   */
  kDLComplex = 5U,
} DLDataTypeCode;
110 | |
/*!
 * \brief The data type the tensor can hold.
 *
 *  Examples
 *   - float: code = 2, bits = 32, lanes = 1
 *   - float4 (vectorized 4 float): code = 2, bits = 32, lanes = 4
 *   - int8: code = 0, bits = 8, lanes = 1
 *   - std::complex<float>: code = 5, bits = 64, lanes = 1
 */
typedef struct {
  /*!
   * \brief Type code of the base type.
   *  Stored as uint8_t rather than DLDataTypeCode for minimal memory
   *  footprint, but the value should be one of the DLDataTypeCode enum
   *  values.
   */
  uint8_t code;
  /*! \brief Number of bits; common choices are 8, 16, 32. */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
134 | |
135 | /*! |
136 | * \brief Plain C Tensor object, does not manage memory. |
137 | */ |
138 | typedef struct { |
139 | /*! |
140 | * \brief The opaque data pointer points to the allocated data. This will be |
141 | * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always |
142 | * aligned to 256 bytes as in CUDA. |
143 | * |
144 | * For given DLTensor, the size of memory required to store the contents of |
145 | * data is calculated as follows: |
146 | * |
147 | * \code{.c} |
148 | * static inline size_t GetDataSize(const DLTensor* t) { |
149 | * size_t size = 1; |
150 | * for (tvm_index_t i = 0; i < t->ndim; ++i) { |
151 | * size *= t->shape[i]; |
152 | * } |
153 | * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; |
154 | * return size; |
155 | * } |
156 | * \endcode |
157 | */ |
158 | void* data; |
159 | /*! \brief The device of the tensor */ |
160 | DLDevice device; |
161 | /*! \brief Number of dimensions */ |
162 | int ndim; |
163 | /*! \brief The data type of the pointer*/ |
164 | DLDataType dtype; |
165 | /*! \brief The shape of the tensor */ |
166 | int64_t* shape; |
167 | /*! |
168 | * \brief strides of the tensor (in number of elements, not bytes) |
169 | * can be NULL, indicating tensor is compact and row-majored. |
170 | */ |
171 | int64_t* strides; |
172 | /*! \brief The offset in bytes to the beginning pointer to data */ |
173 | uint64_t byte_offset; |
174 | } DLTensor; |
175 | |
176 | /*! |
177 | * \brief C Tensor object, manage memory of DLTensor. This data structure is |
178 | * intended to facilitate the borrowing of DLTensor by another framework. It is |
179 | * not meant to transfer the tensor. When the borrowing framework doesn't need |
180 | * the tensor, it should call the deleter to notify the host that the resource |
181 | * is no longer needed. |
182 | */ |
183 | typedef struct DLManagedTensor { |
184 | /*! \brief DLTensor which is being memory managed */ |
185 | DLTensor dl_tensor; |
186 | /*! \brief the context of the original host framework of DLManagedTensor in |
187 | * which DLManagedTensor is used in the framework. It can also be NULL. |
188 | */ |
189 | void* manager_ctx; |
190 | /*! \brief Destructor signature void (*)(void*) - this should be called |
191 | * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL |
192 | * if there is no way for the caller to provide a reasonable destructor. |
193 | * The destructors deletes the argument self as well. |
194 | */ |
195 | void (*deleter)(struct DLManagedTensor* self); |
196 | } DLManagedTensor; |
197 | #ifdef __cplusplus |
198 | } // DLPACK_EXTERN_C |
199 | #endif |
200 | #endif // DLPACK_DLPACK_H_ |
201 | |