1/*!
2 * Copyright (c) 2017 by Contributors
3 * \file dlpack.h
4 * \brief The common header of DLPack.
5 */
6#ifndef DLPACK_DLPACK_H_
7#define DLPACK_DLPACK_H_
8
9#ifdef __cplusplus
10#define DLPACK_EXTERN_C extern "C"
11#else
12#define DLPACK_EXTERN_C
13#endif
14
15/*! \brief The current version of dlpack */
16#define DLPACK_VERSION 60
17
18/*! \brief DLPACK_DLL prefix for windows */
19#ifdef _WIN32
20#ifdef DLPACK_EXPORTS
21#define DLPACK_DLL __declspec(dllexport)
22#else
23#define DLPACK_DLL __declspec(dllimport)
24#endif
25#else
26#define DLPACK_DLL
27#endif
28
29#include <stddef.h>
30#include <stdint.h>
31
32#ifdef __cplusplus
33extern "C" {
34#endif
/*!
 * \brief The device type in DLDevice.
 *
 * NOTE(review): these numeric values are part of the DLPack interchange
 * ABI shared across frameworks — presumably they must never be renumbered
 * or reused; confirm against the upstream DLPack specification.
 */
typedef enum {
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used to quickly test a new extension device.
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
} DLDeviceType;
72
/*!
 * \brief A Device for Tensor and operator.
 *
 * Identifies where a DLTensor's data lives: a device type (CPU, CUDA, ...)
 * plus an index distinguishing multiple devices of the same type.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int device_id;
} DLDevice;
85
/*!
 * \brief The type code options DLDataType.
 *
 * Stored in DLDataType::code as a uint8_t; only values of this enum are
 * valid there. NOTE(review): like DLDeviceType, these values are presumably
 * fixed by the DLPack ABI — confirm before adding/renumbering entries.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be
   * well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
} DLDataTypeCode;
110
/*!
 * \brief The data type the tensor can hold.
 *
 * A compact (code, bits, lanes) descriptor; `lanes > 1` denotes a
 * vectorized element type.
 *
 * Examples
 *  - float: type_code = 2, bits = 32, lanes=1
 *  - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
 *  - int8: type_code = 0, bits = 8, lanes=1
 *  - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 */
typedef struct {
  /*!
   * \brief Type code of base types.
   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
   * footprint, but the value should be one of DLDataTypeCode enum values.
   * */
  uint8_t code;
  /*!
   * \brief Number of bits, common choices are 8, 16, 32.
   */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
134
/*!
 * \brief Plain C Tensor object, does not manage memory.
 *
 * A non-owning view of tensor data: the producer retains ownership of
 * `data`, `shape`, and `strides`. For owned/managed exchange see
 * DLManagedTensor below.
 */
typedef struct {
  /*!
   * \brief The opaque data pointer points to the allocated data. This will be
   * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
   * aligned to 256 bytes as in CUDA.
   *
   * For given DLTensor, the size of memory required to store the contents of
   * data is calculated as follows:
   *
   * \code{.c}
   * static inline size_t GetDataSize(const DLTensor* t) {
   *   size_t size = 1;
   *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
   * \endcode
   */
  void* data;
  /*! \brief The device of the tensor */
  DLDevice device;
  /*! \brief Number of dimensions */
  int ndim;
  /*! \brief The data type of the pointer*/
  DLDataType dtype;
  /*! \brief The shape of the tensor, an array of length ndim */
  int64_t* shape;
  /*!
   * \brief strides of the tensor (in number of elements, not bytes)
   * can be NULL, indicating tensor is compact and row-majored.
   */
  int64_t* strides;
  /*! \brief The offset in bytes to the beginning pointer to data */
  uint64_t byte_offset;
} DLTensor;
175
/*!
 * \brief C Tensor object, manage memory of DLTensor. This data structure is
 *  intended to facilitate the borrowing of DLTensor by another framework. It is
 *  not meant to transfer the tensor. When the borrowing framework doesn't need
 *  the tensor, it should call the deleter to notify the host that the resource
 *  is no longer needed.
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief the context of the original host framework of DLManagedTensor in
   *   which DLManagedTensor is used in the framework. It can also be NULL.
   */
  void* manager_ctx;
  /*! \brief Destructor signature void (*)(void*) - this should be called
   *   by the borrower to destruct manager_ctx which holds the DLManagedTensor.
   *   It can be NULL if there is no way for the caller to provide a reasonable
   *   destructor. The destructor deletes the argument self as well, so the
   *   borrower must not access the DLManagedTensor after calling it.
   */
  void (*deleter)(struct DLManagedTensor* self);
} DLManagedTensor;
197#ifdef __cplusplus
198} // DLPACK_EXTERN_C
199#endif
200#endif // DLPACK_DLPACK_H_
201