/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file ndarray.cc
 * \brief NDArray container infrastructure.
 */
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/registry.h>

#include "runtime_base.h"

extern "C" {
// DLPack deleter with C linkage.
static void TVMNDArrayDLPackDeleter(DLManagedTensor* tensor);
// Helper function to get an NDArray's type index; only used by ctypes.
TVM_DLL int TVMArrayGetTypeIndex(TVMArrayHandle handle, unsigned* out_tindex);
}

namespace tvm {
namespace runtime {

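// Check that a DLDataType is representable by the NDArray container:
// sub-byte bool (uint1), int1, and int4/uint4 are special-cased; every
// other type must use a whole, power-of-two number of bytes.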
inline void VerifyDataType(DLDataType dtype) {
  ICHECK_GE(dtype.lanes, 1);
  if (dtype.code == kDLFloat) {
    ICHECK_EQ(dtype.bits % 8, 0);
  } else {
    // allow uint1 as a special flag for bool.
    if (dtype.bits == 1 && dtype.code == kDLUInt) return;
    // allow int1/uint4/int4
    if (dtype.bits == 1 && dtype.code == kDLInt) return;
    if (dtype.bits == 4 && dtype.code == kDLUInt) return;
    if (dtype.bits == 4 && dtype.code == kDLInt) return;
    ICHECK_EQ(dtype.bits % 8, 0);
  }
  ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}

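// Copy `nbytes` of raw host memory into `handle`, synchronizing so the
// caller may free the source buffer as soon as the call returns.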
void ArrayCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
  size_t arr_size = GetDataSize(*handle);
  ICHECK_EQ(arr_size, nbytes) << "ArrayCopyFromBytes: size mismatch";
  ICHECK(IsContiguous(*handle)) << "ArrayCopyFromBytes only supports contiguous arrays for now";

  // Wrap the raw source pointer in a CPU DLTensor mirroring the
  // destination's shape and dtype.
  DLTensor from;
  from.data = const_cast<void*>(data);
  from.device = Device{kDLCPU, 0};
  from.ndim = handle->ndim;
  from.dtype = handle->dtype;
  from.shape = handle->shape;
  from.strides = nullptr;
  from.byte_offset = 0;
  DeviceAPI::Get(handle->device)->CopyDataFromTo(&from, handle, nullptr);
  // Synchronize in case the source data becomes unavailable later.
  DeviceAPI::Get(handle->device)->StreamSync(handle->device, nullptr);
}

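// Copy a tensor's contents into `nbytes` of raw host memory, synchronizing
// so the destination buffer is fully written when the call returns.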
void ArrayCopyToBytes(const DLTensor* handle, void* data, size_t nbytes) {
  size_t arr_size = GetDataSize(*handle);
  ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch";
  ICHECK(IsContiguous(*handle)) << "ArrayCopyToBytes only supports contiguous arrays for now";

  // Wrap the raw destination pointer in a CPU DLTensor mirroring the
  // source's shape and dtype.
  DLTensor to;
  to.data = data;
  to.device = Device{kDLCPU, 0};
  to.ndim = handle->ndim;
  to.dtype = handle->dtype;
  to.shape = handle->shape;
  to.strides = nullptr;
  to.byte_offset = 0;

  DeviceAPI::Get(handle->device)->CopyDataFromTo(const_cast<DLTensor*>(handle), &to, nullptr);
  // Synchronize so the copy has completed before the caller reads the bytes.
  DeviceAPI::Get(handle->device)->StreamSync(handle->device, nullptr);
}

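// Internal helpers that manage NDArray::Container lifetimes and the
// conversions between containers and DLPack managed tensors.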
struct NDArray::Internal {
  // Default deleter for the container.
  static void DefaultDeleter(Object* ptr_obj) {
    auto* ptr = static_cast<NDArray::Container*>(ptr_obj);
    if (ptr->manager_ctx != nullptr) {
      // Another container owns the memory (e.g., this is a view):
      // drop the reference we hold on it.
      static_cast<NDArray::Container*>(ptr->manager_ctx)->DecRef();
    } else if (ptr->dl_tensor.data != nullptr) {
      tvm::runtime::DeviceAPI::Get(ptr->dl_tensor.device)
          ->FreeDataSpace(ptr->dl_tensor.device, ptr->dl_tensor.data);
    }
    delete ptr;
  }
  // Deleter for an NDArray converted from DLPack.
  // This is used for data passed in as an external DLManagedTensor
  // that was not allocated inside TVM, which lets us create NDArrays
  // from memory allocated by other DLPack-compatible frameworks.
  static void DLPackDeleter(Object* ptr_obj) {
    auto* ptr = static_cast<NDArray::Container*>(ptr_obj);
    DLManagedTensor* tensor = static_cast<DLManagedTensor*>(ptr->manager_ctx);
    if (tensor->deleter != nullptr) {
      (*tensor->deleter)(tensor);
    }
    delete ptr;
  }
  // Deleter for an NDArray that wraps an external DLTensor.
  // The memory is allocated externally, and responsibility for freeing
  // it is assumed to remain with the external owner.
  static void SelfDeleter(Object* ptr_obj) {
    auto* ptr = static_cast<NDArray::Container*>(ptr_obj);
    delete ptr;
  }
  // Local create function which allocates tensor metadata
  // but does not allocate space for the data.
  static NDArray Create(ShapeTuple shape, DLDataType dtype, Device dev) {
    VerifyDataType(dtype);

    // critical zone: construct header
    NDArray::Container* data = new NDArray::Container();
    data->SetDeleter(DefaultDeleter);

    // RAII now in effect
    NDArray ret(GetObjectPtr<Object>(data));
    // setup shape
    data->shape_ = std::move(shape);
    data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data());
    data->dl_tensor.ndim = static_cast<int>(data->shape_.size());
    // setup dtype
    data->dl_tensor.dtype = dtype;
    // setup device
    data->dl_tensor.device = dev;
    return ret;
  }
  // Implementation of API function.
  static DLTensor* MoveToFFIHandle(NDArray arr) {
    DLTensor* handle = NDArray::FFIGetHandle(arr);
    ObjectRef::FFIClearAfterMove(&arr);
    return handle;
  }
  static void FFIDecRef(TVMArrayHandle tensor) { NDArray::FFIDecRef(tensor); }
  // Container to DLManagedTensor.
  static DLManagedTensor* ToDLPack(TVMArrayHandle handle) {
    auto* from =
        static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle));
    return ToDLPack(from);
  }

  static DLManagedTensor* ToDLPack(NDArray::Container* from) {
    ICHECK(from != nullptr);
    DLManagedTensor* ret = new DLManagedTensor();
    ret->dl_tensor = from->dl_tensor;
    ret->manager_ctx = from;
    // The managed tensor holds a reference that keeps the container alive.
    from->IncRef();
    ret->deleter = TVMNDArrayDLPackDeleter;
    return ret;
  }
  // Delete the DLPack object.
  static void NDArrayDLPackDeleter(DLManagedTensor* tensor) {
    static_cast<NDArray::Container*>(tensor->manager_ctx)->DecRef();
    delete tensor;
  }
};

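// CreateView returns an NDArray that aliases this array's storage with a
// new shape and dtype; the view keeps the source container alive via an
// extra reference. Illustrative sketch (assumes `arr` is a contiguous
// 2x3 array):
//
//   NDArray flat = arr.CreateView({6}, arr->dtype);  // same bytes, shape {6}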
NDArray NDArray::CreateView(ShapeTuple shape, DLDataType dtype) {
  ICHECK(data_ != nullptr);
  ICHECK(get_mutable()->dl_tensor.strides == nullptr) << "Can only create view for compact tensor";
  NDArray ret = Internal::Create(shape, dtype, get_mutable()->dl_tensor.device);
  ret.get_mutable()->dl_tensor.byte_offset = this->get_mutable()->dl_tensor.byte_offset;
  size_t curr_size = GetDataSize(this->get_mutable()->dl_tensor);
  size_t view_size = GetDataSize(ret.get_mutable()->dl_tensor);
  ICHECK_LE(view_size, curr_size)
      << "Tried to create a view that requires more memory than the current array has";
  // increase ref count
  get_mutable()->IncRef();
  ret.get_mutable()->manager_ctx = get_mutable();
  ret.get_mutable()->dl_tensor.data = get_mutable()->dl_tensor.data;
  return ret;
}

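// Export to a DLPack managed tensor; the consumer must eventually invoke
// the returned tensor's deleter to release the reference taken here.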
DLManagedTensor* NDArray::ToDLPack() const { return Internal::ToDLPack(get_mutable()); }

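// Allocate an uninitialized NDArray of the given shape, dtype, and device,
// optionally within a named memory scope. Usage sketch (illustrative,
// CPU target assumed):
//
//   NDArray a = NDArray::Empty({2, 3}, DLDataType{kDLFloat, 32, 1}, Device{kDLCPU, 0});
//   std::vector<float> src(6, 1.0f);
//   a.CopyFromBytes(src.data(), src.size() * sizeof(float));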
NDArray NDArray::Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional<String> mem_scope) {
  NDArray ret = Internal::Create(shape, dtype, dev);
  ret.get_mutable()->dl_tensor.data =
      DeviceAPI::Get(ret->device)
          ->AllocDataSpace(ret->device, shape.size(), shape.data(), ret->dtype, mem_scope);
  return ret;
}

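// Wrap an external DLTensor without copying or taking ownership: the
// caller must keep the underlying memory alive for the NDArray's lifetime.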
NDArray NDArray::FromExternalDLTensor(const DLTensor& dl_tensor) {
  ICHECK(::tvm::runtime::IsContiguous(dl_tensor)) << "External DLTensor must be contiguous.";
  ICHECK(IsAligned(dl_tensor)) << "Data in DLTensor is not aligned as required by NDArray";
  NDArray::Container* data = new NDArray::Container();

  data->SetDeleter(Internal::SelfDeleter);
  data->dl_tensor = dl_tensor;
  // Copy the shape into an owned ShapeTuple and point the tensor at it.
  std::vector<ShapeTuple::index_type> shape(data->dl_tensor.shape,
                                            data->dl_tensor.shape + data->dl_tensor.ndim);
  data->shape_ = ShapeTuple(shape);
  data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data());

  return NDArray(GetObjectPtr<Object>(data));
}

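// Allocate a fresh NDArray on `dev` and copy the DLTensor's contents into
// it; unlike FromExternalDLTensor, this always copies.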
NDArray NDArray::NewFromDLTensor(DLTensor* tensor, const Device& dev) {
  ICHECK(::tvm::runtime::IsContiguous(*tensor))
      << "DLTensor is not contiguous. Copying from non-contiguous data is currently not supported";
  std::vector<int64_t> shape(tensor->shape, tensor->shape + tensor->ndim);
  NDArray ary = NDArray::Empty(shape, tensor->dtype, dev);
  ary.CopyFrom(tensor);
  return ary;
}

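// Take ownership of a DLPack managed tensor: the NDArray stores the
// DLManagedTensor as its manager context and calls its deleter when the
// container itself is destroyed.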
NDArray NDArray::FromDLPack(DLManagedTensor* tensor) {
  NDArray::Container* data = new NDArray::Container();
  // construct header
  data->SetDeleter(Internal::DLPackDeleter);
  // fill up content.
  data->manager_ctx = tensor;
  ICHECK(::tvm::runtime::IsContiguous(tensor->dl_tensor)) << "DLManagedTensor must be contiguous.";
  ICHECK(IsAligned(tensor->dl_tensor))
      << "Data in DLManagedTensor is not aligned as required by NDArray";
  data->dl_tensor = tensor->dl_tensor;
  // update shape_
  std::vector<ShapeTuple::index_type> shape(data->dl_tensor.shape,
                                            data->dl_tensor.shape + data->dl_tensor.ndim);
  data->shape_ = ShapeTuple(shape);
  data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data());
  return NDArray(GetObjectPtr<Object>(data));
}

void NDArray::CopyToBytes(void* data, size_t nbytes) const {
  ICHECK(data != nullptr);
  ICHECK(data_ != nullptr);
  ArrayCopyToBytes(&get_mutable()->dl_tensor, data, nbytes);
}

void NDArray::CopyFromBytes(const void* data, size_t nbytes) {
  ICHECK(data != nullptr);
  ICHECK(data_ != nullptr);
  ArrayCopyFromBytes(&get_mutable()->dl_tensor, data, nbytes);
}

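// Copy between two tensors that may live on different devices. Direct
// copies are only allowed when both sides share a device type or when one
// side is CPU (or CUDA host) memory; anything else must be staged through
// the CPU by the caller.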
void NDArray::CopyFromTo(const DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
  size_t from_size = GetDataSize(*from);
  size_t to_size = GetDataSize(*to);
  ICHECK_EQ(from_size, to_size) << "TVMArrayCopyFromTo: The size must exactly match";

  ICHECK(from->device.device_type == to->device.device_type || from->device.device_type == kDLCPU ||
         to->device.device_type == kDLCPU || from->device.device_type == kDLCUDAHost ||
         to->device.device_type == kDLCUDAHost)
      << "Cannot copy across different device types directly. From device type: "
      << from->device.device_type << " to device type: " << to->device.device_type;

  // Use the device that is *not* a CPU device to get the correct device
  // API manager.
  Device dev = from->device.device_type != kDLCPU ? from->device : to->device;

  DeviceAPI::Get(dev)->CopyDataFromTo(const_cast<DLTensor*>(from), to, stream);
}

ShapeTuple NDArray::Shape() const { return get_mutable()->shape_; }

runtime::DataType NDArray::DataType() const {
  return runtime::DataType(get_mutable()->dl_tensor.dtype);
}

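// A DLTensor can be wrapped zero-copy only if it already lives on the
// requested device (same type and id) and its data pointer meets the
// runtime's alignment requirement.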
bool NDArray::AbilityOfZeroCopyForDLTensor(DLTensor* tensor, const Device& dev) {
  bool device_check = (dev.device_type == tensor->device.device_type);
  bool device_id_check = (dev.device_id == tensor->device.device_id);
  bool alignment_check = IsAligned(*tensor);
  return device_check && device_id_check && alignment_check;
}

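// Check that the effective data address (base pointer plus byte offset)
// is aligned to kAllocAlignment.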
bool NDArray::IsAligned(const DLTensor& tensor) {
  return (reinterpret_cast<size_t>(static_cast<char*>(tensor.data) + tensor.byte_offset) %
              tvm::runtime::kAllocAlignment ==
          0);
}

TVM_REGISTER_OBJECT_TYPE(NDArray::Container);

}  // namespace runtime
}  // namespace tvm

using namespace tvm::runtime;

void TVMNDArrayDLPackDeleter(DLManagedTensor* tensor) {
  NDArray::Internal::NDArrayDLPackDeleter(tensor);
}

int TVMArrayGetTypeIndex(TVMArrayHandle handle, unsigned* out_tindex) {
  API_BEGIN();
  *out_tindex = TVMArrayHandleToObjectHandle(handle)->type_index();
  API_END();
}

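// C-API entry point for allocating an array. Illustrative usage from C
// (error handling elided; the handle must be released with TVMArrayFree):
//
//   tvm_index_t shape[2] = {2, 3};
//   TVMArrayHandle handle;
//   TVMArrayAlloc(shape, 2, kDLFloat, 32, 1, kDLCPU, 0, &handle);
//   /* ... use the array ... */
//   TVMArrayFree(handle);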
int TVMArrayAlloc(const tvm_index_t* shape, int ndim, int dtype_code, int dtype_bits,
                  int dtype_lanes, int device_type, int device_id, TVMArrayHandle* out) {
  API_BEGIN();
  DLDataType dtype;
  dtype.code = static_cast<uint8_t>(dtype_code);
  dtype.bits = static_cast<uint8_t>(dtype_bits);
  dtype.lanes = static_cast<uint16_t>(dtype_lanes);
  tvm::Device dev;
  dev.device_type = static_cast<DLDeviceType>(device_type);
  dev.device_id = device_id;
  auto ndarray = NDArray::Empty(ShapeTuple(shape, shape + ndim), dtype, dev);

  *out = NDArray::Internal::MoveToFFIHandle(ndarray);
  API_END();
}

TVM_REGISTER_GLOBAL("runtime.TVMArrayAllocWithScope").set_body_typed(NDArray::Empty);

TVM_REGISTER_GLOBAL("runtime.TVMArrayCreateView").set_body_typed([](NDArray arr, ShapeTuple shape) {
  NDArray view = arr.CreateView(shape, arr->dtype);
  return view;
});

int TVMArrayFree(TVMArrayHandle handle) {
  API_BEGIN();
  NDArray::Internal::FFIDecRef(handle);
  API_END();
}

int TVMArrayCopyFromTo(TVMArrayHandle from, TVMArrayHandle to, TVMStreamHandle stream) {
  API_BEGIN();
  NDArray::CopyFromTo(from, to, stream);
  API_END();
}

int TVMArrayFromDLPack(DLManagedTensor* from, TVMArrayHandle* out) {
  API_BEGIN();
  *out = NDArray::Internal::MoveToFFIHandle(NDArray::FromDLPack(from));
  API_END();
}

int TVMArrayToDLPack(TVMArrayHandle from, DLManagedTensor** out) {
  API_BEGIN();
  *out = NDArray::Internal::ToDLPack(from);
  API_END();
}

void TVMDLManagedTensorCallDeleter(DLManagedTensor* dltensor) { (*(dltensor->deleter))(dltensor); }

int TVMArrayCopyFromBytes(TVMArrayHandle handle, void* data, size_t nbytes) {
  API_BEGIN();
  ArrayCopyFromBytes(handle, data, nbytes);
  API_END();
}

int TVMArrayCopyToBytes(TVMArrayHandle handle, void* data, size_t nbytes) {
  API_BEGIN();
  ArrayCopyToBytes(handle, data, nbytes);
  API_END();
}