1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file ndarray.cc |
 * \brief NDArray container infrastructure.
23 | */ |
24 | #include <tvm/runtime/c_runtime_api.h> |
25 | #include <tvm/runtime/device_api.h> |
26 | #include <tvm/runtime/logging.h> |
27 | #include <tvm/runtime/ndarray.h> |
28 | #include <tvm/runtime/registry.h> |
29 | |
30 | #include "runtime_base.h" |
31 | |
32 | extern "C" { |
// DLPack deleter exposed with C linkage.
34 | static void TVMNDArrayDLPackDeleter(DLManagedTensor* tensor); |
35 | // helper function to get NDArray's type index, only used by ctypes. |
36 | TVM_DLL int TVMArrayGetTypeIndex(TVMArrayHandle handle, unsigned* out_tindex); |
37 | } |
38 | |
39 | namespace tvm { |
40 | namespace runtime { |
41 | |
inline void VerifyDataType(DLDataType dtype) {
  ICHECK_GE(dtype.lanes, 1);
  if (dtype.code == kDLFloat) {
    ICHECK_EQ(dtype.bits % 8, 0);
  } else {
    // allow uint1 as a special flag for bool.
    if (dtype.bits == 1 && dtype.code == kDLUInt) return;
    // allow int1/uint4/int4 sub-byte types.
    if (dtype.bits == 1 && dtype.code == kDLInt) return;
    if (dtype.bits == 4 && dtype.code == kDLUInt) return;
    if (dtype.bits == 4 && dtype.code == kDLInt) return;
    ICHECK_EQ(dtype.bits % 8, 0);
  }
  ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}
61 | |
62 | void ArrayCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) { |
63 | size_t arr_size = GetDataSize(*handle); |
  ICHECK_EQ(arr_size, nbytes) << "ArrayCopyFromBytes: size mismatch";
  ICHECK(IsContiguous(*handle)) << "ArrayCopyFromBytes only supports contiguous arrays for now";
66 | |
67 | DLTensor from; |
68 | from.data = const_cast<void*>(data); |
69 | from.device = Device{kDLCPU, 0}; |
70 | from.ndim = handle->ndim; |
71 | from.dtype = handle->dtype; |
72 | from.shape = handle->shape; |
73 | from.strides = nullptr; |
74 | from.byte_offset = 0; |
75 | DeviceAPI::Get(handle->device)->CopyDataFromTo(&from, handle, nullptr); |
  // Synchronize in case the data becomes unavailable later.
77 | DeviceAPI::Get(handle->device)->StreamSync(handle->device, nullptr); |
78 | } |
79 | |
80 | void ArrayCopyToBytes(const DLTensor* handle, void* data, size_t nbytes) { |
81 | size_t arr_size = GetDataSize(*handle); |
  ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch";
  ICHECK(IsContiguous(*handle)) << "ArrayCopyToBytes only supports contiguous arrays for now";
84 | |
85 | DLTensor to; |
  to.data = data;
87 | to.device = Device{kDLCPU, 0}; |
88 | to.ndim = handle->ndim; |
89 | to.dtype = handle->dtype; |
90 | to.shape = handle->shape; |
91 | to.strides = nullptr; |
92 | to.byte_offset = 0; |
93 | |
94 | DeviceAPI::Get(handle->device)->CopyDataFromTo(const_cast<DLTensor*>(handle), &to, nullptr); |
  // Synchronize in case the data becomes unavailable later.
96 | DeviceAPI::Get(handle->device)->StreamSync(handle->device, nullptr); |
97 | } |
98 | |
99 | struct NDArray::Internal { |
100 | // Default deleter for the container |
101 | static void DefaultDeleter(Object* ptr_obj) { |
102 | auto* ptr = static_cast<NDArray::Container*>(ptr_obj); |
103 | if (ptr->manager_ctx != nullptr) { |
104 | static_cast<NDArray::Container*>(ptr->manager_ctx)->DecRef(); |
105 | } else if (ptr->dl_tensor.data != nullptr) { |
106 | tvm::runtime::DeviceAPI::Get(ptr->dl_tensor.device) |
107 | ->FreeDataSpace(ptr->dl_tensor.device, ptr->dl_tensor.data); |
108 | } |
109 | delete ptr; |
110 | } |
  // Deleter for NDArray converted from DLPack.
  // This is used for data passed in from an external DLPack (DLManagedTensor)
  // that was not allocated inside of TVM.
  // This enables us to create an NDArray from memory allocated by other
  // DLPack-compatible frameworks.
116 | static void DLPackDeleter(Object* ptr_obj) { |
117 | auto* ptr = static_cast<NDArray::Container*>(ptr_obj); |
118 | DLManagedTensor* tensor = static_cast<DLManagedTensor*>(ptr->manager_ctx); |
119 | if (tensor->deleter != nullptr) { |
120 | (*tensor->deleter)(tensor); |
121 | } |
122 | delete ptr; |
123 | } |
  // Deleter for NDArray backed by an external DLTensor.
  // The memory is allocated externally, and the external owner remains
  // responsible for freeing it.
127 | static void SelfDeleter(Object* ptr_obj) { |
128 | auto* ptr = static_cast<NDArray::Container*>(ptr_obj); |
129 | delete ptr; |
130 | } |
131 | // Local create function which allocates tensor metadata |
132 | // but does not allocate space for the data. |
133 | static NDArray Create(ShapeTuple shape, DLDataType dtype, Device dev) { |
134 | VerifyDataType(dtype); |
135 | |
136 | // critical zone: construct header |
137 | NDArray::Container* data = new NDArray::Container(); |
138 | data->SetDeleter(DefaultDeleter); |
139 | |
140 | // RAII now in effect |
141 | NDArray ret(GetObjectPtr<Object>(data)); |
142 | // setup shape |
143 | data->shape_ = std::move(shape); |
144 | data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data()); |
145 | data->dl_tensor.ndim = static_cast<int>(data->shape_.size()); |
146 | // setup dtype |
147 | data->dl_tensor.dtype = dtype; |
148 | // setup device |
149 | data->dl_tensor.device = dev; |
150 | return ret; |
151 | } |
152 | // Implementation of API function |
153 | static DLTensor* MoveToFFIHandle(NDArray arr) { |
154 | DLTensor* handle = NDArray::FFIGetHandle(arr); |
155 | ObjectRef::FFIClearAfterMove(&arr); |
156 | return handle; |
157 | } |
158 | static void FFIDecRef(TVMArrayHandle tensor) { NDArray::FFIDecRef(tensor); } |
159 | // Container to DLManagedTensor |
160 | static DLManagedTensor* ToDLPack(TVMArrayHandle handle) { |
161 | auto* from = |
162 | static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle)); |
163 | return ToDLPack(from); |
164 | } |
165 | |
166 | static DLManagedTensor* ToDLPack(NDArray::Container* from) { |
167 | ICHECK(from != nullptr); |
168 | DLManagedTensor* ret = new DLManagedTensor(); |
169 | ret->dl_tensor = from->dl_tensor; |
170 | ret->manager_ctx = from; |
171 | from->IncRef(); |
172 | ret->deleter = TVMNDArrayDLPackDeleter; |
173 | return ret; |
174 | } |
175 | // Delete dlpack object. |
176 | static void NDArrayDLPackDeleter(DLManagedTensor* tensor) { |
177 | static_cast<NDArray::Container*>(tensor->manager_ctx)->DecRef(); |
178 | delete tensor; |
179 | } |
180 | }; |
181 | |
182 | NDArray NDArray::CreateView(ShapeTuple shape, DLDataType dtype) { |
183 | ICHECK(data_ != nullptr); |
  ICHECK(get_mutable()->dl_tensor.strides == nullptr) << "Can only create view for compact tensor";
185 | NDArray ret = Internal::Create(shape, dtype, get_mutable()->dl_tensor.device); |
186 | ret.get_mutable()->dl_tensor.byte_offset = this->get_mutable()->dl_tensor.byte_offset; |
187 | size_t curr_size = GetDataSize(this->get_mutable()->dl_tensor); |
188 | size_t view_size = GetDataSize(ret.get_mutable()->dl_tensor); |
  ICHECK_LE(view_size, curr_size)
      << "Attempting to create a view that requires more memory than the current tensor";
191 | // increase ref count |
192 | get_mutable()->IncRef(); |
193 | ret.get_mutable()->manager_ctx = get_mutable(); |
194 | ret.get_mutable()->dl_tensor.data = get_mutable()->dl_tensor.data; |
195 | return ret; |
196 | } |
197 | |
198 | DLManagedTensor* NDArray::ToDLPack() const { return Internal::ToDLPack(get_mutable()); } |
199 | |
200 | NDArray NDArray::Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional<String> mem_scope) { |
201 | NDArray ret = Internal::Create(shape, dtype, dev); |
202 | ret.get_mutable()->dl_tensor.data = |
203 | DeviceAPI::Get(ret->device) |
204 | ->AllocDataSpace(ret->device, shape.size(), shape.data(), ret->dtype, mem_scope); |
205 | return ret; |
206 | } |
207 | |
208 | NDArray NDArray::FromExternalDLTensor(const DLTensor& dl_tensor) { |
  ICHECK(::tvm::runtime::IsContiguous(dl_tensor)) << "External DLTensor must be contiguous.";
  ICHECK(IsAligned(dl_tensor)) << "Data in DLTensor is not aligned as required by NDArray";
211 | NDArray::Container* data = new NDArray::Container(); |
212 | |
213 | data->SetDeleter(Internal::SelfDeleter); |
214 | data->dl_tensor = dl_tensor; |
  std::vector<ShapeTuple::index_type> shape(data->dl_tensor.shape,
                                            data->dl_tensor.shape + data->dl_tensor.ndim);
218 | data->shape_ = ShapeTuple(shape); |
219 | data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data()); |
220 | |
221 | return NDArray(GetObjectPtr<Object>(data)); |
222 | } |
223 | |
224 | NDArray NDArray::NewFromDLTensor(DLTensor* tensor, const Device& dev) { |
  ICHECK(::tvm::runtime::IsContiguous(*tensor))
      << "DLTensor is not contiguous. Copying from non-contiguous data is currently not supported";
227 | std::vector<int64_t> shape; |
228 | for (int64_t i = 0; i < tensor->ndim; i++) { |
229 | shape.push_back(tensor->shape[i]); |
230 | } |
231 | NDArray ary = NDArray::Empty(shape, tensor->dtype, dev); |
232 | ary.CopyFrom(tensor); |
233 | return ary; |
234 | } |
235 | |
236 | NDArray NDArray::FromDLPack(DLManagedTensor* tensor) { |
237 | NDArray::Container* data = new NDArray::Container(); |
238 | // construct header |
239 | data->SetDeleter(Internal::DLPackDeleter); |
240 | // fill up content. |
241 | data->manager_ctx = tensor; |
  ICHECK(::tvm::runtime::IsContiguous(tensor->dl_tensor)) << "DLManagedTensor must be contiguous.";
  ICHECK(IsAligned(tensor->dl_tensor))
      << "Data in DLManagedTensor is not aligned as required by NDArray";
245 | data->dl_tensor = tensor->dl_tensor; |
246 | // update shape_ |
  std::vector<ShapeTuple::index_type> shape(data->dl_tensor.shape,
                                            data->dl_tensor.shape + data->dl_tensor.ndim);
250 | data->shape_ = ShapeTuple(shape); |
251 | data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data()); |
252 | return NDArray(GetObjectPtr<Object>(data)); |
253 | } |
254 | |
255 | void NDArray::CopyToBytes(void* data, size_t nbytes) const { |
256 | ICHECK(data != nullptr); |
257 | ICHECK(data_ != nullptr); |
258 | ArrayCopyToBytes(&get_mutable()->dl_tensor, data, nbytes); |
259 | } |
260 | |
261 | void NDArray::CopyFromBytes(const void* data, size_t nbytes) { |
262 | ICHECK(data != nullptr); |
263 | ICHECK(data_ != nullptr); |
264 | ArrayCopyFromBytes(&get_mutable()->dl_tensor, data, nbytes); |
265 | } |
266 | |
267 | void NDArray::CopyFromTo(const DLTensor* from, DLTensor* to, TVMStreamHandle stream) { |
268 | size_t from_size = GetDataSize(*from); |
269 | size_t to_size = GetDataSize(*to); |
  ICHECK_EQ(from_size, to_size) << "TVMArrayCopyFromTo: The sizes must exactly match";
271 | |
272 | ICHECK(from->device.device_type == to->device.device_type || from->device.device_type == kDLCPU || |
273 | to->device.device_type == kDLCPU || from->device.device_type == kDLCUDAHost || |
274 | to->device.device_type == kDLCUDAHost) |
275 | << "Can not copy across different device types directly. From device type: " |
276 | << from->device.device_type << " to device type: " << to->device.device_type; |
277 | |
  // Use the device that is *not* a CPU device to resolve the correct
  // DeviceAPI instance.
280 | Device dev = from->device.device_type != kDLCPU ? from->device : to->device; |
281 | |
282 | DeviceAPI::Get(dev)->CopyDataFromTo(const_cast<DLTensor*>(from), to, stream); |
283 | } |
284 | |
285 | ShapeTuple NDArray::Shape() const { return get_mutable()->shape_; } |
286 | |
287 | runtime::DataType NDArray::DataType() const { |
288 | return runtime::DataType(get_mutable()->dl_tensor.dtype); |
289 | } |
290 | |
291 | bool NDArray::AbilityOfZeroCopyForDLTensor(DLTensor* tensor, const Device& dev) { |
292 | bool device_check = (dev.device_type == tensor->device.device_type); |
293 | bool device_id_check = (dev.device_id == tensor->device.device_id); |
294 | bool alignment_check = IsAligned(*tensor); |
295 | return device_check && device_id_check && alignment_check; |
296 | } |
297 | |
298 | bool NDArray::IsAligned(const DLTensor& tensor) { |
  size_t addr = reinterpret_cast<size_t>(static_cast<char*>(tensor.data) + tensor.byte_offset);
  return addr % tvm::runtime::kAllocAlignment == 0;
302 | } |
303 | |
304 | TVM_REGISTER_OBJECT_TYPE(NDArray::Container); |
305 | |
306 | } // namespace runtime |
307 | } // namespace tvm |
308 | |
309 | using namespace tvm::runtime; |
310 | |
311 | void TVMNDArrayDLPackDeleter(DLManagedTensor* tensor) { |
312 | NDArray::Internal::NDArrayDLPackDeleter(tensor); |
313 | } |
314 | |
315 | int TVMArrayGetTypeIndex(TVMArrayHandle handle, unsigned* out_tindex) { |
316 | API_BEGIN(); |
317 | *out_tindex = TVMArrayHandleToObjectHandle(handle)->type_index(); |
318 | API_END(); |
319 | } |
320 | |
321 | int TVMArrayAlloc(const tvm_index_t* shape, int ndim, int dtype_code, int dtype_bits, |
322 | int dtype_lanes, int device_type, int device_id, TVMArrayHandle* out) { |
323 | API_BEGIN(); |
324 | DLDataType dtype; |
325 | dtype.code = static_cast<uint8_t>(dtype_code); |
326 | dtype.bits = static_cast<uint8_t>(dtype_bits); |
327 | dtype.lanes = static_cast<uint16_t>(dtype_lanes); |
328 | tvm::Device dev; |
329 | dev.device_type = static_cast<DLDeviceType>(device_type); |
330 | dev.device_id = device_id; |
331 | auto ndarray = NDArray::Empty(ShapeTuple(shape, shape + ndim), dtype, dev); |
332 | |
333 | *out = NDArray::Internal::MoveToFFIHandle(ndarray); |
334 | API_END(); |
335 | } |
336 | |
337 | TVM_REGISTER_GLOBAL("runtime.TVMArrayAllocWithScope" ).set_body_typed(NDArray::Empty); |
338 | |
339 | TVM_REGISTER_GLOBAL("runtime.TVMArrayCreateView" ).set_body_typed([](NDArray arr, ShapeTuple shape) { |
340 | NDArray view = arr.CreateView(shape, arr->dtype); |
341 | return view; |
342 | }); |
343 | |
344 | int TVMArrayFree(TVMArrayHandle handle) { |
345 | API_BEGIN(); |
346 | NDArray::Internal::FFIDecRef(handle); |
347 | API_END(); |
348 | } |
349 | |
350 | int TVMArrayCopyFromTo(TVMArrayHandle from, TVMArrayHandle to, TVMStreamHandle stream) { |
351 | API_BEGIN(); |
352 | NDArray::CopyFromTo(from, to, stream); |
353 | API_END(); |
354 | } |
355 | |
356 | int TVMArrayFromDLPack(DLManagedTensor* from, TVMArrayHandle* out) { |
357 | API_BEGIN(); |
358 | *out = NDArray::Internal::MoveToFFIHandle(NDArray::FromDLPack(from)); |
359 | API_END(); |
360 | } |
361 | |
362 | int TVMArrayToDLPack(TVMArrayHandle from, DLManagedTensor** out) { |
363 | API_BEGIN(); |
364 | *out = NDArray::Internal::ToDLPack(from); |
365 | API_END(); |
366 | } |
367 | |
368 | void TVMDLManagedTensorCallDeleter(DLManagedTensor* dltensor) { (*(dltensor->deleter))(dltensor); } |
369 | |
370 | int TVMArrayCopyFromBytes(TVMArrayHandle handle, void* data, size_t nbytes) { |
371 | API_BEGIN(); |
372 | ArrayCopyFromBytes(handle, data, nbytes); |
373 | API_END(); |
374 | } |
375 | |
376 | int TVMArrayCopyToBytes(TVMArrayHandle handle, void* data, size_t nbytes) { |
377 | API_BEGIN(); |
378 | ArrayCopyToBytes(handle, data, nbytes); |
379 | API_END(); |
380 | } |
381 | |