#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/safe_numerics.h>

#include <limits>

namespace at {
namespace detail {
namespace {
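// When pin_memory is requested, host allocations go through the CUDA hooks'
// pinned-memory allocator so the buffer is page-locked and eligible for fast
// asynchronous host-to-device copies; otherwise the default CPU allocator
// is used.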
c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  if (pin_memory) {
    return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
  }
  return c10::GetCPUAllocator();
}

constexpr uint64_t storage_max() {
  // int64_t and size_t are used somewhat inconsistently throughout ATen.
  // To be safe, storage size calculations must fit in both types.
  constexpr auto int64_max = static_cast<uint64_t>(
      std::numeric_limits<int64_t>::max());
  constexpr auto size_max = static_cast<uint64_t>(
      std::numeric_limits<size_t>::max());
  return std::min(int64_max, size_max);
}

inline void raise_warning_for_complex_half(ScalarType dtype) {
  if (dtype == kComplexHalf) {
    TORCH_WARN_ONCE(
        "ComplexHalf support is experimental and many operators don't support it yet.");
  }
}

}  // namespace (anonymous)

size_t computeStorageNbytesContiguous(
    IntArrayRef sizes,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  uint64_t size = 1;
  bool overflowed = c10::safe_multiplies_u64(sizes, &size);
  overflowed |= c10::add_overflows(size, storage_offset, &size);
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=", sizes);
  return static_cast<size_t>(size);
#else
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
#endif
}
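// For illustration (a rough worked example, not part of the original file):
// a contiguous tensor with sizes = {2, 3}, itemsize_bytes = 4 (float) and
// storage_offset = 0 needs (2 * 3 + 0) * 4 = 24 bytes of storage.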

size_t computeStorageNbytes(
    IntArrayRef sizes,
    IntArrayRef strides,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = storage_offset + 1;
  bool overflowed = false;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    uint64_t strided_size;
    overflowed |= c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
    overflowed |= c10::add_overflows(size, strided_size, &size);
  }
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=",
              sizes, " and strides=", strides);
  return static_cast<size_t>(size);
#else
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
#endif
}
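// For illustration (a rough worked example, not part of the original file):
// with sizes = {2, 3}, strides = {1, 2}, itemsize_bytes = 4 and
// storage_offset = 0, the last element sits at offset
// 1 * (2 - 1) + 2 * (3 - 1) = 5, so the storage needs (5 + 1) * 4 = 24 bytes.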

SymInt computeStorageNbytesContiguous(
    SymIntArrayRef sizes,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
}

// not including mobile-only macros in this function,
// since mobile shouldn't be using symints.
SymInt computeStorageNbytes(
    SymIntArrayRef sizes,
    SymIntArrayRef strides,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  SymInt size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
}

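// Shared implementation behind the empty_* entry points below; the template
// parameter lets it work over both concrete IntArrayRef sizes and symbolic
// SymIntArrayRef sizes.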
template <typename T>
TensorBase _empty_generic(
    ArrayRef<T> size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl has size [0]
  // NB: test for meta dispatch key to avoid guarding on zero-ness
  if (ks.has(c10::DispatchKey::Meta) || size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->generic_set_sizes_contiguous(size);
  }

  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }

  return tensor;
}

TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}
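// For illustration only (a hypothetical call, not from this file): allocating
// an uninitialized 2x3 float tensor on CPU through this entry point could
// look like
//   auto t = at::detail::empty_generic(
//       {2, 3}, c10::GetCPUAllocator(),
//       c10::DispatchKeySet(c10::DispatchKey::CPU),
//       at::kFloat, c10::nullopt);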

template <typename T>
TensorBase _empty_strided_generic(
    T size,
    T stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, stride);
  return tensor;
}

TensorBase empty_strided_generic(
    IntArrayRef size,
    IntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<IntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_strided_symint_generic(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<SymIntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
}
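// For illustration (a hypothetical call, not from this file): a plain,
// non-pinned CPU allocation via the overload above could look like
//   auto t = at::detail::empty_cpu({4, 4}, at::kFloat, /*pin_memory=*/false,
//                                  c10::nullopt);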

TensorBase empty_cpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_cpu(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_cpu(IntArrayRef size, IntArrayRef stride,
                             ScalarType dtype, bool pin_memory) {
  auto allocator = at::detail::GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, cpu_ks, dtype);
}
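// For illustration (a hypothetical call, not from this file): the overload
// above can build a non-contiguous layout directly, e.g. a 2x3 float tensor
// with column-major strides
//   auto t = at::detail::empty_strided_cpu({2, 3}, {1, 2}, at::kFloat,
//                                          /*pin_memory=*/false);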

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_cpu(size, stride, dtype, pin_memory);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_cpu(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

// The meta allocator ignores whatever allocation is requested and always
// gives you nullptr
struct MetaAllocator final : public at::Allocator {
  MetaAllocator() = default;
  ~MetaAllocator() override = default;
  static void deleter(void* const pointer) {
    TORCH_INTERNAL_ASSERT(!pointer);
  }
  DataPtr allocate(const size_t nbytes) const override {
    return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
  }
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }
};

static MetaAllocator g_meta_alloc;

REGISTER_ALLOCATOR(kMeta, &g_meta_alloc);

TensorBase empty_meta(IntArrayRef size, ScalarType dtype,
                      c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_generic(
      size, allocator, meta_dks, dtype, memory_format_opt);
}
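// Meta tensors carry sizes, strides and dtype but no actual data (the
// MetaAllocator above always hands back nullptr), which makes them useful for
// shape and dtype propagation without touching real memory.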

TensorBase empty_meta(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  // NB: because there is no SparseMeta (yet), a non-strided layout can still
  // be requested here, so check for it explicitly.
  TORCH_CHECK_NOT_IMPLEMENTED(
      layout_or_default(layout_opt) == Layout::Strided,
      "non-strided meta tensors not supported yet");

  auto dtype = dtype_or_default(dtype_opt);
  return empty_meta(size, dtype, memory_format_opt);
}

TensorBase empty_symint_meta(
    SymIntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet ks(c10::DispatchKey::Meta);
  auto scalar_type = dtype_or_default(dtype_opt);
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_meta(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_meta(IntArrayRef size, IntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_meta(size, stride, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride,
                                     ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_symint_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_symint_meta(size, stride, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_symint_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

}} // namespace at::detail