#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/safe_numerics.h>

#include <limits>

namespace at {
namespace detail {
namespace {
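// Returns the CUDA pinned-memory (page-locked host) allocator when
// pin_memory is requested; otherwise the default CPU allocator.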
c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  if (pin_memory) {
    return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
  }
  return c10::GetCPUAllocator();
}

constexpr uint64_t storage_max() {
  // int64_t and size_t are used somewhat inconsistently throughout ATen.
  // To be safe, storage size calculations must fit in both types.
  constexpr auto int64_max = static_cast<uint64_t>(
      std::numeric_limits<int64_t>::max());
  constexpr auto size_max = static_cast<uint64_t>(
      std::numeric_limits<size_t>::max());
  return std::min(int64_max, size_max);
}

inline void raise_warning_for_complex_half(ScalarType dtype) {
  if (dtype == kComplexHalf) {
    TORCH_WARN_ONCE(
        "ComplexHalf support is experimental and many operators don't support it yet.");
  }
}

} // namespace (anonymous)

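// Number of bytes needed to back a contiguous tensor:
// itemsize_bytes * (storage_offset + prod(sizes)).
// On non-mobile builds every step is overflow-checked and the result is
// capped at storage_max().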
size_t computeStorageNbytesContiguous(
    IntArrayRef sizes,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  uint64_t size = 1;
  bool overflowed = c10::safe_multiplies_u64(sizes, &size);
  overflowed |= c10::add_overflows(size, storage_offset, &size);
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=", sizes);
  return static_cast<size_t>(size);
#else
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
#endif
}

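// Number of bytes needed to back a strided tensor:
// itemsize_bytes * (storage_offset + 1 + sum_i strides[i] * (sizes[i] - 1)),
// or 0 if any dimension has size 0. Overflow-checked on non-mobile builds.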
size_t computeStorageNbytes(
    IntArrayRef sizes,
    IntArrayRef strides,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = storage_offset + 1;
  bool overflowed = false;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    uint64_t strided_size;
    overflowed |= c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
    overflowed |= c10::add_overflows(size, strided_size, &size);
  }
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=",
              sizes, " and strides=", strides);
  return static_cast<size_t>(size);
#else
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
#endif
}

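// SymInt variants of the storage-size calculations. These perform the same
// arithmetic symbolically and skip the explicit overflow checks.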
SymInt computeStorageNbytesContiguous(
    SymIntArrayRef sizes,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
}

// not including mobile-only macros in this function,
// since mobile shouldn't be using symints.
SymInt computeStorageNbytes(
    SymIntArrayRef sizes,
    SymIntArrayRef strides,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  SymInt size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
}

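// Shared implementation for creating an empty tensor: computes the
// contiguous storage size, allocates a resizable StorageImpl with the given
// allocator, wraps it in a TensorImpl carrying the dispatch keys and dtype,
// sets the sizes, and restrides if a non-contiguous memory format was
// requested.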
template <typename T>
TensorBase _empty_generic(
    ArrayRef<T> size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl has size [0]
  // NB: test for meta dispatch key to avoid guarding on zero-ness
  if (ks.has(c10::DispatchKey::Meta) || size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->generic_set_sizes_contiguous(size);
  }

  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }

  return tensor;
}

TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

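// Like _empty_generic, but the storage size is computed from explicit
// strides and the sizes/strides are set on the TensorImpl directly.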
template <typename T>
TensorBase _empty_strided_generic(
    T size,
    T stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, stride);
  return tensor;
}

TensorBase empty_strided_generic(
    IntArrayRef size,
    IntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<IntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_strided_symint_generic(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<SymIntArrayRef>(size, stride, allocator, ks, scalar_type);
}

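// CPU entry points: select the (possibly pinned) CPU allocator and forward
// to the generic implementations with the CPU dispatch key.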
TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_cpu(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_cpu(IntArrayRef size, IntArrayRef stride,
                             ScalarType dtype, bool pin_memory) {
  auto allocator = at::detail::GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, cpu_ks, dtype);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_cpu(size, stride, dtype, pin_memory);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_cpu(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

// The meta allocator ignores whatever allocation is requested and always
// gives you nullptr
struct MetaAllocator final : public at::Allocator {
  MetaAllocator() = default;
  ~MetaAllocator() override = default;
  static void deleter(void* const pointer) {
    TORCH_INTERNAL_ASSERT(!pointer);
  }
  DataPtr allocate(const size_t nbytes) const override {
    return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
  }
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }
};

static MetaAllocator g_meta_alloc;

REGISTER_ALLOCATOR(kMeta, &g_meta_alloc);

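// Meta entry points: meta tensors carry sizes, strides, and dtype but no
// data, so every allocation goes through MetaAllocator and yields nullptr.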
TensorBase empty_meta(IntArrayRef size, ScalarType dtype,
                      c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_generic(
      size, allocator, meta_dks, dtype, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  // NB: because there is no SparseMeta (yet), a non-strided layout can reach
  // this function, so it must be rejected explicitly.
  TORCH_CHECK_NOT_IMPLEMENTED(
    layout_or_default(layout_opt) == Layout::Strided,
    "non-strided meta tensors not supported yet"
  );

  auto dtype = dtype_or_default(dtype_opt);
  return empty_meta(size, dtype, memory_format_opt);
}

TensorBase empty_symint_meta(
    SymIntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet ks(c10::DispatchKey::Meta);
  auto scalar_type = dtype_or_default(dtype_opt);
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_meta(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_meta(IntArrayRef size, IntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_meta(size, stride, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride,
                                     ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_symint_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_symint_meta(size, stride, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_symint_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

}} // namespace at::detail