#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/safe_numerics.h>

#include <limits>

namespace at {
namespace detail {
namespace {
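// When pin_memory is requested, host allocations go through the CUDA hooks'
// pinned-memory allocator so the buffer is page-locked and eligible for fast
// asynchronous host-to-device copies; otherwise the default CPU allocator
// is used.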
c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  if (pin_memory) {
    return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
  }
  return c10::GetCPUAllocator();
}

constexpr uint64_t storage_max() {
  // int64_t and size_t are used somewhat inconsistently throughout ATen.
  // To be safe, storage size calculations must fit in both types.
  constexpr auto int64_max = static_cast<uint64_t>(
      std::numeric_limits<int64_t>::max());
  constexpr auto size_max = static_cast<uint64_t>(
      std::numeric_limits<size_t>::max());
  return std::min(int64_max, size_max);
}

inline void raise_warning_for_complex_half(ScalarType dtype) {
  if (dtype == kComplexHalf) {
    TORCH_WARN_ONCE(
        "ComplexHalf support is experimental and many operators don't support it yet.");
  }
}

}  // namespace (anonymous)

size_t computeStorageNbytesContiguous(
    IntArrayRef sizes,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  uint64_t size = 1;
  bool overflowed = c10::safe_multiplies_u64(sizes, &size);
  overflowed |= c10::add_overflows(size, storage_offset, &size);
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=", sizes);
  return static_cast<size_t>(size);
#else
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
#endif
}
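// For illustration (a rough worked example, not part of the original file):
// a contiguous tensor with sizes = {2, 3}, itemsize_bytes = 4 (float) and
// storage_offset = 0 needs (2 * 3 + 0) * 4 = 24 bytes of storage.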

size_t computeStorageNbytes(
    IntArrayRef sizes,
    IntArrayRef strides,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = storage_offset + 1;
  bool overflowed = false;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    uint64_t strided_size;
    overflowed |= c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
    overflowed |= c10::add_overflows(size, strided_size, &size);
  }
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=",
              sizes, " and strides=", strides);
  return static_cast<size_t>(size);
#else
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
#endif
}
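// For illustration (a rough worked example, not part of the original file):
// with sizes = {2, 3}, strides = {1, 2}, itemsize_bytes = 4 and
// storage_offset = 0, the last element sits at offset
// 1 * (2 - 1) + 2 * (3 - 1) = 5, so the storage needs (5 + 1) * 4 = 24 bytes.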

SymInt computeStorageNbytesContiguous(
    SymIntArrayRef sizes,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
}

// not including mobile-only macros in this function,
// since mobile shouldn't be using symints.
SymInt computeStorageNbytes(
    SymIntArrayRef sizes,
    SymIntArrayRef strides,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  SymInt size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
}

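// Shared implementation behind the empty_* entry points below; the template
// parameter lets it work over both concrete IntArrayRef sizes and symbolic
// SymIntArrayRef sizes.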
template <typename T>
TensorBase _empty_generic(
    ArrayRef<T> size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl has size [0]
  // NB: test for meta dispatch key to avoid guarding on zero-ness
  if (ks.has(c10::DispatchKey::Meta) || size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->generic_set_sizes_contiguous(size);
  }

  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }

  return tensor;
}

TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}
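// For illustration only (a hypothetical call, not from this file): allocating
// an uninitialized 2x3 float tensor on CPU through this entry point could
// look like
//   auto t = at::detail::empty_generic(
//       {2, 3}, c10::GetCPUAllocator(),
//       c10::DispatchKeySet(c10::DispatchKey::CPU),
//       at::kFloat, c10::nullopt);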

template <typename T>
TensorBase _empty_strided_generic(
    T size,
    T stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, stride);
  return tensor;
}

TensorBase empty_strided_generic(
    IntArrayRef size,
    IntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<IntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_strided_symint_generic(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<SymIntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
}
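// For illustration (a hypothetical call, not from this file): a plain,
// non-pinned CPU allocation via the overload above could look like
//   auto t = at::detail::empty_cpu({4, 4}, at::kFloat, /*pin_memory=*/false,
//                                  c10::nullopt);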

TensorBase empty_cpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_cpu(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_cpu(IntArrayRef size, IntArrayRef stride,
                             ScalarType dtype, bool pin_memory) {
  auto allocator = at::detail::GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, cpu_ks, dtype);
}
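// For illustration (a hypothetical call, not from this file): the overload
// above can build a non-contiguous layout directly, e.g. a 2x3 float tensor
// with column-major strides
//   auto t = at::detail::empty_strided_cpu({2, 3}, {1, 2}, at::kFloat,
//                                          /*pin_memory=*/false);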

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_cpu(size, stride, dtype, pin_memory);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_cpu(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

// The meta allocator ignores whatever allocation is requested and always
// gives you nullptr
struct MetaAllocator final : public at::Allocator {
  MetaAllocator() = default;
  ~MetaAllocator() override = default;
  static void deleter(void* const pointer) {
    TORCH_INTERNAL_ASSERT(!pointer);
  }
  DataPtr allocate(const size_t nbytes) const override {
    return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
  }
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }
};

static MetaAllocator g_meta_alloc;

REGISTER_ALLOCATOR(kMeta, &g_meta_alloc);

TensorBase empty_meta(IntArrayRef size, ScalarType dtype,
                      c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_generic(
      size, allocator, meta_dks, dtype, memory_format_opt);
}
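// Meta tensors carry sizes, strides and dtype but no actual data (the
// MetaAllocator above always hands back nullptr), which makes them useful for
// shape and dtype propagation without touching real memory.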

TensorBase empty_meta(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  // NB: because there is no SparseMeta (yet), a non-strided layout can still
  // be requested here, so check for it explicitly.
  TORCH_CHECK_NOT_IMPLEMENTED(
      layout_or_default(layout_opt) == Layout::Strided,
      "non-strided meta tensors not supported yet");

  auto dtype = dtype_or_default(dtype_opt);
  return empty_meta(size, dtype, memory_format_opt);
}

TensorBase empty_symint_meta(
    SymIntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet ks(c10::DispatchKey::Meta);
  auto scalar_type = dtype_or_default(dtype_opt);
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_meta(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_meta(IntArrayRef size, IntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_meta(size, stride, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride,
                                     ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_symint_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_symint_meta(size, stride, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_symint_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

}} // namespace at::detail