1 | #pragma once |
2 | |
3 | #include <c10/core/Backend.h> |
4 | #include <c10/core/CopyBytes.h> |
5 | #include <c10/core/DispatchKeySet.h> |
6 | #include <c10/core/InferenceMode.h> |
7 | #include <c10/core/MemoryFormat.h> |
8 | #include <c10/core/Storage.h> |
9 | #include <c10/core/SymBool.h> |
10 | #include <c10/core/SymIntArrayRef.h> |
11 | #include <c10/core/TensorOptions.h> |
12 | #include <c10/core/WrapDimMinimal.h> |
13 | #include <c10/core/impl/LocalDispatchKeySet.h> |
14 | #include <c10/core/impl/PyObjectSlot.h> |
15 | #include <c10/core/impl/SizesAndStrides.h> |
16 | #include <c10/util/DimVector.h> |
17 | #include <c10/util/Exception.h> |
18 | #include <c10/util/Flags.h> |
19 | #include <c10/util/Logging.h> |
20 | #include <c10/util/Optional.h> |
21 | #include <c10/util/accumulate.h> |
22 | #include <c10/util/irange.h> |
23 | #include <c10/util/python_stub.h> |
24 | #include <c10/util/safe_numerics.h> |
25 | |
26 | #include <algorithm> |
27 | #include <atomic> |
28 | #include <limits> |
29 | #include <memory> |
30 | #include <numeric> |
31 | #include <utility> |
32 | |
// A global boolean variable to control whether we free memory when a Tensor
// is shrunk to a smaller size. As a result, a Tensor is always going to
// keep the memory allocated for the maximum capacity it has been reshaped
// to so far.
//
// This parameter is respected by "upper-case" methods which call Resize()
// (e.g., CopyFrom, ResizeLike); it is NOT respected by Tensor::resize_
// or ShrinkTo, both of which guarantee never to free memory.
40 | C10_DECLARE_bool(caffe2_keep_on_shrink); |
41 | |
42 | // Since we can have high variance in blob memory allocated across different |
43 | // inputs in the same run, we will shrink the blob only if the memory gain |
44 | // is larger than this flag in bytes. This only applies to functions which |
45 | // respect caffe2_keep_on_shrink. |
46 | C10_DECLARE_int64(caffe2_max_keep_on_shrink_memory); |
47 | |
48 | C10_CLANG_DIAGNOSTIC_PUSH() |
49 | #if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") |
50 | C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion" ) |
51 | #endif |
52 | |
53 | namespace at { |
54 | class Tensor; |
55 | class TensorBase; |
56 | } // namespace at |
57 | |
58 | namespace c10 { |
59 | class Scalar; |
60 | struct Storage; |
61 | } // namespace c10 |
62 | |
63 | namespace c10 { |
64 | |
65 | /** |
66 | * A utility function to convert vector<int> to vector<int64_t>. |
67 | */ |
68 | inline std::vector<int64_t> ToVectorint64_t(const ArrayRef<int>& src) { |
69 | return std::vector<int64_t>(src.begin(), src.end()); |
70 | } |
71 | |
72 | /** |
73 | * Return product of all dimensions starting from k |
74 | */ |
75 | inline int64_t size_from_dim_(int k, IntArrayRef dims) { |
76 | int64_t r = 1; |
77 | for (const auto i : c10::irange(k, dims.size())) { |
78 | r *= dims[i]; |
79 | } |
80 | return r; |
81 | } |
82 | |
83 | // Product of all dims up to k (not including dims[k]) |
84 | inline int64_t size_to_dim_(int k, IntArrayRef dims) { |
85 | TORCH_CHECK((unsigned)k <= dims.size()); |
86 | int64_t r = 1; |
87 | for (const auto i : c10::irange(k)) { |
88 | r *= dims[i]; |
89 | } |
90 | return r; |
91 | } |
92 | |
93 | // Product of all dims between k and l (not including dims[k] and dims[l]) |
94 | inline int64_t size_between_dim_(int k, int l, IntArrayRef dims) { |
95 | TORCH_CHECK((unsigned)l < dims.size() && (unsigned)k < dims.size()); |
96 | int64_t r = 1; |
97 | if (k < l) { |
98 | for (int i = k + 1; i < l; ++i) { |
99 | r *= dims[i]; |
100 | } |
101 | } else { |
102 | for (int i = l + 1; i < k; ++i) { |
103 | r *= dims[i]; |
104 | } |
105 | } |
106 | return r; |
107 | } |
108 | |
109 | // Wrap around axis_index if it is negative, s.t., -1 is the last dim |
110 | inline int canonical_axis_index_(int axis_index, int ndims) { |
111 | TORCH_CHECK(axis_index >= -ndims); |
112 | TORCH_CHECK(axis_index < ndims); |
113 | if (axis_index < 0) { |
114 | return axis_index + ndims; |
115 | } |
116 | return axis_index; |
117 | } |
118 | |
119 | using PlacementDtor = void (*)(void*, size_t); |
120 | |
121 | /* |
122 | * A Context that will call extra placement deleter during |
123 | * deconstruction. |
124 | * |
125 | * Accept a already constructed DataPtr and store it as member |
126 | * during destruction, we'll call extra deleter on the underlying |
127 | * data pointer before the DataPtr is destructed. |
128 | * `data_ptr_` owns the memory. |
129 | */ |
130 | struct C10_API PlacementDeleteContext { |
131 | DataPtr data_ptr_; |
132 | PlacementDtor placement_dtor_; |
133 | size_t size_; |
134 | PlacementDeleteContext( |
135 | DataPtr&& data_ptr, |
136 | PlacementDtor placement_dtor, |
137 | size_t size) |
138 | : data_ptr_(std::move(data_ptr)), |
139 | placement_dtor_(placement_dtor), |
140 | size_(size) {} |
141 | static DataPtr makeDataPtr( |
142 | DataPtr&& data_ptr, |
143 | PlacementDtor placement_dtor, |
144 | size_t size, |
145 | Device device); |
146 | ~PlacementDeleteContext() { |
147 | placement_dtor_(data_ptr_.get(), size_); |
148 | // original memory will be freed when data_ptr_ is destructed |
149 | } |
150 | }; |
151 | |
152 | struct TensorImpl; |
153 | |
154 | struct C10_API AutogradMetaInterface { |
155 | virtual void set_requires_grad( |
156 | bool requires_grad, |
157 | at::TensorImpl* self_impl) = 0; |
158 | virtual bool requires_grad() const = 0; |
159 | virtual at::Tensor& mutable_grad() = 0; |
160 | virtual const at::Tensor& grad() const = 0; |
161 | virtual const at::Tensor& fw_grad(uint64_t level, const at::TensorBase& self) |
162 | const = 0; |
163 | virtual void set_fw_grad( |
164 | const at::TensorBase& new_grad, |
165 | const at::TensorBase& self, |
166 | uint64_t level, |
167 | bool is_inplace_op) = 0; |
168 | virtual ~AutogradMetaInterface(); |
169 | }; |
170 | |
171 | namespace impl { |
172 | |
173 | // Unfortunately, the definition of AutogradMeta lives in a separate |
174 | // compilation unit than TensorImpl (libtorch.so versus libc10.so) |
175 | // which means that we cannot construct an AutogradMeta from TensorImpl, |
176 | // not even from the cpp file. So we have to indirect it through a factory |
177 | // function which will be initialized when we load libtorch.so. |
178 | |
179 | struct C10_API AutogradMetaFactory { |
180 | virtual ~AutogradMetaFactory() = default; |
181 | virtual std::unique_ptr<AutogradMetaInterface> make() const = 0; |
182 | // This method is the dumbest method. But I don't have access |
183 | // to Tensor (not TensorImpl) which is undefined in this header. |
184 | virtual const at::Tensor& undefined_tensor() const = 0; |
185 | }; |
186 | |
187 | C10_API void SetAutogradMetaFactory(AutogradMetaFactory* factory); |
188 | C10_API AutogradMetaFactory* GetAutogradMetaFactory(); |
189 | |
190 | struct C10_API AutogradMetaFactoryRegisterer { |
191 | explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory* factory) { |
192 | SetAutogradMetaFactory(factory); |
193 | } |
194 | }; |
195 | |
196 | } // namespace impl |
197 | |
198 | struct C10_API NamedTensorMetaInterface { |
199 | virtual ~NamedTensorMetaInterface() = default; |
200 | virtual std::unique_ptr<NamedTensorMetaInterface> clone() const { |
201 | TORCH_INTERNAL_ASSERT( |
202 | false, "Not implemented: NamedTensorMetaInterface::clone" ); |
203 | }; |
204 | virtual int64_t slow_dim() const { |
205 | TORCH_INTERNAL_ASSERT( |
206 | false, "Not implemented: NamedTensorMetaInterface::slow_dim" ); |
207 | }; |
208 | }; |
209 | |
210 | // For ease of copy pasting |
211 | #if 0 |
212 | is_contiguous |
213 | is_channels_last_contiguous |
214 | is_channels_last_3d_contiguous |
215 | is_channels_last |
216 | is_channels_last_3d |
217 | is_non_overlapping_and_dense |
218 | #endif |
219 | |
struct C10_API ExtraMeta {
  SymDimVector sizes_ = {0};
  SymDimVector strides_ = {1};
  SymInt numel_ = 1;
  SymInt storage_offset_ = 0;
  SymBool is_contiguous_{true};
  SymBool is_channels_last_contiguous_{false};
  SymBool is_channels_last_3d_contiguous_{false};
  SymBool is_channels_last_{false};
  SymBool is_channels_last_3d_{false};
  SymBool is_non_overlapping_and_dense_{true};
  std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta_ = nullptr;

  ExtraMeta() = default;

  ExtraMeta(
236 | SymDimVector sizes, |
237 | SymDimVector strides, |
238 | SymInt numel, |
239 | SymInt storage_offset, |
240 | SymBool is_contiguous, |
241 | SymBool is_channels_last_contiguous, |
242 | SymBool is_channels_last_3d_contiguous, |
243 | SymBool is_channels_last, |
244 | SymBool is_channels_last_3d, |
245 | SymBool is_non_overlapping_and_dense, |
246 | std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta) |
247 | : sizes_(std::move(sizes)), |
248 | strides_(std::move(strides)), |
249 | numel_(std::move(numel)), |
250 | storage_offset_(std::move(storage_offset)), |
251 | is_contiguous_(std::move(is_contiguous)), |
252 | is_channels_last_contiguous_(std::move(is_channels_last_contiguous)), |
253 | is_channels_last_3d_contiguous_( |
254 | std::move(is_channels_last_3d_contiguous)), |
255 | is_channels_last_(std::move(is_channels_last)), |
256 | is_channels_last_3d_(std::move(is_channels_last_3d)), |
257 | is_non_overlapping_and_dense_(std::move(is_non_overlapping_and_dense)), |
258 | named_tensor_meta_(std::move(named_tensor_meta)) {} |
259 | |
  std::unique_ptr<ExtraMeta> clone() const {
261 | return std::make_unique<ExtraMeta>( |
262 | sizes_, |
263 | strides_, |
264 | numel_, |
265 | storage_offset_, |
266 | is_contiguous_, |
267 | is_channels_last_contiguous_, |
268 | is_channels_last_3d_contiguous_, |
269 | is_channels_last_, |
270 | is_channels_last_3d_, |
271 | is_non_overlapping_and_dense_, |
272 | named_tensor_meta_ ? named_tensor_meta_->clone() : nullptr); |
273 | } |
274 | }; |
275 | |
276 | // NOTE [ Version Counter Sharing ] |
277 | // |
278 | // Every Tensor has a version counter. Version counters are incremented whenever |
279 | // the data or size of a tensor changes through in-place Variable operations. |
280 | // Version counters are used to detect modifications to saved variables which |
281 | // would result in incorrect gradient calculations. Version counters may be |
282 | // shared between Variables: |
283 | // |
284 | // 1. A view shares the version counter of the base Variable, |
285 | // 2. `x.detach()` shares the version counter of `x`, |
286 | // 3. Unpacked saved variables share the version counter of the source. |
287 | // |
288 | // Version counters are not shared in these scenarios: |
289 | // |
290 | // 1. When we replace a `Variable`'s underlying `Tensor` by calling |
291 | // `set_data(...)`, |
292 | // 2. `x.data` does not share the version counter of `x`. (See discussion at |
293 | // https://github.com/pytorch/pytorch/issues/5396) |
294 | // |
295 | // Question: Why do we put the version counter in TensorImpl instead of |
296 | // AutogradMeta? |
297 | // |
298 | // Answer: After the Variable/Tensor merge, a tensor will not have AutogradMeta |
299 | // when its `requires_grad_` is false, but when we use this tensor in the |
300 | // forward pass of a function that requires saving this tensor for backward, we |
301 | // need to keep track of this tensor's version to make sure it's always valid in |
302 | // the autograd graph. |
303 | // |
304 | // To achieve this goal, we put the version counter in TensorImpl instead of |
305 | // AutogradMeta, and have it always be available. This allows us to have the |
306 | // optimization of not carrying AutogradMeta when a tensor doesn't require |
307 | // gradient. |
308 | // |
309 | // A hypothetical alternative way to achieve this goal is to initialize |
310 | // AutogradMeta and create the version counter for the non-requires-grad tensor |
311 | // only when it's saved for backward. However, since saving a tensor for |
312 | // backward happens in the forward pass, and our invariant is that forward pass |
313 | // needs to be thread-safe, lazy-initializing AutogradMeta when saving a tensor |
314 | // can introduce race conditions when we are running the forward pass in |
315 | // multi-thread scenarios, thus making the forward pass not thread-safe anymore, |
316 | // which breaks the invariant. |
317 | struct C10_API VariableVersion { |
318 | private: |
319 | struct VersionCounter : intrusive_ptr_target { |
320 | VersionCounter(uint32_t version) : version_(version) {} |
321 | std::atomic<uint32_t> version_; |
322 | }; |
323 | c10::intrusive_ptr<VersionCounter> version_counter_; |
324 | |
325 | public: |
326 | // Note [Disabled VariableVersion] |
  // The VariableVersion struct has an intrusive_ptr pointing to a
  // VersionCounter struct with an atomic variable. Thus
  // `VariableVersion(/*version=*/0)` is not as cheap as we expected. In some
  // cases constructing a VariableVersion with version 0 is not necessary so
  // we add a cheap constructor which doesn't allocate the intrusive_ptr.
  // Example use cases are:
  //  - Inference tensors don't track version counter, so they'll just always
  //    have disabled VariableVersion.
  //  - In SavedVariable class we override version_counter_ inside its
  //    constructor so that we can use the cheap constructor there.
338 | enum Disabled { DISABLED }; |
  // It's okay to return true even for inference tensors, which
  // don't have the version counter enabled.
  // We want to be permissive here since in many cases (e.g. make_variable)
  // we can std::move a TensorImpl if there are no other uses, which saves us
  // an additional TensorImpl allocation.
344 | bool unique() const { |
345 | return version_counter_ ? 1 == version_counter_.use_count() : true; |
346 | } |
347 | // NOTE: As of C++11 and 14, default-constructing a std::atomic variable |
348 | // leaves it in a persistently undefined state. See |
349 | // https://cplusplus.github.io/LWG/issue2334. |
350 | VariableVersion(uint32_t version) |
351 | : version_counter_(c10::make_intrusive<VersionCounter>(version)) {} |
352 | VariableVersion(Disabled = DISABLED) {} |
353 | |
354 | bool enabled() const { |
355 | return version_counter_; |
356 | } |
357 | |
358 | // Note [Inplace update inference tensor] |
359 | // 1. Inplace update to inference tensor is forbidden in normal mode. |
360 | // For example: |
361 | // inference_tensor.copy_(normal_tensor_requires_grad) |
362 | // This inplace makes inference_tensor have requires_grad=True and |
363 | // have a grad_fn. This is bad because views of `inference_tensor` |
364 | // created in InferenceMode won't be able to know the grad_fn since |
365 | // their ViewMeta were not recorded. To match NoGradMode behavior |
366 | // that "inplace update to a view created in NoGradMode raise an error", |
367 | // we just ban inplace update to inference tensor since we can't tell |
368 | // if an inference tensor is a view created in InferenceMode. |
369 | // |
370 | // Note that views of normal tensor created in InferenceMode has proper |
371 | // ViewMeta so that they're aware of the grad_fn correctly. |
372 | // |
373 | // 2. Inplace update to inference tensor in inference tensor doesn't bump |
374 | // version counter. |
375 | // * It either doesn't call bump() by skipping ADInplaceOrView kernel, |
376 | // - e.g. inference_tensor.add_(1) |
377 | // * or bump() is a no-op for inference tensor. |
378 | // - e.g. inference_tensor.add_(normal_tensor) |
379 | void bump() { |
380 | // TODO: Replace the link to the documentation once it's available. |
    TORCH_CHECK(
        version_counter_ || InferenceMode::is_enabled(),
        "Inplace update to inference tensor outside InferenceMode is not allowed. "
        "You can make a clone to get a normal tensor before doing inplace update. "
        "See https://github.com/pytorch/rfcs/pull/17 for more details.");
386 | if (version_counter_) { |
387 | ++version_counter_->version_; |
388 | } |
389 | } |
390 | |
391 | void set_version(int64_t i) { |
392 | TORCH_CHECK( |
393 | version_counter_, |
394 | "Tried to call torch.autograd._unsafe_set_version() on a tensor " |
395 | "that does not have a version counter. Was it created in inference mode?" ); |
396 | TORCH_CHECK(i >= 0, "Cannot set a version_counter to a value below 0: " , i); |
397 | version_counter_->version_ = i; |
398 | } |
399 | |
  // Inference tensors don't have a version counter, so it shouldn't be
  // accessed.
  uint32_t current_version() const {
    TORCH_CHECK(
        version_counter_, "Inference tensors do not track version counter.");
405 | return version_counter_->version_; |
406 | } |
407 | }; |
408 | |
409 | // Forward declaration of TensorImpl needed for forward declaration of |
410 | // C10_TensorImpl_Size_Check_Dummy_Class |
411 | struct C10_API TensorImpl; |
412 | |
413 | // Forward declaration needed because TensorImpl needs to be friends with |
414 | // C10_TensorImpl_Size_Check_Dummy_Class in order to check the size |
415 | // of its private fields. |
416 | template < |
417 | size_t cplusplus, |
418 | size_t clang_ver_major, |
419 | size_t gcc_ver, |
420 | size_t gcc_ver_minor, |
421 | size_t nvcc, |
422 | size_t cuda_version, |
423 | size_t cuda_version_major, |
424 | size_t ptr_size> |
425 | class C10_TensorImpl_Size_Check_Dummy_Class; |
426 | |
427 | /** |
428 | * NOTE: Some TensorImpl methods are small and not overridden in the |
429 | * PyTorch codebase itself, but may theoretically need to be |
430 | * overridden by third-party TensorImpl subclasses. This macro allows |
431 | * users that need maximum performance and don't need these extension |
432 | * points to disable them with a build-time flag. (In particular, |
433 | * XLA's XLATensorImpl currently overrides these methods, so we can't |
434 | * enable this flag by default.) |
435 | */ |
436 | #ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY |
437 | #define TENSORIMPL_MAYBE_VIRTUAL |
438 | #else |
439 | #define TENSORIMPL_MAYBE_VIRTUAL virtual |
440 | #endif |
441 | |
442 | /** |
443 | * The low-level representation of a tensor, which contains a pointer |
444 | * to a storage (which contains the actual data) and metadata (e.g., sizes and |
445 | * strides) describing this particular view of the data as a tensor. |
446 | * |
447 | * Some basic characteristics about our in-memory representation of |
448 | * tensors: |
449 | * |
450 | * - It contains a pointer to a storage struct (Storage/StorageImpl) |
451 | * which contains the pointer to the actual data and records the |
452 | * data type and device of the view. This allows multiple tensors |
453 | * to alias the same underlying data, which allows to efficiently |
454 | * implement differing *views* on a tensor. |
455 | * |
456 | * - The tensor struct itself records view-specific metadata about |
457 | * the tensor, e.g., sizes, strides and offset into storage. |
458 | * Each view of a storage can have a different size or offset. |
459 | * |
460 | * - This class is intrusively refcounted. It is refcounted so that |
461 | * we can support prompt deallocation of large tensors; it is |
462 | * intrusively refcounted so that we can still perform reference |
463 | * counted operations on raw pointers, which is often more convenient |
464 | * when passing tensors across language boundaries. |
465 | * |
466 | * - For backwards-compatibility reasons, a tensor may be in an |
467 | * uninitialized state. A tensor may be uninitialized in the following |
468 | * two ways: |
469 | * |
470 | * - A tensor may be DTYPE UNINITIALIZED. A tensor of this |
471 | * form has an uninitialized dtype. This situation most |
472 | * frequently arises when a user writes Tensor x(CPU). The dtype |
473 | * is subsequently initialized when mutable_data<T>() is |
474 | * invoked for the first time. |
475 | * |
476 | * - A tensor may be STORAGE UNINITIALIZED. A tensor of this form |
477 | * has non-zero size, but has a storage with a null data pointer. |
478 | * This situation most frequently arises when a user calls |
479 | * Resize() or FreeMemory(). This is because Caffe2 historically |
480 | * does lazy allocation: allocation of data doesn't occur until |
481 | * mutable_data<T>() is invoked. A tensor with zero size is |
482 | * always storage initialized, because no allocation is necessary |
483 | * in this case. |
484 | * |
485 | * All combinations of these two uninitialized states are possible. |
486 | * Consider the following transcript in idiomatic Caffe2 API: |
487 | * |
488 | * Tensor x(CPU); // x is storage-initialized, dtype-UNINITIALIZED |
489 | * x.Resize(4); // x is storage-UNINITIALIZED, dtype-UNINITIALIZED |
490 | * x.mutable_data<float>(); // x is storage-initialized, dtype-initialized |
491 | * x.FreeMemory(); // x is storage-UNINITIALIZED, dtype-initialized. |
492 | * |
493 | * All other fields on tensor are always initialized. In particular, |
494 | * size is always valid. (Historically, a tensor declared as Tensor x(CPU) |
495 | * also had uninitialized size, encoded as numel == -1, but we have now |
496 | * decided to default to zero size, resulting in numel == 0). |
497 | * |
498 | * Uninitialized storages MUST be uniquely owned, to keep our model |
499 | * simple. Thus, we will reject operations which could cause an |
500 | * uninitialized storage to become shared (or a shared storage to |
501 | * become uninitialized, e.g., from FreeMemory). |
502 | * |
503 | * In practice, tensors which are storage-UNINITIALIZED and |
504 | * dtype-UNINITIALIZED are *extremely* ephemeral: essentially, |
505 | * after you do a Resize(), you basically always call mutable_data() |
506 | * immediately afterwards. Most functions are not designed to |
507 | * work if given a storage-UNINITIALIZED, dtype-UNINITIALIZED tensor. |
508 | * |
509 | * We intend to eliminate all uninitialized states, so that every |
510 | * tensor is fully initialized in all fields. Please do not write new code |
511 | * that depends on these uninitialized states. |
512 | */ |
513 | struct C10_API TensorImpl : public c10::intrusive_ptr_target { |
514 | TensorImpl() = delete; |
515 | ~TensorImpl() override; |
516 | // Note [Enum ImplType] |
517 | // This enum is temporary. In the followup refactor we should |
518 | // think about how to specialize TensorImpl creation for view |
519 | // tensors. Currently we only special case its key_set_ but |
520 | // there's also potential to share version_counter_ directly |
521 | // without creating first and then override in as_view. |
522 | enum ImplType { VIEW }; |
523 | |
524 | /** |
525 | * Construct a 1-dim 0-size tensor backed by the given storage. |
526 | */ |
527 | TensorImpl( |
528 | Storage&& storage, |
529 | DispatchKeySet, |
530 | const caffe2::TypeMeta data_type); |
531 | |
532 | // See Note [Enum ImplType] |
533 | TensorImpl( |
534 | ImplType, |
535 | Storage&& storage, |
536 | DispatchKeySet, |
537 | const caffe2::TypeMeta data_type); |
538 | |
539 | /** |
   * Construct a 1-dim 0-size tensor that doesn't have a storage.
541 | */ |
542 | TensorImpl( |
543 | DispatchKeySet, |
544 | const caffe2::TypeMeta data_type, |
545 | c10::optional<c10::Device> device_opt); |
546 | |
547 | // Legacy constructors so I don't have to go update call sites. |
548 | // TODO: When Variable is added, delete these constructors |
549 | TensorImpl( |
550 | Storage&& storage, |
551 | DispatchKey dispatch_key, |
552 | const caffe2::TypeMeta data_type) |
553 | : TensorImpl( |
554 | std::move(storage), |
555 | DispatchKeySet(dispatch_key), |
556 | data_type) {} |
557 | TensorImpl( |
558 | DispatchKey dispatch_key, |
559 | const caffe2::TypeMeta data_type, |
560 | c10::optional<c10::Device> device_opt) |
561 | : TensorImpl(DispatchKeySet(dispatch_key), data_type, device_opt) {} |
562 | |
563 | private: |
564 | // This constructor is private, because the data_type is redundant with |
565 | // storage. Still, we pass it in separately because it's easier to write |
566 | // the initializer list if we're not worried about storage being moved out |
567 | // from under us. |
568 | TensorImpl( |
569 | Storage&& storage, |
570 | DispatchKeySet, |
571 | const caffe2::TypeMeta data_type, |
572 | c10::optional<c10::Device>); |
573 | |
574 | public: |
575 | TensorImpl(const TensorImpl&) = delete; |
576 | TensorImpl& operator=(const TensorImpl&) = delete; |
577 | TensorImpl(TensorImpl&&) = delete; |
578 | TensorImpl& operator=(TensorImpl&&) = delete; |
579 | |
580 | /** |
581 | * Release (decref) storage, and any other external allocations. This |
582 | * override is for `intrusive_ptr_target` and is used to implement weak |
583 | * tensors. |
584 | */ |
585 | void release_resources() override; |
586 | |
587 | public: |
588 | /** |
589 | * Return the DispatchKeySet corresponding to this Tensor, specifying |
590 | * all of the DispatchKeys that this Tensor identifies as. This is the |
591 | * information used to dispatch operations on this tensor. |
592 | */ |
593 | DispatchKeySet key_set() const { |
594 | return key_set_; |
595 | } |
596 | |
597 | // NOTE: The general recipe for customizable methods is that the fastpath |
  // function (e.g., sizes()) does an unlikely policy test, and if it doesn't
  // trigger, it does the fast path implementation with no checks, going
  // directly to on-TensorImpl fields. In particular, you never need to
601 | // check ExtraMeta if the policy doesn't trigger, as non-trivial ExtraMeta |
602 | // implies the policy will always match. |
603 | // |
604 | // The default implementations of methods are "safe": they do extra tests |
  // to make sure the internal state is consistent whether or not you are
  // using symbolic shapes. If you don't want the tests, directly
607 | // override the custom method (e.g., custom_sizes()) to do your preferred |
608 | // behavior. |
609 | |
610 | public: |
611 | /** |
612 | * Return a reference to the sizes of this tensor. This reference remains |
613 | * valid as long as the tensor is live and not resized. |
614 | */ |
615 | IntArrayRef sizes() const { |
616 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
617 | return sizes_custom(); |
618 | } |
619 | return sizes_and_strides_.sizes_arrayref(); |
620 | } |
621 | |
622 | SymIntArrayRef sym_sizes() const { |
623 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
624 | return sym_sizes_custom(); |
625 | } |
626 | // Sizes guaranteed to be non-negative, so unchecked cast is OK |
627 | return c10::fromIntArrayRefKnownNonNegative( |
628 | sizes_and_strides_.sizes_arrayref()); |
629 | } |
630 | |
631 | IntArrayRef sizes_default() const { |
632 | // TODO: force backtrace to be printed on this error |
633 | TORCH_CHECK( |
634 | !has_symbolic_sizes_strides_, |
635 | "Cannot call sizes() on tensor with symbolic sizes/strides" ); |
636 | return sizes_and_strides_.sizes_arrayref(); |
637 | } |
638 | |
639 | SymIntArrayRef sym_sizes_default() const { |
640 | if (has_symbolic_sizes_strides_) { |
641 | return extra_meta_->sizes_; |
642 | } else { |
643 | // Sizes guaranteed to be non-negative, so unchecked cast is OK |
644 | return c10::fromIntArrayRefKnownNonNegative(sizes_default()); |
645 | } |
646 | } |
647 | |
648 | // From https://stackoverflow.com/a/3057522/23845 |
649 | // TODO: does C++14 have a stdlib template for this? |
650 | template <typename T> |
651 | struct identity { |
652 | typedef T type; |
653 | }; |
654 | |
655 | template <typename T> |
656 | ArrayRef<T> generic_sizes() { |
657 | return _generic_sizes(identity<T>()); |
658 | } |
659 | |
660 | ArrayRef<int64_t> _generic_sizes(identity<int64_t>) { |
661 | return sizes(); |
662 | } |
663 | ArrayRef<c10::SymInt> _generic_sizes(identity<c10::SymInt>) { |
664 | return sym_sizes(); |
665 | } |
666 | |
667 | template <typename T> |
668 | ArrayRef<T> generic_strides() { |
669 | return _generic_strides(identity<T>()); |
670 | } |
671 | |
672 | ArrayRef<int64_t> _generic_strides(identity<int64_t>) { |
673 | return strides(); |
674 | } |
675 | ArrayRef<c10::SymInt> _generic_strides(identity<c10::SymInt>) { |
676 | return sym_strides(); |
677 | } |
678 | |
679 | template <typename T> |
680 | T generic_storage_offset() { |
681 | return _generic_storage_offset(identity<T>()); |
682 | } |
683 | |
684 | int64_t _generic_storage_offset(identity<int64_t>) { |
685 | return storage_offset(); |
686 | } |
687 | c10::SymInt _generic_storage_offset(identity<c10::SymInt>) { |
688 | return sym_storage_offset(); |
689 | } |
690 | |
691 | /** |
692 | * The number of elements in a tensor. |
693 | * |
694 | * WARNING: Previously, if you were using the Caffe2 API, you could |
695 | * test numel() == -1 to see if a tensor was uninitialized. This |
696 | * is no longer true; numel always accurately reports the product |
697 | * of sizes of a tensor. |
698 | */ |
699 | int64_t numel() const { |
700 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
701 | return numel_custom(); |
702 | } |
703 | return numel_; |
704 | } |
705 | |
706 | c10::SymInt sym_numel() const { |
707 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
708 | return sym_numel_custom(); |
709 | } |
710 | return c10::SymInt(SymInt::UNCHECKED, numel_); |
711 | } |
712 | |
713 | int64_t numel_default() const { |
714 | TORCH_CHECK( |
715 | !has_symbolic_sizes_strides_, |
716 | "Cannot call numel() on tensor with symbolic sizes/strides" ); |
717 | return numel_; |
718 | } |
719 | |
720 | c10::SymInt sym_numel_default() const { |
721 | if (has_symbolic_sizes_strides_) { |
722 | return extra_meta_->numel_; |
723 | } else { |
724 | return c10::SymInt(SymInt::UNCHECKED, numel_); |
725 | } |
726 | } |
727 | |
728 | /** |
729 | * Return the number of dimensions of this tensor. Note that 0-dimension |
730 | * represents a Tensor that is a Scalar, e.g., one that has a single element. |
731 | */ |
732 | int64_t dim() const { |
733 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
734 | return dim_custom(); |
735 | } |
736 | return sizes_and_strides_.size(); |
737 | } |
738 | |
739 | int64_t dim_default() const { |
740 | if (has_symbolic_sizes_strides_) { |
741 | return extra_meta_->sizes_.size(); |
742 | } else { |
743 | return sizes_and_strides_.size(); |
744 | } |
745 | } |
746 | |
747 | /** |
748 | * Return the offset in number of elements into the storage that this |
749 | * tensor points to. Most tensors have storage_offset() == 0, but, |
750 | * for example, an index into a tensor will have a non-zero storage_offset(). |
751 | * |
752 | * WARNING: This is NOT computed in bytes. |
753 | */ |
754 | int64_t storage_offset() const { |
755 | // TODO: maybe this should be toggled by strides |
756 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
757 | return storage_offset_custom(); |
758 | } |
759 | return storage_offset_; |
760 | } |
761 | |
762 | c10::SymInt sym_storage_offset() const { |
763 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
764 | return sym_storage_offset_custom(); |
765 | } |
766 | return c10::SymInt(SymInt::UNCHECKED, storage_offset_); |
767 | } |
768 | |
769 | int64_t storage_offset_default() const { |
770 | TORCH_CHECK( |
771 | !has_symbolic_sizes_strides_, |
772 | "Cannot call storage_offset() on tensor with symbolic sizes/strides" ); |
773 | return storage_offset_; |
774 | } |
775 | |
776 | c10::SymInt sym_storage_offset_default() const { |
777 | if (has_symbolic_sizes_strides_) { |
778 | return extra_meta_->storage_offset_; |
779 | } else { |
780 | return c10::SymInt(SymInt::UNCHECKED, storage_offset_); |
781 | } |
782 | } |
783 | |
784 | /** |
785 | * Return a reference to the strides of this tensor. This reference remains |
786 | * valid as long as the tensor is live and not restrided. |
787 | */ |
788 | IntArrayRef strides() const { |
789 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { |
790 | return strides_custom(); |
791 | } |
792 | return sizes_and_strides_.strides_arrayref(); |
793 | } |
794 | |
795 | c10::SymIntArrayRef sym_strides() const { |
796 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { |
797 | return sym_strides_custom(); |
798 | } |
799 | return c10::fromIntArrayRefKnownNonNegative(strides_default()); |
800 | } |
801 | |
802 | IntArrayRef strides_default() const { |
803 | TORCH_CHECK( |
804 | !has_symbolic_sizes_strides_, |
805 | "Cannot call strides() on tensor with symbolic sizes/strides" ); |
806 | return sizes_and_strides_.strides_arrayref(); |
807 | } |
808 | |
809 | c10::SymIntArrayRef sym_strides_default() const { |
810 | if (has_symbolic_sizes_strides_) { |
811 | return extra_meta_->strides_; |
812 | } else { |
813 | return c10::fromIntArrayRefKnownNonNegative(strides_default()); |
814 | } |
815 | } |
816 | |
817 | /** |
818 | * Whether or not a tensor is laid out in contiguous memory. |
819 | * |
820 | * Tensors with non-trivial strides are not contiguous. See |
821 | * compute_contiguous() for the exact definition of whether or not |
   * a tensor is contiguous.
823 | */ |
824 | bool is_contiguous( |
825 | at::MemoryFormat memory_format = at::MemoryFormat::Contiguous) const { |
826 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { |
827 | return is_contiguous_custom(memory_format); |
828 | } |
829 | return is_contiguous_default(memory_format); |
830 | } |
831 | |
832 | // These are factored into separate functions in case subclasses |
833 | // want to use them |
834 | bool is_contiguous_default(at::MemoryFormat memory_format) const { |
835 | if (has_symbolic_sizes_strides_) { |
836 | if (memory_format == at::MemoryFormat::ChannelsLast) { |
837 | return extra_meta_->is_channels_last_contiguous_.guard_bool( |
838 | __FILE__, __LINE__); |
839 | } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { |
840 | return extra_meta_->is_channels_last_3d_contiguous_.guard_bool( |
841 | __FILE__, __LINE__); |
842 | } |
843 | return extra_meta_->is_contiguous_.guard_bool(__FILE__, __LINE__); |
844 | } |
845 | |
846 | if (memory_format == at::MemoryFormat::ChannelsLast) { |
847 | return is_channels_last_contiguous_; |
848 | } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { |
849 | return is_channels_last_3d_contiguous_; |
850 | } |
851 | return is_contiguous_; |
852 | } |
853 | |
854 | bool is_strides_like_default(at::MemoryFormat memory_format) const { |
855 | if (has_symbolic_sizes_strides_) { |
856 | if (memory_format == at::MemoryFormat::ChannelsLast) { |
857 | return extra_meta_->is_channels_last_.guard_bool(__FILE__, __LINE__); |
858 | } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { |
859 | return extra_meta_->is_channels_last_3d_.guard_bool(__FILE__, __LINE__); |
860 | } else { |
861 | return false; |
862 | } |
863 | } |
864 | |
865 | if (memory_format == at::MemoryFormat::ChannelsLast) { |
866 | return is_channels_last_; |
867 | } else if (memory_format == at::MemoryFormat::ChannelsLast3d) { |
868 | return is_channels_last_3d_; |
869 | } else { |
870 | return false; |
871 | } |
872 | } |
873 | |
874 | bool is_non_overlapping_and_dense_default() const { |
875 | if (has_symbolic_sizes_strides_) { |
876 | return extra_meta_->is_non_overlapping_and_dense_.guard_bool( |
877 | __FILE__, __LINE__); |
878 | } else { |
879 | return is_non_overlapping_and_dense_; |
880 | } |
881 | } |
882 | |
883 | // NB: these dim accessor functions don't have _default(), as you can use |
884 | // sizes_default/strides_default |
885 | /** |
886 | * Return the size of a tensor at some dimension, wrapping the dimension if |
887 | * necessary. |
888 | * |
889 | * NOTE: if you know wrapping is unnecessary, do sizes()[d] instead; it will |
890 | * be faster |
891 | */ |
892 | int64_t size(int64_t d) const { |
893 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
894 | return size_custom(d); |
895 | } |
896 | d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false); |
897 | return sizes_and_strides_.size_at_unchecked(d); |
898 | } |
899 | |
900 | c10::SymInt sym_size(int64_t d) const { |
901 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) { |
902 | return sym_size_custom(d); |
903 | } |
904 | d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false); |
905 | const auto sizes = this->sym_sizes(); |
906 | return sizes[d]; |
907 | } |
908 | |
909 | /** |
910 | * Return the stride of a tensor at some dimension, wrapping the dimension |
911 | * if necessary. |
912 | * |
   * NOTE: if you know wrapping is unnecessary, do strides()[d] instead; it will
914 | * be faster |
915 | */ |
916 | int64_t stride(int64_t d) const { |
917 | d = maybe_wrap_dim(d, dim(), false); |
918 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { |
919 | // TODO: provide stride_custom, symmetrically with size_custom. |
920 | // There is presently no user for it; only NestedTensor is using |
921 | // size_custom overrideability |
922 | return strides_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds) |
923 | } |
924 | // Intentionally don't call default, which also handles symbolic |
925 | return sizes_and_strides_.stride_at_unchecked(d); |
926 | } |
927 | |
928 | enum class SizesStridesPolicy : uint8_t { |
929 | // Default behavior, e.g., dense tensor. |
930 | // |
931 | // Can override: nothing |
932 | Default = 0, |
933 | // Customizable strides behavior, e.g., sparse tensor, |
934 | // mkldnn tensor. |
935 | // |
936 | // Can override: strides(), is_contiguous() |
937 | CustomStrides = 1, |
938 | // Customizable sizes behavior, e.g., nested tensor |
939 | // |
940 | // Can override: strides(), is_contiguous(), sizes(), dim(), numel() |
941 | CustomSizes = 2 |
942 | }; |
943 | |
944 | protected: |
945 | inline bool matches_policy(SizesStridesPolicy policy) const { |
946 | return sizes_strides_policy_ >= static_cast<uint8_t>(policy); |
947 | } |
948 | |
949 | inline bool matches_custom(SizesStridesPolicy policy) const { |
950 | return custom_sizes_strides_ >= static_cast<uint8_t>(policy); |
951 | } |
952 | |
953 | inline bool matches_python_custom(SizesStridesPolicy policy) const { |
954 | auto r = python_custom_sizes_strides_ >= static_cast<uint8_t>(policy); |
955 | if (r) { |
956 | TORCH_INTERNAL_ASSERT(is_python_dispatch()) |
957 | } |
958 | return r; |
959 | } |
960 | |
961 | /** |
962 | * Customization points for the functions above. sizes_strides_policy_ |
963 | * must be set to enable these. |
964 | * |
965 | * NB: dim is overrideable separately from sizes because it is possible |
966 | * for a tensor to have rank, but not well defined sizes. |
967 | */ |
968 | // sizes_strides_policy_ >= CustomStrides |
969 | virtual bool is_contiguous_custom(at::MemoryFormat memory_format) const; |
970 | virtual bool is_strides_like_custom(at::MemoryFormat memory_format) const; |
971 | virtual bool is_non_overlapping_and_dense_custom() const; |
972 | // sizes_strides_policy_ >= CustomSizes |
  // Currently this method only exists to be overridden by subclasses such as
974 | // NestedTensorImpl. |
975 | virtual int64_t size_custom(int64_t d) const { |
976 | // TODO: We could add support to Python dispatch here. |
977 | // TODO: We could call into aten::size.int instead of |
978 | // sizes_custom()[d] and enable use of the dispatcher. |
979 | d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false); |
980 | return sizes_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds) |
981 | } |
982 | |
983 | virtual c10::SymInt sym_size_custom(int64_t d) const { |
984 | // TODO: We could add support to Python dispatch here. |
985 | // TODO: We could call into aten::size.int instead of |
986 | // sym_sizes_custom()[d] and enable use of the dispatcher. |
987 | d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false); |
988 | return sym_sizes_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds) |
989 | } |
990 | |
991 | virtual IntArrayRef sizes_custom() const; |
992 | virtual IntArrayRef strides_custom() const; |
993 | virtual int64_t numel_custom() const; |
994 | virtual int64_t storage_offset_custom() const; |
995 | virtual int64_t dim_custom() const; |
996 | virtual Device device_custom() const; |
997 | virtual Layout layout_custom() const; |
998 | |
999 | virtual c10::SymIntArrayRef sym_sizes_custom() const; |
1000 | virtual c10::SymIntArrayRef sym_strides_custom() const; |
1001 | virtual c10::SymInt sym_numel_custom() const; |
1002 | virtual c10::SymInt sym_storage_offset_custom() const; |
1003 | |
1004 | public: |
1005 | /** |
1006 | * True if this tensor has storage. See storage() for details. |
1007 | */ |
1008 | #ifdef DEBUG |
1009 | // Allow subclasses to check that their storage_ is never getting set in debug |
1010 | // builds. |
1011 | virtual |
1012 | #else |
1013 | TENSORIMPL_MAYBE_VIRTUAL |
1014 | #endif |
1015 | bool |
1016 | has_storage() const |
1017 | // NOTE: we devirtualize this because it arguably shouldn't be an |
1018 | // error just to ask subclasses if they have storage. |
1019 | // This used to throw for most subclasses, but OpaqueTensorImpl |
1020 | // wanted it to successfully return false, so we went ahead and made |
1021 | // it a non-error. |
1022 | #ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY |
1023 | { |
1024 | return storage_; |
1025 | } |
1026 | #else |
1027 | ; |
1028 | #endif |
1029 | |
1030 | /** |
1031 | * Return the underlying storage of a Tensor. Multiple tensors may share |
1032 | * a single storage. A Storage is an impoverished, Tensor-like class |
   * which supports far fewer operations than Tensor.
1034 | * |
1035 | * Avoid using this method if possible; try to use only Tensor APIs to perform |
1036 | * operations. |
1037 | */ |
1038 | TENSORIMPL_MAYBE_VIRTUAL const Storage& storage() const { |
1039 | if (C10_UNLIKELY(storage_access_should_throw_)) { |
1040 | throw_storage_access_error(); |
1041 | } |
1042 | return storage_; |
1043 | } |
1044 | |
1045 | /** |
1046 | * Return the underlying storage, unsafely assuming this is a basic strided |
1047 | * tensor. In cases where `storage` access would throw, this returns a |
1048 | * default-constructed Storage. |
1049 | */ |
1050 | inline const Storage& unsafe_storage() const { |
1051 | return storage_; |
1052 | } |
1053 | |
1054 | bool unique_version() const { |
1055 | return version_counter_.unique(); |
1056 | } |
1057 | |
1058 | protected: |
1059 | virtual Layout layout_impl() const { |
1060 | TORCH_CHECK( |
1061 | false, "layout_impl is only implemented for TensorImpl subclasses." ); |
1062 | } |
1063 | |
1064 | public: |
1065 | // Whether a tensor is sparse COO or not. |
1066 | bool is_sparse() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1068 | // reasons. |
1069 | return key_set_.has_all(c10::sparse_ks); |
1070 | } |
1071 | |
1072 | // Whether a tensor is sparse CSR or not. |
1073 | bool is_sparse_csr() const { |
1074 | return layout() == kSparseCsr; |
1075 | } |
1076 | |
1077 | bool is_quantized() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1079 | // reasons. |
1080 | constexpr auto quantized_ks = DispatchKeySet(DispatchKey::Quantized); |
1081 | return key_set_.has_all(quantized_ks); |
1082 | } |
1083 | |
1084 | bool is_meta() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1086 | // reasons. |
1087 | if (C10_UNLIKELY(device_policy_)) { |
1088 | return device_custom().is_meta(); |
1089 | } |
1090 | return device_opt_.has_value() && device_opt_->type() == kMeta; |
1091 | } |
1092 | |
1093 | bool is_cpu() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1095 | // reasons. |
1096 | if (C10_UNLIKELY(device_policy_)) { |
1097 | return device_custom().is_cpu(); |
1098 | } |
1099 | // Note: we cannot rely on dispatch keys to determine the device type |
1100 | // of a tensor, because "wrapper" tensors (like FunctionalTensorWrapper) |
1101 | // don't include backend dispatch keys. |
1102 | return device_opt_.has_value() && device_opt_->type() == kCPU; |
1103 | } |
1104 | |
1105 | bool is_cuda() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1107 | // reasons. |
1108 | if (C10_UNLIKELY(device_policy_)) { |
1109 | return device_custom().is_cuda(); |
1110 | } |
1111 | return device_opt_.has_value() && device_opt_->type() == kCUDA; |
1112 | } |
1113 | |
1114 | bool is_xpu() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1116 | // reasons. |
1117 | if (C10_UNLIKELY(device_policy_)) { |
1118 | return device_custom().is_xpu(); |
1119 | } |
1120 | return device_opt_.has_value() && device_opt_->type() == kXPU; |
1121 | } |
1122 | |
1123 | bool is_ipu() const { |
1124 | if (C10_UNLIKELY(device_policy_)) { |
1125 | return device_custom().is_ipu(); |
1126 | } |
1127 | return device_opt_.has_value() && device_opt_->type() == kIPU; |
1128 | } |
1129 | |
1130 | bool is_xla() const { |
1131 | if (C10_UNLIKELY(device_policy_)) { |
1132 | return device_custom().is_xla(); |
1133 | } |
1134 | return device_opt_.has_value() && device_opt_->type() == kXLA; |
1135 | } |
1136 | |
1137 | bool is_hpu() const { |
1138 | if (C10_UNLIKELY(device_policy_)) { |
1139 | return device_custom().is_hpu(); |
1140 | } |
1141 | return device_opt_.has_value() && device_opt_->type() == kHPU; |
1142 | } |
1143 | |
1144 | bool is_lazy() const { |
1145 | if (C10_UNLIKELY(device_policy_)) { |
1146 | return device_custom().is_lazy(); |
1147 | } |
1148 | return device_opt_.has_value() && device_opt_->type() == kLazy; |
1149 | } |
1150 | |
1151 | bool is_hip() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1153 | // reasons. |
1154 | if (C10_UNLIKELY(device_policy_)) { |
1155 | return device_custom().is_hip(); |
1156 | } |
1157 | return device_opt_.has_value() && device_opt_->type() == kHIP; |
1158 | } |
1159 | |
1160 | bool is_ve() const { |
    // NB: This method is not virtual and avoids dispatches for performance
1162 | // reasons. |
1163 | if (C10_UNLIKELY(device_policy_)) { |
1164 | return device_custom().is_ve(); |
1165 | } |
1166 | return device_opt_.has_value() && device_opt_->type() == kVE; |
1167 | } |
1168 | |
1169 | bool is_mkldnn() const { |
1170 | return key_set_.has_all(c10::mkldnn_ks); |
1171 | } |
1172 | |
1173 | bool is_vulkan() const { |
1174 | if (C10_UNLIKELY(device_policy_)) { |
1175 | return device_custom().is_vulkan(); |
1176 | } |
1177 | return device_opt_.has_value() && device_opt_->type() == kVulkan; |
1178 | } |
1179 | |
1180 | bool is_metal() const { |
1181 | if (C10_UNLIKELY(device_policy_)) { |
1182 | return device_custom().is_metal(); |
1183 | } |
1184 | return device_opt_.has_value() && device_opt_->type() == kMetal; |
1185 | } |
1186 | |
1187 | bool is_mps() const { |
1188 | if (C10_UNLIKELY(device_policy_)) { |
1189 | return device_custom().is_mps(); |
1190 | } |
1191 | return device_opt_.has_value() && device_opt_->type() == kMPS; |
1192 | } |
1193 | |
1194 | bool is_ort() const { |
1195 | if (C10_UNLIKELY(device_policy_)) { |
1196 | return device_custom().is_ort(); |
1197 | } |
1198 | return device_opt_.has_value() && device_opt_->type() == kORT; |
1199 | } |
1200 | |
1201 | bool is_nested() const { |
1202 | return key_set_.has(DispatchKey::NestedTensor); |
1203 | } |
1204 | |
  // TODO: remove this once we don't automatically enable Autograd dispatch
  // keys in TensorImpl constructor.
  // DON'T USE THIS API!! It's only created for testing purposes in
  // file aten/src/ATen/core/boxing/impl/test_helpers.h
1210 | void remove_autograd_key() { |
1211 | key_set_ = key_set_ - autograd_dispatch_keyset; |
1212 | } |
1213 | |
1214 | // Inference tensor doesn't have autograd or ADInplaceOrView key. |
1215 | // Invariant: |
1216 | // Inference tensor has version_counter_.enabled() == false |
1217 | bool is_inference() { |
1218 | bool no_ADInplaceOrView = !key_set_.has_any(c10::inplace_or_view_ks); |
1219 | bool no_Autograd = !key_set_.has_any(c10::autograd_dispatch_keyset); |
1220 | TORCH_INTERNAL_ASSERT_DEBUG_ONLY( |
1221 | no_ADInplaceOrView == no_Autograd, |
1222 | "ADInplaceOrView and Autograd keys must be on/off at the same time." ); |
1223 | return no_ADInplaceOrView && no_Autograd; |
1224 | } |
1225 | |
1226 | int64_t get_device() const { |
1227 | if (C10_UNLIKELY(device_policy_)) { |
1228 | return device_custom().index(); |
1229 | } |
1230 | return device_default().index(); |
1231 | } |
1232 | |
1233 | Device device() const { |
1234 | if (C10_UNLIKELY(device_policy_)) { |
1235 | return device_custom(); |
1236 | } |
1237 | return device_default(); |
1238 | } |
1239 | |
1240 | protected: |
1241 | c10::Device device_default() const { |
    TORCH_CHECK(device_opt_.has_value(), "tensor does not have a device");
1243 | // See NOTE [c10::optional operator usage in CUDA] |
1244 | return *device_opt_; |
1245 | } |
1246 | |
1247 | public: |
1248 | Layout layout() const { |
1249 | if (C10_UNLIKELY(layout_policy_)) { |
1250 | return layout_custom(); |
1251 | } |
1252 | |
    // NB: This method is not virtual and avoids dispatches for perf.
1254 | // strided is also the most common layout type, so we check for |
1255 | // strided case first. |
1256 | // This keyset must also be kept in sync with the logic in |
1257 | // is_sparse() / is_sparse_csr() / is_mkldnn() |
1258 | constexpr auto sparse_and_sparsecsr_and_mkldnn_ks = |
1259 | c10::sparse_ks | c10::sparse_csr_ks | c10::mkldnn_ks; |
1260 | if (!key_set_.has_any(sparse_and_sparsecsr_and_mkldnn_ks)) { |
1261 | return kStrided; |
1262 | } else if (is_sparse()) { |
1263 | return kSparse; |
1264 | } else if (key_set_.has_any(c10::sparse_csr_ks)) { |
1265 | // Typically, the tensor dispatch keys define the tensor layout |
      // uniquely. This allows using a non-virtual layout method for
      // better performance. However, when a tensor's layout depends,
      // say, on tensor attributes, one must use this execution path
      // where the corresponding tensor impl class overrides the virtual
1270 | // layout_impl() method. |
1271 | // |
1272 | // TODO: implement layout() as native function/method so that |
1273 | // __torch_dispatch__ users will be able to redefine the |
1274 | // layout() method. |
1275 | return layout_impl(); |
1276 | } else { |
1277 | TORCH_INTERNAL_ASSERT( |
1278 | is_mkldnn(), "There is an error in the layout calculation logic." ); |
1279 | return kMkldnn; |
1280 | } |
1281 | } |
1282 | |
1283 | /** |
1284 | * True if a tensor was auto-wrapped from a C++ or Python number. |
1285 | * For example, when you write 't + 2', 2 is auto-wrapped into a Tensor |
1286 | * with `is_wrapped_number_` set to true. |
1287 | * |
1288 | * Wrapped numbers do not participate in the result type computation for |
1289 | * mixed-type operations if there are any Tensors that are not wrapped |
1290 | * numbers. This is useful, because we want 't + 2' to work with |
1291 | * any type of tensor, not just LongTensor (which is what integers |
1292 | * in Python represent). |
1293 | * |
1294 | * Otherwise, they behave like their non-wrapped equivalents. |
1295 | * See [Result type computation] in TensorIterator.h. |
1296 | * |
1297 | * Why did we opt for wrapped numbers, as opposed to just having |
1298 | * an extra function add(Tensor, Scalar)? This helps greatly reduce |
1299 | * the amount of code we have to write for add, when actually |
1300 | * a Tensor-Scalar addition is really just a Tensor-Tensor |
1301 | * addition when the RHS is 0-dim (except for promotion behavior.) |
1302 | */ |
1303 | bool is_wrapped_number() const { |
1304 | return is_wrapped_number_; |
1305 | } |
1306 | |
1307 | /** |
1308 | * Set whether or not a tensor was auto-wrapped from a C++ or Python |
1309 | * number. You probably don't want to call this, unless you are |
1310 | * writing binding code. |
1311 | */ |
1312 | void set_wrapped_number(bool value) { |
1313 | TORCH_INTERNAL_ASSERT(dim() == 0); |
1314 | is_wrapped_number_ = value; |
1315 | } |
1316 | |
1317 | /** |
1318 | * Returns true if Tensor supports as_strided and as_strided_backward. |
1319 | * This is used in autograd to perform inplace update on view Tensors. |
1320 | * See Note [View + Inplace update for base tensor] and |
1321 | * [View + Inplace update for view tensor] for details. |
   * Note this method only returns true for the XLA backend, where it
   * simulates strided Tensors to support most view ops, but it cannot
   * fully support the general `as_strided` case.
   * It can be expanded as needed in the future, e.g., for sparse Tensors.
1326 | */ |
1327 | inline bool support_as_strided() const { |
1328 | if (is_nested()) { |
1329 | return false; |
1330 | } |
1331 | if (key_set_.has(DispatchKey::Functionalize)) { |
1332 | return false; |
1333 | } |
1334 | return device().supports_as_strided(); |
1335 | } |
1336 | |
1337 | // ~~~~~ Autograd API ~~~~~ |
1338 | // Some methods below are defined in TensorImpl.cpp because Tensor is an |
1339 | // incomplete type. |
1340 | |
1341 | /** |
1342 | * Set whether or not a tensor requires gradient. |
1343 | */ |
1344 | void set_requires_grad(bool requires_grad); |
1345 | |
1346 | /** |
1347 | * True if a tensor requires gradient. Tensors which require gradient |
1348 | * have history tracked for any operations performed on them, so that |
1349 | * we can automatically differentiate back to them. A tensor that |
1350 | * requires gradient and has no history is a "leaf" tensor, which we |
1351 | * accumulate gradients into. |
1352 | */ |
1353 | bool requires_grad() const; |
1354 | |
1355 | /** |
1356 | * Return a mutable reference to the gradient. This is conventionally |
1357 | * used as `t.grad() = x` to set a gradient to a completely new tensor. |
1358 | */ |
1359 | at::Tensor& mutable_grad(); |
1360 | |
1361 | /** |
1362 | * Return the accumulated gradient of a tensor. This gradient is written |
1363 | * into when performing backwards, when this tensor is a leaf tensor. |
1364 | */ |
1365 | const at::Tensor& grad() const; |
1366 | |
1367 | /** |
1368 | * Whether or not the imaginary part of the tensor should be negated |
1369 | */ |
1370 | inline bool is_conj() const { |
1371 | constexpr auto conjugate_ks = DispatchKeySet(DispatchKey::Conjugate); |
1372 | return key_set_.has_all(conjugate_ks); |
1373 | } |
1374 | |
1375 | /** |
1376 | * Set whether or not to take the conjugate of the tensor (flip the imaginary |
1377 | * bit). |
1378 | */ |
1379 | void _set_conj(bool value) { |
1380 | if (value) { |
1381 | key_set_ = key_set_.add(DispatchKey::Conjugate); |
1382 | TORCH_INTERNAL_ASSERT(isComplexType(typeMetaToScalarType(dtype()))); |
1383 | } else { |
1384 | key_set_ = key_set_.remove(DispatchKey::Conjugate); |
1385 | } |
1386 | } |
1387 | |
1388 | /** |
1389 | * XXX: do not use, private api! |
1390 | * Update the backend component related keys to the backend component |
1391 | * corresponding to this device. |
1392 | */ |
1393 | void _change_backend_component_keys(c10::Device device); |
1394 | |
1395 | /** |
1396 | * Whether or not the tensor is a zerotensor |
1397 | */ |
1398 | inline bool _is_zerotensor() const { |
1399 | constexpr auto zerotensor_ks = DispatchKeySet(DispatchKey::ZeroTensor); |
1400 | return key_set_.has_all(zerotensor_ks); |
1401 | } |
1402 | |
1403 | /** |
1404 | Set whether or not the tensor is a zero tensor |
1405 | */ |
1406 | void _set_zero(bool value) { |
1407 | if (value) { |
1408 | TORCH_INTERNAL_ASSERT( |
1409 | false, |
1410 | "Please call `torch._efficientzerotensor` if you want to create a tensor with no storage." ); |
1411 | } else { |
1412 | key_set_ = key_set_.remove(DispatchKey::ZeroTensor); |
1413 | } |
1414 | } |
1415 | |
1416 | /** |
1417 | * Whether or not the tensor should be negated |
1418 | */ |
1419 | inline bool is_neg() const { |
1420 | constexpr auto negative_ks = DispatchKeySet(DispatchKey::Negative); |
1421 | return key_set_.has_all(negative_ks); |
1422 | } |
1423 | |
1424 | /** |
   * Set whether or not the tensor should be negated (flip the negative
   * bit).
1427 | */ |
1428 | void _set_neg(bool value) { |
1429 | if (value) { |
1430 | key_set_ = key_set_.add(DispatchKey::Negative); |
1431 | } else { |
1432 | key_set_ = key_set_.remove(DispatchKey::Negative); |
1433 | } |
1434 | } |
1435 | |
1436 | /** |
1437 | * Return the accumulated gradient of a tensor. This gradient is computed |
1438 | * using forward mode AD. |
1439 | * |
1440 | * This is an internal API that should never be used by end users. |
1441 | * |
1442 | * The API is as follows: |
1443 | * - "level" allows to specify the level of forward AD nesting for which the |
1444 | * gradient should be returned. Note that since levels are not fully |
1445 | * supported yet, this argument should be 0. See documentation for |
1446 | * torch::autograd::enter_dual_level for more details about forward AD |
1447 | * nesting. |
1448 | * - "self" should represent the Tensor whose forward grad is accessed. It |
*   is required when dealing with views.
1450 | */ |
1451 | const at::Tensor& _fw_grad(uint64_t level, const at::TensorBase& self) const; |
1452 | |
1453 | /** |
1454 | * Sets the forward gradient for this Tensor. |
1455 | * The given Tensor might not be used directly and its content will be copied. |
1456 | * |
1457 | * This is an internal API that should never be used by end users. |
1458 | * |
1459 | * The API is as follows: |
1460 | * - "new_grad" is a Tensor containing the new value of the gradient that |
1461 | * should be set |
1462 | * - "self" should represent the Tensor whose forward grad is accessed. It |
*   is required when dealing with views.
* - "level" allows one to specify the level of forward AD nesting for which the
1465 | * gradient should be set. Note that since levels are not fully supported |
1466 | * yet, this argument should be 0. See documentation for |
1467 | * torch::autograd::enter_dual_level for more details about forward AD |
1468 | * nesting. |
* - "is_inplace_op" is a boolean flag that tells whether this gradient was
*   generated by an in-place operation or an out-of-place one. This allows
1471 | * better error checking. |
1472 | */ |
1473 | void _set_fw_grad( |
1474 | const at::TensorBase& new_grad, |
1475 | const at::TensorBase& self, |
1476 | uint64_t level, |
1477 | bool is_inplace_op); |
1478 | |
1479 | /** |
1480 | * Return a typed data pointer to the actual data which this tensor refers to. |
1481 | * This checks that the requested type (from the template parameter) matches |
1482 | * the internal type of the tensor. |
1483 | * |
1484 | * It is invalid to call data() on a dtype-uninitialized tensor, even if |
1485 | * the size is 0. |
1486 | * |
1487 | * WARNING: If a tensor is not contiguous, you MUST use strides when |
1488 | * performing index calculations to determine the location of elements in |
1489 | * the tensor. We recommend using 'TensorAccessor' to handle this computation |
1490 | * for you; this class is available from 'Tensor'. |
1491 | */ |
1492 | template <typename T> |
1493 | inline T* data() const { |
1494 | TORCH_CHECK( |
1495 | data_type_.Match<T>(), |
1496 | "Tensor type mismatch, caller expects elements to be " , |
1497 | caffe2::TypeMeta::TypeName<T>(), |
1498 | ", while tensor contains " , |
1499 | data_type_.name(), |
1500 | ". " ); |
1501 | return data_ptr_impl<T>(); |
1502 | } |
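// Hedged usage sketch (not part of this header): typed access combined with
// manual stride arithmetic, as the WARNING above requires for non-contiguous
// tensors. `impl`, `i` and `j` are hypothetical names.
//
//   float* base = impl->data<float>();   // checks the dtype really is float
//   // For a 2-d tensor, element (i, j) lives at:
//   float v = base[i * impl->stride(0) + j * impl->stride(1)];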
1503 | |
1504 | /** |
1505 | * More efficient helper for Tensor::data_ptr(). Like data<T>(), but |
1506 | * does not do a type check. Unlike the untemplated data(), does |
1507 | * check has_storage() and storage_initialized(). |
1508 | */ |
1509 | template <typename T> |
1510 | inline T* data_ptr_impl() const { |
1511 | TORCH_CHECK( |
1512 | has_storage(), |
1513 | "Cannot access data pointer of Tensor that doesn't have storage" ); |
1514 | TORCH_CHECK( |
1515 | storage_initialized(), |
1516 | "The tensor has a non-zero number of elements, but its data is not allocated yet. " |
1517 | "Caffe2 uses a lazy allocation, so you will need to call " |
1518 | "mutable_data() or raw_mutable_data() to actually allocate memory." ); |
1519 | // Caller does the type check. |
1520 | return storage_.unsafe_data<T>() + storage_offset_; |
1521 | } |
1522 | |
1523 | /** |
1524 | * Return a void* data pointer to the actual data which this tensor refers to. |
1525 | * |
1526 | * It is invalid to call data() on a dtype-uninitialized tensor, even if the |
1527 | * size is 0. |
1528 | * |
* WARNING: The data pointed to by this tensor may not be contiguous; do NOT
1530 | * assume that itemsize() * numel() is sufficient to compute the bytes that |
1531 | * can be validly read from this tensor. |
1532 | */ |
1533 | inline void* data() const { |
1534 | TORCH_CHECK( |
1535 | has_storage(), |
1536 | "Cannot access data pointer of Tensor that doesn't have storage" ); |
1537 | TORCH_CHECK( |
1538 | dtype_initialized(), |
1539 | "Cannot access data pointer of Tensor that doesn't have initialized dtype " |
1540 | "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data<T>() on x)" ); |
1541 | // Computing an offset into an empty tensor would be UB, since an empty |
1542 | // tensor's storage will be nullptr, and adding a nonzero offset to nullptr |
1543 | // is UB. So we skip the offset computation in this case. |
1544 | if (is_empty()) { |
1545 | return nullptr; |
1546 | } |
1547 | return static_cast<void*>( |
1548 | static_cast<char*>(storage_.data()) + |
1549 | data_type_.itemsize() * storage_offset_); |
1550 | } |
1551 | |
1552 | /** |
1553 | * Like data<T>(), but performs no checks. You are responsible for ensuring |
1554 | * that all invariants required by data() are upheld here. |
1555 | */ |
1556 | template <typename T> |
1557 | inline T* unsafe_data() const { |
1558 | return storage_.unsafe_data<T>() + storage_offset_; |
1559 | } |
1560 | |
1561 | /** |
1562 | * Returns the TypeMeta of a tensor, which describes what data type |
1563 | * it is (e.g., int, float, ...) |
1564 | */ |
1565 | const caffe2::TypeMeta dtype() const { |
1566 | return data_type_; |
1567 | } |
1568 | |
1569 | /** |
1570 | * Return the size of a single element of this tensor in bytes. |
1571 | */ |
1572 | size_t itemsize() const { |
1573 | TORCH_CHECK( |
1574 | dtype_initialized(), |
1575 | "Cannot report itemsize of Tensor that doesn't have initialized dtype " |
1576 | "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data<T>() on x)" ); |
1577 | return data_type_.itemsize(); |
1578 | } |
1579 | |
1580 | protected: |
1581 | /** |
1582 | * Returns the human-readable name of the actual type of this object (e.g., |
1583 | * TensorImpl, BatchedTensorImpl, etc.). Used for error messages. |
1584 | */ |
1585 | virtual const char* tensorimpl_type_name() const { |
1586 | return "TensorImpl" ; |
1587 | } |
1588 | |
1589 | private: |
1590 | [[noreturn]] void throw_storage_access_error() const; |
1591 | |
1592 | public: |
1593 | /** |
* True if a tensor has no elements (i.e., numel() == 0).
1595 | */ |
1596 | inline bool is_empty() const { |
1597 | return numel() == 0; |
1598 | } |
1599 | |
1600 | // if we are going to use sym sizes, we should be setting sym strides at the |
1601 | // same time, otherwise it's very easy to misuse this API |
1602 | void set_sizes_and_strides( |
1603 | c10::SymIntArrayRef sizes, |
1604 | c10::SymIntArrayRef strides, |
1605 | c10::optional<c10::SymInt> storage_offset = c10::nullopt); |
1606 | // This is renamed to avoid breaking overload BC |
1607 | void generic_set_sizes_contiguous(c10::SymIntArrayRef sizes); |
1608 | void generic_set_sizes_contiguous(c10::IntArrayRef sizes) { |
1609 | set_sizes_contiguous(sizes); |
1610 | } |
1611 | |
1612 | /** |
1613 | * Change the size at some dimension. This DOES NOT update strides; |
1614 | * thus, most changes to size will not preserve contiguity. You probably |
1615 | * also want to call set_stride() when you call this. |
1616 | * |
1617 | * TODO: This should be jettisoned in favor of `set_sizes_and_strides`, |
1618 | * which is harder to misuse. |
1619 | */ |
1620 | virtual void set_size(int64_t dim, int64_t new_size) { |
1621 | TORCH_CHECK( |
1622 | allow_tensor_metadata_change(), |
1623 | "set_size " , |
1624 | err_msg_tensor_metadata_change_not_allowed); |
1625 | TORCH_CHECK( |
1626 | !matches_policy(SizesStridesPolicy::CustomSizes), |
1627 | "set_size() called on tensor with dynamic shapes or customized size behavior" ) |
1628 | sizes_and_strides_.size_at(dim) = new_size; |
1629 | refresh_numel(); |
1630 | refresh_contiguous(); |
1631 | } |
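// Hedged sketch (not part of this header): since set_size() leaves strides
// untouched, shrinking a contiguous {4, 6} tensor to {4, 3} along dim 1 is
// typically paired with a stride fixup (or done in one set_sizes_and_strides()
// call). `impl` is a hypothetical TensorImpl*.
//
//   impl->set_size(1, 3);    // sizes become {4, 3}; strides stay {6, 1}
//   impl->set_stride(0, 3);  // restore row-major contiguity: strides {3, 1}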
1632 | |
1633 | /** |
1634 | * Change the stride at some dimension. |
1635 | * |
1636 | * TODO: This should be jettisoned in favor of `set_sizes_and_strides`, |
1637 | * which is harder to misuse. |
1638 | */ |
1639 | virtual void set_stride(int64_t dim, int64_t new_stride) { |
1640 | TORCH_CHECK( |
1641 | allow_tensor_metadata_change(), |
1642 | "set_stride " , |
1643 | err_msg_tensor_metadata_change_not_allowed); |
1644 | TORCH_CHECK( |
1645 | !has_symbolic_sizes_strides_, |
1646 | "set_stride() called on tensor with symbolic shape" ) |
1647 | sizes_and_strides_.stride_at_unchecked(dim) = new_stride; |
1648 | refresh_contiguous(); |
1649 | } |
1650 | |
1651 | /** |
1652 | * Set the offset into the storage of this tensor. |
1653 | * |
1654 | * WARNING: This does NOT check if the tensor is in bounds for the new |
1655 | * location at the storage; the caller is responsible for checking this |
1656 | * (and resizing if necessary.) |
1657 | */ |
1658 | virtual void set_storage_offset(int64_t storage_offset) { |
1659 | TORCH_CHECK( |
1660 | allow_tensor_metadata_change(), |
1661 | "set_storage_offset " , |
1662 | err_msg_tensor_metadata_change_not_allowed); |
1663 | // TODO: this should probably consult policy |
1664 | TORCH_CHECK( |
1665 | !has_symbolic_sizes_strides_, |
1666 | "set_storage_offset() called on tensor with symbolic shape" ) |
1667 | storage_offset_ = storage_offset; |
1668 | } |
1669 | |
1670 | /** |
1671 | * Like set_sizes_and_strides but assumes contiguous strides. |
1672 | * |
1673 | * WARNING: This function does not check if the requested |
1674 | * sizes/strides are in bounds for the storage that is allocated; |
1675 | * this is the responsibility of the caller |
1676 | */ |
1677 | void set_sizes_contiguous(IntArrayRef new_size) { |
1678 | TORCH_CHECK( |
1679 | allow_tensor_metadata_change(), |
1680 | "set_sizes_contiguous " , |
1681 | err_msg_tensor_metadata_change_not_allowed); |
1682 | TORCH_CHECK( |
1683 | !matches_policy(SizesStridesPolicy::CustomStrides), |
1684 | "tried to directly modify sizes for customized tensor" ); |
1685 | sizes_and_strides_.set_sizes(new_size); |
1686 | |
1687 | refresh_numel(); |
1688 | empty_tensor_restride( |
1689 | MemoryFormat::Contiguous); // calls refresh_contiguous() |
1690 | } |
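// Hedged worked example (not part of this header): set_sizes_contiguous()
// recomputes row-major strides for you. For a hypothetical `impl` set to
// sizes {2, 3, 4}, the resulting strides are {12, 4, 1} and numel() is 24.
//
//   impl->set_sizes_contiguous({2, 3, 4});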
1691 | |
1692 | /** |
1693 | * Set the sizes and strides of a tensor. |
1694 | * |
1695 | * WARNING: This function does not check if the requested |
1696 | * sizes/strides are in bounds for the storage that is allocated; |
1697 | * this is the responsibility of the caller |
1698 | */ |
1699 | void set_sizes_and_strides( |
1700 | IntArrayRef new_size, |
1701 | IntArrayRef new_stride, |
1702 | c10::optional<int64_t> storage_offset = c10::nullopt) { |
1703 | TORCH_CHECK( |
1704 | allow_tensor_metadata_change(), |
1705 | "set_sizes_and_strides " , |
1706 | err_msg_tensor_metadata_change_not_allowed); |
1707 | TORCH_CHECK( |
1708 | !has_symbolic_sizes_strides_, |
1709 | "set_sizes_and_strides() called on tensor with symbolic shape" ) |
1710 | TORCH_CHECK( |
1711 | new_size.size() == new_stride.size(), |
1712 | "dimensionality of sizes (" , |
1713 | new_size.size(), |
1714 | ") must match dimensionality of strides (" , |
1715 | new_stride.size(), |
1716 | ")" ); |
1717 | const auto new_dim = new_size.size(); |
1718 | |
1719 | sizes_and_strides_.set_sizes(new_size); |
1720 | |
1721 | if (new_dim > 0) { |
1722 | for (size_t dim = new_dim - 1;; dim--) { |
1723 | if (new_stride[dim] >= 0) { |
1724 | sizes_and_strides_.stride_at_unchecked(dim) = new_stride[dim]; |
1725 | } else { |
1726 | // XXX: This behavior is surprising and may need to be removed to |
1727 | // support negative strides. Some pytorch functions rely on it: |
1728 | // for example, torch.cat (run TestTorch.test_cat_empty). |
1729 | if (dim == new_dim - 1) { |
1730 | sizes_and_strides_.stride_at_unchecked(dim) = 1; |
1731 | } else { |
1732 | // Keep stride monotonically increasing to match NumPy. |
1733 | sizes_and_strides_.stride_at_unchecked(dim) = |
1734 | std::max<int64_t>( |
1735 | sizes_and_strides_.size_at_unchecked(dim + 1), 1) * |
1736 | sizes_and_strides_.stride_at_unchecked(dim + 1); |
1737 | } |
1738 | } |
1739 | if (dim == 0) |
1740 | break; |
1741 | } |
1742 | } |
1743 | |
1744 | refresh_numel(); |
1745 | refresh_contiguous(); |
1746 | |
1747 | if (storage_offset.has_value()) { |
1748 | storage_offset_ = *storage_offset; |
1749 | } |
1750 | } |
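// Hedged sketch (not part of this header): the metadata of a transposed view
// of a contiguous {3, 4} tensor (strides {4, 1}) can be expressed by swapping
// sizes and strides; the storage itself is untouched. `view_impl` is a
// hypothetical TensorImpl*.
//
//   std::vector<int64_t> sizes{4, 3}, strides{1, 4};
//   view_impl->set_sizes_and_strides(sizes, strides);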
1751 | |
1752 | /** |
1753 | * Set whether a tensor allows changes to its metadata (e.g. sizes / strides / |
1754 | * storage / storage_offset). See NOTE [ Metadata Change for a Detached Tensor |
1755 | * ] for details. |
1756 | */ |
1757 | void set_allow_tensor_metadata_change(bool value) { |
// TODO: at some point, we should kill this field completely.
// NB: the incoming `value` is currently ignored; this unconditionally
// re-enables metadata changes.
allow_tensor_metadata_change_ = true;
1760 | } |
1761 | |
1762 | /** |
1763 | * True if a tensor allows changes to its metadata (e.g. sizes / strides / |
1764 | * storage / storage_offset). See NOTE [ Metadata Change for a Detached Tensor |
1765 | * ] for details. |
1766 | */ |
1767 | bool allow_tensor_metadata_change() const { |
1768 | return allow_tensor_metadata_change_; |
1769 | } |
1770 | |
1771 | /** |
1772 | * Set the pointer to autograd metadata. |
1773 | */ |
1774 | void set_autograd_meta( |
1775 | std::unique_ptr<c10::AutogradMetaInterface> autograd_meta); |
1776 | |
1777 | /** |
1778 | * Return the pointer to autograd metadata. May return nullptr if the |
1779 | * tensor does not track gradients. |
1780 | */ |
1781 | c10::AutogradMetaInterface* autograd_meta() const; |
1782 | |
1783 | /** |
1784 | * Set the pointer to named tensor metadata. |
1785 | */ |
1786 | void set_named_tensor_meta( |
1787 | std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta) { |
1788 | TORCH_WARN_ONCE( |
1789 | "Named tensors and all their associated APIs are an experimental feature " , |
1790 | "and subject to change. Please do not use them for anything important " , |
1791 | "until they are released as stable." ); |
1792 | #ifdef DEBUG |
1793 | if (named_tensor_meta) { |
1794 | TORCH_INTERNAL_ASSERT(named_tensor_meta->slow_dim() == dim()); |
1795 | } |
1796 | #endif |
1797 | if (named_tensor_meta) { |
1798 | if (!extra_meta_) { |
1799 | extra_meta_ = std::make_unique<ExtraMeta>(); |
1800 | } |
1801 | extra_meta_->named_tensor_meta_ = std::move(named_tensor_meta); |
1802 | key_set_ = key_set_.add(DispatchKey::Named); |
1803 | } else { |
1804 | if (extra_meta_) { |
1805 | extra_meta_->named_tensor_meta_ = nullptr; |
1806 | } |
1807 | key_set_ = key_set_.remove(DispatchKey::Named); |
1808 | } |
1809 | } |
1810 | |
1811 | void set_python_dispatch(bool k) { |
1812 | if (k) { |
1813 | key_set_ = key_set_.add(c10::python_ks); |
1814 | } else { |
1815 | key_set_ = key_set_ - c10::python_ks; |
1816 | } |
1817 | } |
1818 | |
1819 | bool is_python_dispatch() const { |
1820 | return key_set_.has_all(c10::python_ks); |
1821 | } |
1822 | |
1823 | /** |
1824 | * Return the pointer to named tensor metadata. |
1825 | */ |
1826 | const c10::NamedTensorMetaInterface* named_tensor_meta() const { |
1827 | if (!extra_meta_) { |
1828 | return nullptr; |
1829 | } |
1830 | return extra_meta_->named_tensor_meta_.get(); |
1831 | } |
1832 | |
1833 | c10::NamedTensorMetaInterface* named_tensor_meta() { |
1834 | if (!extra_meta_) { |
1835 | return nullptr; |
1836 | } |
1837 | return extra_meta_->named_tensor_meta_.get(); |
1838 | } |
1839 | |
1840 | bool has_named_tensor_meta() const { |
1841 | if (!extra_meta_) { |
1842 | return false; |
1843 | } |
1844 | return extra_meta_->named_tensor_meta_ != nullptr; |
1845 | } |
1846 | |
1847 | // NOTE [ TensorImpl Shallow-Copying ] |
1848 | // |
1849 | // TensorImpl shallow-copying is used when we want to have two Variables share |
1850 | // the same tensor metadata (e.g. sizes / strides / storage pointer / |
1851 | // storage_offset), but each with a different autograd history. Example call |
1852 | // sites: |
1853 | // |
1854 | // 1. `var_detached = var.detach()` uses `shallow_copy_and_detach()` to create |
1855 | // `var_detached` that shares the same tensor metadata with `var`, but with a |
1856 | // completely new autograd history. |
1857 | // 2. `var.set_data(tensor)` uses `shallow_copy_from()` to copy tensor |
1858 | // metadata from `tensor` into `var`, while keeping `var`'s original |
1859 | // AutogradMeta. |
1860 | // |
1861 | // Functions that shallow-copy a TensorImpl (such as |
1862 | // `shallow_copy_and_detach()` / `shallow_copy_from()` / |
1863 | // `copy_tensor_metadata()`) copy the tensor metadata fields (e.g. sizes / |
1864 | // strides / storage pointer / storage_offset) by value. However, the |
1865 | // following fields are not copied: |
1866 | // |
1867 | // 1. the AutogradMeta pointer, because it is unique for each Variable. |
1868 | // 2. the version counter, because the destination TensorImpl's version |
1869 | // counter is either set to the passed-in `version_counter` (in |
1870 | // `shallow_copy_and_detach()` and `copy_tensor_metadata()`), or it is kept |
1871 | // intact (in `shallow_copy_from()`). See NOTE [ Version Counter Sharing ] for |
1872 | // details. |
1873 | // |
1874 | // In `shallow_copy_and_detach()` and `copy_tensor_metadata()`, the passed-in |
1875 | // `allow_tensor_metadata_change` determines whether the TensorImpl |
1876 | // shallow-copy allows changes to its metadata (e.g. sizes / strides / storage |
1877 | // / storage_offset). See NOTE [ Metadata Change for a Detached Tensor ] for |
1878 | // details. |
1879 | // |
1880 | // In `shallow_copy_from()`, we don't check the destination TensorImpl's |
1881 | // `allow_tensor_metadata_change_`, because `shallow_copy_from()` is used for |
1882 | // implementing functions such as `var.set_data(tensor)`, which changes |
1883 | // `var`'s tensor metadata and expects its `allow_tensor_metadata_change_` to |
1884 | // be ignored. |
1885 | |
1886 | /** |
1887 | * One TensorImpl can be copied to another TensorImpl if they have the same |
* DispatchKeySet. The only two special cases (for legacy reasons) are:
1889 | * CPU is compatible with CUDA and SparseCPU is |
1890 | * compatible with SparseCUDA. |
1891 | */ |
1892 | inline bool has_compatible_shallow_copy_type(DispatchKeySet from) { |
1893 | auto is_dense = [](DispatchKeySet ts) { |
1894 | constexpr auto dense_backends = DispatchKeySet( |
1895 | {BackendComponent::CPUBit, |
1896 | BackendComponent::CUDABit, |
1897 | BackendComponent::MPSBit, |
1898 | BackendComponent::HIPBit, |
1899 | BackendComponent::XPUBit}); |
1900 | constexpr auto dense_k = DispatchKeySet(DispatchKey::Dense); |
1901 | return ts.has_any(dense_k) && ts.has_any(dense_backends); |
1902 | }; |
1903 | auto is_sparse = [](DispatchKeySet ts) { |
1904 | constexpr auto sparse_backends = DispatchKeySet( |
1905 | {BackendComponent::CPUBit, |
1906 | BackendComponent::CUDABit, |
1907 | BackendComponent::HIPBit, |
1908 | BackendComponent::XPUBit}); |
1909 | constexpr auto sparse_k = DispatchKeySet(DispatchKey::Sparse); |
1910 | return ts.has_any(sparse_k) && ts.has_any(sparse_backends); |
1911 | }; |
1912 | return (key_set_ == from) || (is_dense(key_set_) && is_dense(from)) || |
1913 | (is_sparse(key_set_) && is_sparse(from)); |
1914 | } |
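// Hedged sketch (not part of this header): per the legacy rule above, a dense
// CPU key set is shallow-copy compatible with a dense CUDA one, but a dense
// key set is never compatible with a sparse one. `cpu_impl` is hypothetical.
//
//   // cpu_impl->has_compatible_shallow_copy_type(DispatchKeySet(DispatchKey::CUDA))
//   //   -> true  (both dense)
//   // cpu_impl->has_compatible_shallow_copy_type(DispatchKeySet(DispatchKey::SparseCPU))
//   //   -> false (dense vs. sparse)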
1915 | |
1916 | private: |
1917 | template <typename VariableVersion> |
1918 | c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach_core( |
1919 | VariableVersion&& version_counter, |
1920 | bool allow_tensor_metadata_change) const; |
1921 | |
1922 | public: |
1923 | /** |
1924 | * Return a TensorImpl that is a shallow-copy of this TensorImpl. |
1925 | * |
1926 | * For usage of `version_counter` and `allow_tensor_metadata_change`, |
1927 | * see NOTE [ TensorImpl Shallow-Copying ]. |
1928 | */ |
1929 | virtual c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach( |
1930 | const c10::VariableVersion& version_counter, |
1931 | bool allow_tensor_metadata_change) const; |
1932 | |
1933 | /** |
1934 | * Return a TensorImpl that is a shallow-copy of this TensorImpl. |
1935 | * |
1936 | * For usage of `version_counter` and `allow_tensor_metadata_change`, |
1937 | * see NOTE [ TensorImpl Shallow-Copying ]. |
1938 | */ |
1939 | virtual c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach( |
1940 | c10::VariableVersion&& version_counter, |
1941 | bool allow_tensor_metadata_change) const; |
1942 | |
1943 | /** |
1944 | * Shallow-copies data from another TensorImpl into this TensorImpl. |
1945 | * |
1946 | * For why this function doesn't check this TensorImpl's |
1947 | * `allow_tensor_metadata_change_`, see NOTE [ TensorImpl Shallow-Copying ]. |
1948 | */ |
1949 | virtual void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) { |
1950 | copy_tensor_metadata( |
1951 | /*src_impl=*/impl.get(), |
1952 | /*dest_impl=*/this, |
1953 | /*version_counter=*/version_counter(), |
1954 | /*allow_tensor_metadata_change=*/allow_tensor_metadata_change()); |
1955 | refresh_numel(); |
1956 | refresh_contiguous(); |
1957 | } |
1958 | |
// Inference tensors don't have a version counter;
// set_version_counter is a no-op for them.
1961 | void set_version_counter(const c10::VariableVersion& version_counter) { |
1962 | TORCH_CHECK( |
1963 | !(is_inference() && version_counter.enabled()), |
1964 | "Cannot set version_counter for inference tensor" ); |
1965 | version_counter_ = version_counter; |
1966 | } |
1967 | |
1968 | void set_version_counter(c10::VariableVersion&& version_counter) { |
1969 | TORCH_CHECK( |
1970 | !(is_inference() && version_counter.enabled()), |
1971 | "Cannot set version_counter for inference tensor" ); |
1972 | version_counter_ = std::move(version_counter); |
1973 | } |
1974 | |
1975 | const c10::VariableVersion& version_counter() const noexcept { |
1976 | return version_counter_; |
1977 | } |
1978 | |
1979 | void bump_version() { |
1980 | version_counter_.bump(); |
1981 | } |
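// Hedged sketch (not part of this header): in-place mutations bump the version
// counter so autograd can detect that a saved tensor has been overwritten.
// `impl` is a hypothetical (non-inference) TensorImpl*.
//
//   auto v0 = impl->version_counter().current_version();
//   impl->bump_version();   // e.g. right after writing into the storage
//   // impl->version_counter().current_version() == v0 + 1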
1982 | |
1983 | impl::PyObjectSlot* pyobj_slot() { |
1984 | return &pyobj_slot_; |
1985 | } |
1986 | |
1987 | const impl::PyObjectSlot* pyobj_slot() const { |
1988 | return &pyobj_slot_; |
1989 | } |
1990 | |
1991 | private: |
1992 | // See NOTE [c10::optional operator usage in CUDA] |
1993 | // We probably don't want to expose this publicly until |
1994 | // the note is addressed. |
1995 | c10::optional<c10::Device> device_opt() const { |
1996 | return device_opt_; |
1997 | } |
1998 | |
1999 | public: |
2000 | /** |
2001 | * The device type of a Tensor, e.g., DeviceType::CPU or DeviceType::CUDA. |
2002 | */ |
2003 | DeviceType device_type() const { |
2004 | // TODO: A useful internal assert would be to show that device_opt_ is null |
2005 | // only if you are an undefined tensor |
2006 | TORCH_CHECK( |
2007 | device_opt_.has_value(), |
2008 | "device_type cannot be run on undefined Tensor" ); |
2009 | // See NOTE [c10::optional operator usage in CUDA] |
2010 | return (*device_opt_).type(); |
2011 | } |
2012 | |
2013 | /** |
2014 | * @brief Extends the outer-most dimension of this tensor by num elements, |
2015 | * preserving the existing data. |
2016 | * |
2017 | * The underlying data may be reallocated in order to accommodate the new |
* elements, in which case this tensor's capacity is grown by a factor of
* growthPct. This ensures that Extend runs with amortized O(1) time
2020 | * complexity. |
2021 | * |
2022 | * This op is auto-asynchronous if the underlying device (CUDA) supports it. |
2023 | */ |
2024 | void Extend(int64_t num, float growthPct); |
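// Hedged usage sketch (not part of this header): appending rows to a
// caffe2-style buffer. With growthPct = 50, running out of capacity
// reallocates to roughly 1.5x what is needed, which keeps repeated Extend
// calls amortized O(1). `impl` is a hypothetical TensorImpl* already
// Resize()d to {0, 16}.
//
//   impl->Extend(/*num=*/1, /*growthPct=*/50.0f);  // sizes become {1, 16}
//   impl->Extend(/*num=*/4, /*growthPct=*/50.0f);  // sizes become {5, 16}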
2025 | |
2026 | /** |
2027 | * @brief Reserve space for the underlying tensor. |
2028 | * |
* This must be called after Resize(), since we only specify the first
* dimension. This does not copy over the old data to the newly allocated
* space.
2031 | */ |
2032 | void ReserveSpace(int64_t outer_dim); |
2033 | |
2034 | /** |
2035 | * @brief Resizes a tensor. |
2036 | * |
2037 | * Resize takes in a vector of ints specifying the dimensions of the tensor. |
2038 | * You can pass in an empty vector to specify that it is a scalar (i.e. |
2039 | * containing one single item). |
2040 | * |
2041 | * The underlying storage may be deleted after calling Resize: if the new |
2042 | * shape leads to a different number of items in the tensor, the old memory |
2043 | * is deleted and new memory will be allocated next time you call |
2044 | * mutable_data(). However, if the shape is different but the total number of |
2045 | * items is the same, the underlying storage is kept. |
2046 | * |
2047 | * This method respects caffe2_keep_on_shrink. Consult the internal logic |
2048 | * of this method to see exactly under what circumstances this flag matters. |
2049 | */ |
2050 | template <typename... Ts> |
2051 | void Resize(Ts... dim_source) { |
2052 | bool size_changed = SetDims(dim_source...); |
2053 | if (size_changed) { |
2054 | HandleResize(); |
2055 | } |
2056 | } |
2057 | |
2058 | template <typename T> |
2059 | void Resize(const std::vector<T>& dim_source) { |
2060 | Resize(ArrayRef<T>(dim_source)); |
2061 | } |
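// Hedged usage sketch (not part of this header): Resize() only updates
// metadata; memory is allocated lazily by the first (raw_)mutable_data()
// call, as the comment above describes. `impl` is a hypothetical TensorImpl*.
//
//   impl->Resize(2, 3);                      // numel() == 6, nothing allocated
//   float* p = impl->mutable_data<float>();  // allocates 6 * sizeof(float)
//   impl->Resize(3, 2);                      // same numel, storage is kept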
2062 | |
2063 | /** |
2064 | * Resizes the tensor without touching underlying storage. |
* This requires the total number of elements in the tensor to remain constant.
2066 | */ |
2067 | void Reshape(const std::vector<int64_t>& dims); |
2068 | |
2069 | /** |
2070 | * Release whatever memory the tensor was holding but keep size and type |
2071 | * information. Subsequent call to mutable_data will trigger new memory |
2072 | * allocation. |
2073 | */ |
2074 | void FreeMemory(); |
2075 | |
2076 | /** |
2077 | * @brief Shares the data with another tensor. |
2078 | * |
2079 | * To share data between two tensors, the sizes of the two tensors must be |
2080 | * equal already. The reason we do not implicitly do a Resize to make the two |
2081 | * tensors have the same shape is that we want to allow tensors of different |
2082 | * shapes but the same number of items to still be able to share data. This |
2083 | * allows one to e.g. have a n-dimensional Tensor and a flattened version |
2084 | * sharing the same underlying storage. |
2085 | * |
2086 | * The source tensor should already have its data allocated. |
2087 | */ |
2088 | // To be deprecated |
2089 | void ShareData(const TensorImpl& src); |
2090 | |
2091 | void ShareExternalPointer( |
2092 | DataPtr&& data_ptr, |
2093 | const caffe2::TypeMeta data_type, |
2094 | size_t size_bytes); |
2095 | |
2096 | /** |
2097 | * Returns a mutable raw pointer of the underlying storage. Since we will need |
2098 | * to know the type of the data for allocation, a TypeMeta object is passed in |
* to specify the necessary information. This is conceptually equivalent to
2100 | * calling mutable_data<T>() where the TypeMeta parameter meta is derived from |
2101 | * the type T. This function differs from mutable_data<T>() in the sense that |
2102 | * the type T can be specified during runtime via the TypeMeta object. |
2103 | * |
2104 | * If the existing data does not match the desired type, it will be deleted |
2105 | * and a new storage will be created. |
2106 | */ |
2107 | inline void* raw_mutable_data(const caffe2::TypeMeta meta) { |
2108 | // For 0-size tensors it's fine to return any pointer (including nullptr) |
2109 | if (data_type_ == meta && storage_initialized()) { |
2110 | return static_cast<void*>( |
2111 | static_cast<char*>(storage_.data()) + |
2112 | storage_offset_ * meta.itemsize()); |
2113 | } else { |
2114 | bool had_special_dtor = data_type_.placementDelete() != nullptr; |
2115 | storage_offset_ = 0; |
2116 | data_type_ = meta; |
2117 | // NB: device is not changed |
2118 | |
2119 | // We can reuse the existing buffer if the current data does not have |
2120 | // a special destructor and the new data doesn't have a special |
2121 | // constructor. |
2122 | if (numel_ == 0 || |
2123 | (meta.placementNew() == nullptr && !had_special_dtor && |
2124 | (storage_.nbytes() >= (numel_ * data_type_.itemsize())))) { |
2125 | TORCH_INTERNAL_ASSERT( |
storage_offset_ == 0); // storage_offset_ was just reset to 0 above
2127 | return storage_.data(); |
2128 | } |
2129 | const Allocator* allocator = storage_.allocator(); |
2130 | // Storage might have nullptr allocator in rare cases, for example, if |
2131 | // an external memory segment has been wrapped with Tensor and we don't |
2132 | // know how to reallocate it. However, in order to preserve legacy C2 |
2133 | // behavior, we allow reallocating the memory using default allocator. |
2134 | if (allocator == nullptr) { |
2135 | allocator = GetAllocator(storage_.device_type()); |
2136 | } |
2137 | if (meta.placementNew()) { |
2138 | // For types that need placement new, we will call it, as well as |
2139 | // making sure that when the data is freed, it calls the right |
2140 | // destruction procedure. |
2141 | auto size = numel_; |
2142 | auto dtor = data_type_.placementDelete(); |
2143 | auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize()); |
2144 | storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr( |
2145 | std::move(data_ptr), dtor, size, storage_.device())); |
2146 | data_type_.placementNew()(storage_.data(), numel_); |
2147 | } else { |
2148 | // For fundamental type, new and delete is easier. |
2149 | storage_.set_data_ptr_noswap( |
2150 | allocator->allocate(numel_ * data_type_.itemsize())); |
2151 | } |
2152 | storage_.set_nbytes(numel_ * data_type_.itemsize()); |
2153 | TORCH_INTERNAL_ASSERT( |
2154 | storage_offset_ == 0); // because we just reallocated |
2155 | device_opt_ = storage_.device(); |
2156 | return storage_.data(); |
2157 | } |
2158 | } |
2159 | |
2160 | /** |
2161 | * Returns a typed pointer of the underlying storage. |
2162 | * |
* For fundamental types, we reuse the existing storage (if any) when it
* has sufficient capacity.
2165 | */ |
2166 | template <typename T> |
2167 | inline T* mutable_data() { |
2168 | if (storage_initialized() && data_type_.Match<T>()) { |
2169 | return static_cast<T*>(storage_.data()) + storage_offset_; |
2170 | } |
2171 | // Check it here statically - otherwise TypeMeta would throw the runtime |
// error in an attempt to invoke TypeMeta::ctor()
2173 | static_assert( |
2174 | std::is_default_constructible<T>::value, |
"Tensor can't hold non-default-constructible types");
2176 | return static_cast<T*>(raw_mutable_data(caffe2::TypeMeta::Make<T>())); |
2177 | } |
2178 | |
2179 | /** |
2180 | * True if a tensor is storage initialized. A tensor may become |
2181 | * storage UNINITIALIZED after a Resize() or FreeMemory() |
2182 | */ |
2183 | bool storage_initialized() const { |
2184 | TORCH_CHECK( |
2185 | has_storage(), |
2186 | "cannot call storage_initialized on tensor that does not have storage" ); |
2187 | return storage_.data() || numel_ == 0; |
2188 | } |
2189 | |
2190 | /** |
2191 | * True if a tensor is dtype initialized. A tensor allocated with |
2192 | * Caffe2-style constructors is dtype uninitialized until the |
2193 | * first time mutable_data<T>() is called. |
2194 | */ |
2195 | bool dtype_initialized() const noexcept { |
2196 | return data_type_ != caffe2::TypeMeta(); |
2197 | } |
2198 | |
2199 | void set_storage_keep_dtype(at::Storage storage) { |
2200 | TORCH_CHECK( |
2201 | allow_tensor_metadata_change(), |
2202 | "set_storage " , |
2203 | err_msg_tensor_metadata_change_not_allowed); |
2204 | storage_ = std::move(storage); |
2205 | device_opt_ = storage_.device(); |
2206 | } |
2207 | |
2208 | void set_storage_and_dtype( |
2209 | at::Storage storage, |
2210 | const caffe2::TypeMeta data_type) { |
2211 | set_storage_keep_dtype(std::move(storage)); |
2212 | data_type_ = data_type; |
2213 | } |
2214 | |
2215 | void empty_tensor_restride_symint(MemoryFormat memory_format); |
2216 | |
2217 | /** |
2218 | * Set the strides of the tensor to match memory_format |
2219 | * |
* WARNING: This function doesn't rearrange data and assumes the tensor is
* memory-contiguous.
2222 | */ |
2223 | void empty_tensor_restride(MemoryFormat memory_format) { |
2224 | if (has_symbolic_sizes_strides_) { |
2225 | empty_tensor_restride_symint(memory_format); |
2226 | return; |
2227 | } |
2228 | #ifdef DEBUG |
2229 | TORCH_INTERNAL_ASSERT( |
2230 | compute_numel() == numel_, |
2231 | "If you are seeing this error, that means empty_tensor_restride was " |
2232 | "called before setting correct numel" ); |
2233 | #endif |
2234 | switch (memory_format) { |
2235 | case MemoryFormat::Contiguous: { |
2236 | // dim_ is a virtual call, don't repeat it |
2237 | const auto dim_ = dim(); |
2238 | sizes_and_strides_.resize(dim_); |
2239 | if (dim_ > 0) { |
2240 | const auto last_idx = dim_ - 1; |
2241 | sizes_and_strides_.stride_at_unchecked(last_idx) = 1; |
2242 | for (auto i = last_idx - 1; i >= 0; --i) { |
2243 | sizes_and_strides_.stride_at_unchecked(i) = |
2244 | sizes_and_strides_.stride_at_unchecked(i + 1) * |
2245 | std::max<int64_t>( |
2246 | sizes_and_strides_.size_at_unchecked(i + 1), 1); |
2247 | } |
2248 | } |
2249 | break; |
2250 | } |
2251 | case MemoryFormat::ChannelsLast: { |
2252 | TORCH_CHECK( |
2253 | dim() == 4, "required rank 4 tensor to use channels_last format" ); |
2254 | set_sizes_and_strides(sizes(), get_channels_last_strides_2d(sizes())); |
2255 | break; |
2256 | } |
2257 | case MemoryFormat::ChannelsLast3d: { |
2258 | TORCH_CHECK( |
2259 | dim() == 5, |
2260 | "required rank 5 tensor to use channels_last_3d format" ); |
2261 | set_sizes_and_strides(sizes(), get_channels_last_strides_3d(sizes())); |
2262 | break; |
2263 | } |
2264 | case MemoryFormat::Preserve: |
2265 | TORCH_CHECK(false, "unsupported memory format " , memory_format); |
2266 | // Cleaning warning messages, no need to break as TORCH_CHECK(false) |
2267 | // terminates flow. |
2268 | // break; |
2269 | case MemoryFormat::NumOptions: |
2270 | TORCH_INTERNAL_ASSERT(false, "invalid memory format " , memory_format); |
2271 | } |
2272 | // recompute contiguous flag, as currently NHWC/NCHW flags are not mutually |
2273 | // exclusive see #24090 |
2274 | refresh_contiguous(); |
2275 | } |
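// Hedged worked example (not part of this header): for a rank-4 tensor with
// sizes {N, C, H, W} = {2, 3, 4, 5},
//   MemoryFormat::Contiguous   produces strides {60, 20, 5, 1}, while
//   MemoryFormat::ChannelsLast produces strides {60, 1, 15, 3} (NHWC layout),
// i.e. the channel stride becomes 1 and the H/W strides pick up a factor of C.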
2276 | |
2277 | bool is_strides_like(at::MemoryFormat memory_format) const { |
2278 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { |
2279 | return is_strides_like_custom(memory_format); |
2280 | } |
2281 | return is_strides_like_default(memory_format); |
2282 | } |
2283 | |
2284 | bool is_strides_like_channels_last() const { |
2285 | return is_strides_like(at::MemoryFormat::ChannelsLast); |
2286 | } |
2287 | |
2288 | bool is_strides_like_channels_last_3d() const { |
2289 | return is_strides_like(at::MemoryFormat::ChannelsLast3d); |
2290 | } |
2291 | |
2292 | bool is_non_overlapping_and_dense() const { |
2293 | if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { |
2294 | return is_non_overlapping_and_dense_custom(); |
2295 | } |
2296 | return is_non_overlapping_and_dense_default(); |
2297 | } |
2298 | |
2299 | bool has_symbolic_sizes_strides() const { |
2300 | return has_symbolic_sizes_strides_; |
2301 | } |
2302 | |
2303 | private: |
2304 | void HandleResize(); |
2305 | |
2306 | // The Caffe2 Resize() method supports being called both as Resize({2,2}) as |
2307 | // well as variadic with Resize(2, 2). These overloads provide all of the |
2308 | // supported calling configurations, while being overloads (and not templates) |
2309 | // so that implicit conversions still work. |
2310 | // |
2311 | // SetDims on ArrayRef is internally implemented as a template, so we can |
2312 | // handle both ArrayRefs of different types (there are some uses of |
2313 | // Resize in Caffe2 which pass in int, not int64_t.) |
2314 | |
2315 | template < |
2316 | typename T, |
2317 | typename = typename std::enable_if<std::is_integral<T>::value>::type> |
2318 | bool SetDimsTemplate(ArrayRef<T> src) { |
2319 | TORCH_CHECK( |
2320 | !has_symbolic_sizes_strides_, |
2321 | "SetDims() called on tensor with symbolic shape" ) |
2322 | |
2323 | auto old_numel = numel_; |
2324 | sizes_and_strides_.resize(src.size()); |
2325 | int64_t new_numel = 1; |
2326 | for (const auto i : c10::irange(src.size())) { |
2327 | new_numel *= src[i]; |
2328 | sizes_and_strides_.size_at_unchecked(i) = src[i]; |
2329 | } |
2330 | numel_ = new_numel; |
2331 | empty_tensor_restride(MemoryFormat::Contiguous); |
2332 | return numel_ != old_numel; |
2333 | } |
2334 | |
2335 | bool SetDims(ArrayRef<int64_t> s) { |
2336 | return SetDimsTemplate(s); |
2337 | } |
2338 | |
2339 | bool SetDims(ArrayRef<int> s) { |
2340 | return SetDimsTemplate(s); |
2341 | } |
2342 | |
2343 | bool SetDims(ArrayRef<size_t> s) { |
2344 | return SetDimsTemplate(s); |
2345 | } |
2346 | |
2347 | bool SetDims() { |
2348 | return SetDims(IntArrayRef{}); |
2349 | } |
2350 | |
2351 | bool SetDims(const int64_t d0) { |
2352 | return SetDims(IntArrayRef{d0}); |
2353 | } |
2354 | |
2355 | bool SetDims(const int64_t d0, const int64_t d1) { |
2356 | return SetDims(IntArrayRef{d0, d1}); |
2357 | } |
2358 | |
2359 | bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) { |
2360 | return SetDims(IntArrayRef{d0, d1, d2}); |
2361 | } |
2362 | |
2363 | bool SetDims( |
2364 | const int64_t d0, |
2365 | const int64_t d1, |
2366 | const int64_t d2, |
2367 | const int64_t d3) { |
2368 | return SetDims(IntArrayRef{d0, d1, d2, d3}); |
2369 | } |
2370 | |
2371 | /** |
2372 | * Compute the number of elements based on the sizes of a tensor. |
2373 | */ |
2374 | // NB: This is ONLY called when sizes_and_strides_ is used directly; if |
2375 | // we are virtualizing, then numel calls are virtualized as well, and this |
2376 | // should never get called |
2377 | int64_t compute_numel() const { |
2378 | TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!has_symbolic_sizes_strides_); |
2379 | #if C10_HAS_BUILTIN_OVERFLOW() && !defined(C10_MOBILE) |
2380 | // Use overflow checks if supported by the compiler |
2381 | return safe_compute_numel(); |
2382 | #else |
2383 | return c10::multiply_integers(sizes_and_strides_.sizes_arrayref()); |
2384 | #endif |
2385 | } |
2386 | |
2387 | /** |
2388 | * Compute the number of elements based on the sizes of a |
2389 | * tensor. Catches integer overflow that may occur when a tensor |
2390 | * using a sparse layout has multiple dimensions with large sizes. |
2391 | */ |
2392 | int64_t safe_compute_numel() const { |
2393 | TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!has_symbolic_sizes_strides_); |
2394 | uint64_t n = 1; |
2395 | bool overflows = |
2396 | c10::safe_multiplies_u64(sizes_and_strides_.sizes_arrayref(), &n); |
2397 | constexpr auto numel_max = std::min( |
2398 | static_cast<uint64_t>(std::numeric_limits<int64_t>::max()), |
2399 | static_cast<uint64_t>(std::numeric_limits<size_t>::max())); |
2400 | |
2401 | overflows |= (n > numel_max); |
2402 | TORCH_CHECK(!overflows, "numel: integer multiplication overflow" ); |
2403 | return static_cast<int64_t>(n); |
2404 | } |
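// Hedged worked example (not part of this header): a sparse-layout tensor may
// legitimately advertise sizes such as {1LL << 40, 1LL << 40}; the true
// element count 2^80 does not fit in int64_t, so safe_multiplies_u64 reports
// overflow and the TORCH_CHECK above fires instead of silently wrapping.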
2405 | |
2406 | SymInt compute_sym_numel() const { |
2407 | TORCH_INTERNAL_ASSERT_DEBUG_ONLY(has_symbolic_sizes_strides_); |
2408 | SymInt numel = 1; |
2409 | for (const auto& s : extra_meta_->sizes_) { |
2410 | numel *= s; |
2411 | } |
2412 | return numel; |
2413 | } |
2414 | |
2415 | /** |
2416 | * Compute whether or not a tensor is contiguous based on the sizes and |
2417 | * strides of a tensor. |
2418 | */ |
2419 | bool compute_contiguous(identity<bool>) const; |
2420 | |
2421 | bool compute_channels_last_contiguous_2d(identity<bool>) const; |
2422 | |
2423 | bool compute_channels_last_contiguous_3d(identity<bool>) const; |
2424 | |
2425 | bool compute_strides_like_channels_last_2d(identity<bool>) const; |
2426 | |
2427 | bool compute_strides_like_channels_last_3d(identity<bool>) const; |
2428 | |
2429 | bool compute_non_overlapping_and_dense(identity<bool>) const; |
2430 | |
2431 | SymBool compute_contiguous(identity<SymBool>) const; |
2432 | |
2433 | SymBool compute_channels_last_contiguous_2d(identity<SymBool>) const; |
2434 | |
2435 | SymBool compute_channels_last_contiguous_3d(identity<SymBool>) const; |
2436 | |
2437 | SymBool compute_strides_like_channels_last_2d(identity<SymBool>) const; |
2438 | |
2439 | SymBool compute_strides_like_channels_last_3d(identity<SymBool>) const; |
2440 | |
2441 | SymBool compute_non_overlapping_and_dense(identity<SymBool>) const; |
2442 | |
2443 | protected: |
2444 | /** |
2445 | * Recompute the cached numel of a tensor. Call this if you modify |
2446 | * sizes. |
2447 | * |
2448 | * For tensors with sparse layouts, use safe_refresh_numel() instead |
2449 | * because it will catch integer overflow that may occur for tensors |
2450 | * with sparse layouts and large dimensions. |
2451 | * |
* NB: We may uselessly recompute the cached numel even in situations where
* it is never used (e.g., when CustomSizes is set for Python). However,
2454 | * we still must keep it up to date in case the Python overload |
2455 | * returns None (in which case we will consult the field here). This also |
2456 | * implies that sizes/strides will never be complete garbage; in the |
2457 | * very worst case scenario, it will reflect a 1-dim zero size tensor. |
2458 | */ |
2459 | void refresh_numel() { |
2460 | if (has_symbolic_sizes_strides_) { |
2461 | extra_meta_->numel_ = compute_sym_numel(); |
2462 | } else { |
2463 | numel_ = compute_numel(); |
2464 | } |
2465 | } |
2466 | |
2467 | /** |
2468 | * Recompute the cached numel of a tensor. Call this if you modify |
2469 | * sizes. Use only for tensors with sparse layouts because only |
2470 | * sparse tensor are likely to have sizes that may lead to integer |
2471 | * overflow when computing numel. |
2472 | */ |
2473 | void safe_refresh_numel() { |
2474 | if (has_symbolic_sizes_strides_) { |
2475 | // NB: sym numel is done with symbolic integers, which handle overflow |
2476 | // checking |
2477 | extra_meta_->numel_ = compute_sym_numel(); |
2478 | } else { |
2479 | numel_ = safe_compute_numel(); |
2480 | } |
2481 | } |
2482 | |
2483 | private: |
2484 | // NB: the TypeId argument prevents confusion where you pass a true/false |
2485 | // literal and pick the wrong overload |
2486 | |
2487 | void _set_is_contiguous(identity<bool>, bool b) { |
2488 | is_contiguous_ = b; |
2489 | } |
2490 | |
2491 | void _set_is_contiguous(identity<SymBool>, SymBool b) { |
2492 | extra_meta_->is_contiguous_ = std::move(b); |
2493 | } |
2494 | |
2495 | void _set_is_channels_last_contiguous(identity<bool>, bool b) { |
2496 | is_channels_last_contiguous_ = b; |
2497 | } |
2498 | |
2499 | void _set_is_channels_last_contiguous(identity<SymBool>, SymBool b) { |
2500 | extra_meta_->is_channels_last_contiguous_ = std::move(b); |
2501 | } |
2502 | |
2503 | void _set_is_channels_last_3d_contiguous(identity<bool>, bool b) { |
2504 | is_channels_last_3d_contiguous_ = b; |
2505 | } |
2506 | |
2507 | void _set_is_channels_last_3d_contiguous(identity<SymBool>, SymBool b) { |
2508 | extra_meta_->is_channels_last_3d_contiguous_ = std::move(b); |
2509 | } |
2510 | |
2511 | void _set_is_channels_last(identity<bool>, bool b) { |
2512 | is_channels_last_ = b; |
2513 | } |
2514 | |
2515 | void _set_is_channels_last(identity<SymBool>, SymBool b) { |
2516 | extra_meta_->is_channels_last_ = std::move(b); |
2517 | } |
2518 | |
2519 | void _set_is_channels_last_3d(identity<bool>, bool b) { |
2520 | is_channels_last_3d_ = b; |
2521 | } |
2522 | |
2523 | void _set_is_channels_last_3d(identity<SymBool>, SymBool b) { |
2524 | extra_meta_->is_channels_last_3d_ = std::move(b); |
2525 | } |
2526 | |
2527 | void _set_is_non_overlapping_and_dense(identity<bool>, bool b) { |
2528 | is_non_overlapping_and_dense_ = b; |
2529 | } |
2530 | |
2531 | void _set_is_non_overlapping_and_dense(identity<SymBool>, SymBool b) { |
2532 | extra_meta_->is_non_overlapping_and_dense_ = std::move(b); |
2533 | } |
2534 | |
2535 | // These are little wrappers over the real compute_ functions that |
2536 | // can make use of other contiguity fields to short circuit. |
2537 | // They need to be implemented separately for SymBool, as SymBool does |
2538 | // not short circuit. |
2539 | // TODO: should the SymBool cases avoid the short circuit? Need to reason |
2540 | // if its correct, and reason if the simpler expressions are better for |
2541 | // analysis (maybe not!) |
2542 | |
2543 | bool compute_is_non_overlapping_and_dense_dim4(identity<bool> type_id) { |
2544 | return is_contiguous_ || is_channels_last_contiguous_ || |
2545 | compute_non_overlapping_and_dense(type_id); |
2546 | } |
2547 | |
2548 | SymBool compute_is_non_overlapping_and_dense_dim4(identity<SymBool> type_id); |
2549 | |
2550 | bool compute_channels_last_contiguous_3d_dim5(identity<bool> type_id) { |
2551 | return !is_channels_last_contiguous_ && |
2552 | compute_channels_last_contiguous_3d(type_id); |
2553 | } |
2554 | |
2555 | SymBool compute_channels_last_contiguous_3d_dim5(identity<SymBool> type_id); |
2556 | |
2557 | bool compute_channels_last_2d_dim5(identity<bool> type_id) { |
2558 | return !is_channels_last_3d_contiguous_ && |
2559 | compute_strides_like_channels_last_2d(type_id); |
2560 | } |
2561 | |
2562 | SymBool compute_channels_last_2d_dim5(identity<SymBool> type_id); |
2563 | |
2564 | bool compute_channels_last_3d_dim5(identity<bool> type_id) { |
2565 | return !is_channels_last_ && compute_strides_like_channels_last_3d(type_id); |
2566 | } |
2567 | |
2568 | SymBool compute_channels_last_3d_dim5(identity<SymBool> type_id); |
2569 | |
2570 | bool compute_is_non_overlapping_and_dense_dim5(identity<bool> type_id) { |
2571 | return is_contiguous_ || is_channels_last_contiguous_ || |
2572 | is_channels_last_3d_contiguous_ || |
2573 | compute_non_overlapping_and_dense(type_id); |
2574 | } |
2575 | |
2576 | SymBool compute_is_non_overlapping_and_dense_dim5(identity<SymBool> type_id); |
2577 | |
2578 | bool compute_is_non_overlapping_and_dense_anydim(identity<bool> type_id) { |
2579 | return is_contiguous_ || compute_non_overlapping_and_dense(type_id); |
2580 | } |
2581 | |
2582 | SymBool compute_is_non_overlapping_and_dense_anydim( |
2583 | identity<SymBool> type_id); |
2584 | |
2585 | template <typename T> |
2586 | void _refresh_contiguous() { |
2587 | auto type_id = identity<T>(); |
2588 | // Note: |
2589 | // Dim 0, 1, 2 will never be a channels last 2d/3d format |
// Dim 3+ may be a channels last 2d format (only Dim 4 at this point).
// Dim 4+ may be a channels last 3d format (only Dim 5 at this point).
2593 | switch (dim()) { |
2594 | case 4: { |
2595 | _set_is_contiguous(type_id, compute_contiguous(type_id)); |
2596 | _set_is_channels_last_contiguous( |
2597 | type_id, compute_channels_last_contiguous_2d(type_id)); |
2598 | _set_is_channels_last_3d_contiguous(type_id, false); |
2599 | _set_is_channels_last( |
2600 | type_id, compute_strides_like_channels_last_2d(type_id)); |
2601 | _set_is_channels_last_3d(type_id, false); |
2602 | _set_is_non_overlapping_and_dense( |
2603 | type_id, compute_is_non_overlapping_and_dense_dim4(type_id)); |
2604 | break; |
2605 | } |
2606 | case 5: { |
2607 | _set_is_contiguous(type_id, compute_contiguous(type_id)); |
2608 | _set_is_channels_last_contiguous( |
2609 | type_id, compute_channels_last_contiguous_2d(type_id)); |
2610 | _set_is_channels_last_3d_contiguous( |
2611 | type_id, compute_channels_last_contiguous_3d_dim5(type_id)); |
2612 | _set_is_channels_last(type_id, compute_channels_last_2d_dim5(type_id)); |
2613 | _set_is_channels_last_3d( |
2614 | type_id, compute_channels_last_3d_dim5(type_id)); |
2615 | _set_is_non_overlapping_and_dense( |
2616 | type_id, compute_is_non_overlapping_and_dense_dim5(type_id)); |
2617 | break; |
2618 | } |
2619 | default: |
// is_channels_last_ and is_channels_last_3d_ are suggested
// memory_formats. Being channels_last_contiguous doesn't necessarily
// mean the tensor is strided like channels_last: the strides on the
// channel dimension can suggest the desired memory_format, but they
// don't affect how the data is actually stored.
2625 | _set_is_contiguous(type_id, compute_contiguous(type_id)); |
2626 | _set_is_channels_last_contiguous(type_id, false); |
2627 | _set_is_channels_last_3d_contiguous(type_id, false); |
2628 | _set_is_channels_last(type_id, false); |
2629 | _set_is_channels_last_3d(type_id, false); |
2630 | _set_is_non_overlapping_and_dense( |
2631 | type_id, compute_is_non_overlapping_and_dense_anydim(type_id)); |
2632 | break; |
2633 | } |
2634 | } |
2635 | |
2636 | protected: |
2637 | /** |
2638 | * Recompute the cached contiguity of a tensor. Call this if you modify sizes |
2639 | * or strides. |
2640 | */ |
2641 | void refresh_contiguous() { |
2642 | if (has_symbolic_sizes_strides_) { |
2643 | _refresh_contiguous<SymBool>(); |
2644 | } else { |
2645 | _refresh_contiguous<bool>(); |
2646 | } |
2647 | } |
2648 | |
2649 | /** |
2650 | * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / |
2651 | * storage_offset) from one TensorImpl to another TensorImpl. |
2652 | * |
2653 | * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE |
2654 | * [ TensorImpl Shallow-Copying ]. |
2655 | */ |
2656 | static void copy_tensor_metadata( |
2657 | const TensorImpl* src_impl, |
2658 | TensorImpl* dest_impl, |
2659 | const c10::VariableVersion& version_counter, |
2660 | bool allow_tensor_metadata_change); |
2661 | |
2662 | /** |
2663 | * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / |
2664 | * storage_offset) from one TensorImpl to another TensorImpl. |
2665 | * |
2666 | * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE |
2667 | * [ TensorImpl Shallow-Copying ]. |
2668 | */ |
2669 | static void copy_tensor_metadata( |
2670 | const TensorImpl* src_impl, |
2671 | TensorImpl* dest_impl, |
2672 | c10::VariableVersion&& version_counter, |
2673 | bool allow_tensor_metadata_change); |
2674 | |
2675 | private: |
2676 | static void copy_tensor_metadata_except_version_counter( |
2677 | const TensorImpl* src_impl, |
2678 | TensorImpl* dest_impl, |
2679 | bool allow_tensor_metadata_change); |
2680 | |
2681 | protected: |
2682 | // Error message to show when the user tries to change tensor metadata on |
2683 | // Tensor created from .data or .detach(). |
2684 | // |
2685 | // See NOTE [ Metadata Change for a Detached Tensor ] for details. |
2686 | static const char* const err_msg_tensor_metadata_change_not_allowed; |
2687 | |
2688 | static void copy_generic_tensor_metadata( |
2689 | const TensorImpl* src_impl, |
2690 | TensorImpl* dest_impl); |
2691 | |
2692 | public: |
2693 | void set_storage_access_should_throw() { |
2694 | storage_access_should_throw_ = true; |
2695 | } |
2696 | |
2697 | public: |
2698 | void set_custom_sizes_strides(SizesStridesPolicy policy) { |
2699 | custom_sizes_strides_ = static_cast<uint8_t>(policy); |
2700 | refresh_sizes_strides_policy(); |
2701 | } |
2702 | |
2703 | void set_python_custom_sizes_strides(SizesStridesPolicy policy) { |
2704 | python_custom_sizes_strides_ = static_cast<uint8_t>(policy); |
2705 | refresh_sizes_strides_policy(); |
2706 | } |
2707 | |
2708 | void set_custom_device(bool custom_device) { |
2709 | custom_device_ = custom_device; |
2710 | refresh_device_policy(); |
2711 | } |
2712 | |
2713 | void set_custom_layout(bool custom_layout) { |
2714 | custom_layout_ = custom_layout; |
2715 | refresh_layout_policy(); |
2716 | } |
2717 | |
2718 | void set_python_custom_device(bool custom_device) { |
2719 | python_custom_device_ = custom_device; |
2720 | refresh_device_policy(); |
2721 | } |
2722 | |
2723 | void set_python_custom_layout(bool custom_layout) { |
2724 | python_custom_layout_ = custom_layout; |
2725 | refresh_layout_policy(); |
2726 | } |
2727 | |
2728 | protected: |
2729 | void refresh_sizes_strides_policy() { |
2730 | if (has_symbolic_sizes_strides_) { |
2731 | sizes_strides_policy_ = |
2732 | static_cast<uint8_t>(SizesStridesPolicy::CustomSizes); |
2733 | } else { |
2734 | sizes_strides_policy_ = |
2735 | std::max(custom_sizes_strides_, python_custom_sizes_strides_); |
2736 | } |
2737 | } |
2738 | |
2739 | void refresh_device_policy() { |
2740 | device_policy_ = custom_device_ || python_custom_device_; |
2741 | } |
2742 | |
2743 | void refresh_layout_policy() { |
2744 | layout_policy_ = custom_layout_ || python_custom_layout_; |
2745 | } |
2746 | |
2747 | protected: |
2748 | Storage storage_; |
2749 | |
2750 | private: |
2751 | // This pointer points to an AutogradMeta struct that stores autograd-specific |
2752 | // fields (such as grad_ / grad_fn_ / grad_accumulator_). This pointer always |
2753 | // has unique ownership (meaning only one TensorImpl can own it at a time). |
2754 | // |
2755 | // autograd_meta_ can be nullptr, as an optimization. When this occurs, it is |
2756 | // equivalent to having an autograd_meta_ pointing to a default constructed |
2757 | // AutogradMeta; intuitively, tensors which don't require grad will have this |
2758 | // field set to null. |
2759 | // |
2760 | // This means accessors on autograd_meta_ have to be careful to test if they |
2761 | // got a nullptr, and handle default behavior appropriately in that case. |
2762 | // |
2763 | // Note that we don't enforce the invariant that if the AutogradMeta is |
2764 | // default constructed, it is nullptr (to do this, we'd have to continuously |
2765 | // check if an AutogradMeta became, by mutation, equal to the default |
// constructed form). (This might be useful, but it seems rare enough that
2767 | // a requires_grad=True variable will turn back into the requires_grad=False |
2768 | // version.) So there are three representable states: |
2769 | // |
2770 | // 1. autograd_meta_ == nullptr |
2771 | // 2. autograd_meta_ is default constructed (semantically, same as (1)) |
2772 | // 3. autograd_meta_ has nontrivial information content |
2773 | // |
2774 | std::unique_ptr<c10::AutogradMetaInterface> autograd_meta_ = nullptr; |
2775 | |
2776 | protected: |
std::unique_ptr<c10::ExtraMeta> extra_meta_ = nullptr;
2778 | |
2779 | c10::VariableVersion version_counter_; |
2780 | |
2781 | impl::PyObjectSlot pyobj_slot_; |
2782 | |
2783 | c10::impl::SizesAndStrides sizes_and_strides_; |
2784 | |
2785 | int64_t storage_offset_ = 0; |
2786 | // If sizes and strides are empty, the numel is 1!! However, most of the |
2787 | // time, we will immediately set sizes to {0} and reset numel to 0. |
2788 | // (Can't do that in the default initializers, because there's no way to |
2789 | // spell "allocate a one-element array" for strides_). |
2790 | int64_t numel_ = 1; |
2791 | |
2792 | // INVARIANT: When storage is non-null, this type meta must |
2793 | // agree with the type meta in storage |
2794 | caffe2::TypeMeta data_type_; |
2795 | |
2796 | // NOTE [c10::optional operator usage in CUDA] |
2797 | // Our optional definition doesn't compile in .cu file if `value()` or |
2798 | // `operator->` are used. Instead, we always use `operator*`. |
2799 | // See https://github.com/pytorch/pytorch/issues/18496 for more info. |
2800 | // If this is too burdensome to maintain, we can just |
2801 | // manually implement this with an additional bool. |
2802 | |
2803 | // INVARIANT: When storage is non-null, this Device must |
2804 | // agree with the type meta in storage. |
2805 | // |
2806 | // INVARIANT: device_opt_ is only nullopt for undefined tensors |
2807 | // (which do not have a device.) |
2808 | c10::optional<c10::Device> device_opt_; |
2809 | |
2810 | // default member initializers for bit-fields only available with -std=c++2a |
2811 | // or -std=gnu++2a |
2812 | inline void init_bitfields() { |
2813 | is_contiguous_ = true; |
2814 | is_channels_last_ = false; |
2815 | is_channels_last_contiguous_ = false; |
2816 | is_channels_last_3d_ = false; |
2817 | is_channels_last_3d_contiguous_ = false; |
2818 | is_non_overlapping_and_dense_ = true; |
2819 | is_wrapped_number_ = false; |
2820 | allow_tensor_metadata_change_ = true; |
2821 | reserved_ = false; |
2822 | sizes_strides_policy_ = static_cast<uint8_t>(SizesStridesPolicy::Default); |
2823 | custom_sizes_strides_ = static_cast<uint8_t>(SizesStridesPolicy::Default); |
2824 | python_custom_sizes_strides_ = |
2825 | static_cast<uint8_t>(SizesStridesPolicy::Default); |
2826 | python_custom_device_ = false; |
2827 | python_custom_layout_ = false; |
2828 | custom_device_ = false; |
2829 | custom_layout_ = false; |
2830 | device_policy_ = false; |
2831 | layout_policy_ = false; |
2832 | storage_access_should_throw_ = false; |
2833 | has_symbolic_sizes_strides_ = false; |
2834 | } |
2835 | |
2836 | // Tensor is contiguous |
2837 | bool is_contiguous_ : 1; |
2838 | |
2839 | // Tensor is a subclass that does not permit storage access. |
2840 | bool storage_access_should_throw_ : 1; |
2841 | |
// Tensor is stored in the channels last 2d memory format, when dimensions
// order is (N)CHW and C-strides < W-strides < H-strides (< N-strides).
// (If the size of any dimension is equal to 1, that dimension's stride value
// is not taken into account.)
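// For example (illustrative): a tensor with sizes (N, C, H, W) = (1, 3, 4, 5)
// is channels last when its strides are (60, 1, 15, 3), i.e. memory is laid
// out in NHWC order and C-stride 1 < W-stride 3 < H-stride 15 < N-stride 60.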
2846 | bool is_channels_last_ : 1; |
2847 | |
// A channels-last contiguous tensor is a channels-last tensor which occupies
// a contiguous memory block.
2850 | bool is_channels_last_contiguous_ : 1; |
2851 | |
// Tensor is stored in the channels last 3d memory format, when dimensions
// order is (N)CDHW and C-strides < W-strides < H-strides < D-strides
// (< N-strides). (If the size of any dimension is equal to 1, that
// dimension's stride value is not taken into account.)
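// For example (illustrative): a tensor with sizes (N, C, D, H, W) =
// (1, 3, 2, 4, 5) is channels last 3d when its strides are
// (120, 1, 60, 15, 3), i.e. memory is laid out in NDHWC order.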
2856 | bool is_channels_last_3d_ : 1; |
2857 | |
// A channels-last 3d contiguous tensor is a channels-last 3d tensor which
// occupies a contiguous memory block.
2860 | bool is_channels_last_3d_contiguous_ : 1; |
2861 | |
// A dense tensor is a tensor that stores its values in a contiguous block of
// memory. A non-overlapping tensor is a tensor in which each element occupies
// its own distinct memory location (no two indices alias the same memory).
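// For example (illustrative): a tensor of size (2, 3) with strides (3, 1) or
// (1, 2) is non-overlapping and dense (its 6 elements occupy exactly the
// offsets 0..5); strides (6, 1) leave gaps (not dense), and strides (0, 1)
// make both rows alias the same memory (overlapping).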
2865 | bool is_non_overlapping_and_dense_ : 1; |
2866 | |
2867 | bool is_wrapped_number_ : 1; |
2868 | |
2869 | // NOTE [ Metadata Change for a Detached Tensor ] |
2870 | // |
2871 | // Normally, a user is allowed to change the tensor metadata |
2872 | // (e.g. sizes / strides / storage / storage_offset) of a tensor. |
2873 | // However, if the tensor is created by `t1_detached = t1.data` in Python |
2874 | // or `t1_detached = t1.detach()` in Python/C++, those changes to the |
2875 | // tensor metadata of `t1_detached` will not be propagated back to the |
2876 | // original tensor `t1`. In order to make such changes explicitly illegal, |
2877 | // we created the `allow_tensor_metadata_change_` flag, to prevent users |
2878 | // from changing metadata of the detached tensor and expecting the original |
2879 | // tensor to also be updated. |
2880 | // |
2881 | // NOTE: For a full list of tensor metadata fields, please see |
2882 | // `copy_tensor_metadata()` in TensorImpl and its subclasses to find |
2883 | // which fields are copied by value. |
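//
// A minimal illustration (assuming an existing at::Tensor t1):
//
//   at::Tensor t1_detached = t1.detach();
//   // t1_detached.resize_({2, 3});  // errors out: metadata changes are not
//   //                               // allowed on a tensor created from
//   //                               // .data or .detach()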
2884 | bool allow_tensor_metadata_change_ : 1; |
2885 | |
// We decided to keep reserved_; it will live in Tensor after the split.
// The logic is that if Extend() or ReserveSpace() were ever called,
// then subsequent Resize()s will not free up Storage.
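// Illustrative sketch (Caffe2-style API, assuming a caffe2::Tensor t):
//   t.ReserveSpace(1024);  // marks the tensor as reserved
//   t.Resize(16);          // later, smaller Resize()s keep the allocation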
2890 | bool reserved_ : 1; |
2891 | |
2892 | // Call _custom() virtual methods for |
2893 | // strides()/is_contiguous()/sizes()/dim()/numel() |
// This is effectively a combination of the custom-dispatch bits
// (custom_sizes_strides_ / python_custom_sizes_strides_)
// and has_symbolic_sizes_strides_
2896 | uint8_t sizes_strides_policy_ : 2; |
2897 | |
2898 | // Whether or not sizes_and_strides_ contains a symbolic value. |
2899 | bool has_symbolic_sizes_strides_ : 1; |
2900 | |
2901 | // Call _custom() virtual method for |
2902 | // strides()/is_contiguous()/sizes()/dim()/numel() |
2903 | uint8_t custom_sizes_strides_ : 2; |
2904 | |
// Combination of the custom_ and python_custom_ bits for device()/layout()
2906 | bool device_policy_ : 1; |
2907 | bool layout_policy_ : 1; |
2908 | |
2909 | // Call _custom() virtual method for device() |
2910 | bool custom_device_ : 1; |
2911 | |
2912 | // Call _custom() virtual method for layout() |
2913 | bool custom_layout_ : 1; |
2914 | |
2915 | // Call into Python for |
2916 | // strides()/is_contiguous()/sizes()/dim()/numel() |
2917 | uint8_t python_custom_sizes_strides_ : 2; |
2918 | |
2919 | // Call into Python for device() |
2920 | bool python_custom_device_ : 1; |
2921 | |
2922 | // Call into Python for layout() |
2923 | bool python_custom_layout_ : 1; |
2924 | |
2925 | // The set of DispatchKeys which describe this tensor. NB: this |
2926 | // does NOT include Autograd (historically, it did, but |
2927 | // not anymore!) |
2928 | // |
2929 | // INVARIANT: extra_meta_->named_tensor_meta_ != nullptr <==> |
2930 | // key_set_.has(DispatchKey::Named) |
2931 | DispatchKeySet key_set_; |
2932 | |
2933 | private: |
2934 | // C10_TensorImpl_Size_Check_Dummy_Class needs to be friends with |
2935 | // TensorImpl so it can inspect the size of private fields |
2936 | template < |
2937 | size_t cplusplus, |
2938 | size_t clang_ver_major, |
2939 | size_t gcc_ver, |
2940 | size_t gcc_ver_minor, |
2941 | size_t nvcc, |
2942 | size_t cuda_version, |
2943 | size_t cuda_version_major, |
2944 | size_t ptr_size> |
2945 | friend class C10_TensorImpl_Size_Check_Dummy_Class; |
2946 | }; |
2947 | |
2948 | // Note [TensorImpl size constraints] |
2949 | // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
2950 | // Changed the size of TensorImpl? If the size went down, good for |
2951 | // you! Adjust the documentation below and the expected size. |
2952 | // Did it go up? Read on... |
2953 | // |
2954 | // Struct size matters. In some production systems at Facebook, we have |
2955 | // 400M live tensors during a training run. Do the math: every 64-bit |
2956 | // word you add to Tensor is an extra 3.2 gigabytes in RAM. |
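// (400,000,000 tensors * 8 bytes per word = 3.2e9 bytes, i.e. ~3.2 GB.)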
2957 | // |
2958 | // If you are a Facebook employee, you can check if the run in question |
// has tipped you over that point using the command here:
2960 | // https://fburl.com/q5enpv98 |
2961 | // |
// For reference, we OOMed at 160 bytes (20 words) per TensorImpl.
// This does not count the overhead of out-of-line stride allocation or
// StorageImpl space, and it is from before we inlined sizes and strides
// directly into TensorImpl as SmallVectors.
2966 | // |
2967 | // Our memory usage on 32-bit systems is suboptimal, but we're not checking |
2968 | // for it at the moment (to help avoid rage inducing cycles when the |
2969 | // 32-bit number is wrong). |
2970 | // |
2971 | // Current breakdown: |
2972 | // |
2973 | // vtable pointer |
2974 | // strong refcount TODO: pack these into one word |
2975 | // weak refcount |
2976 | // storage pointer |
2977 | // autograd metadata pointer |
2978 | // named tensor metadata pointer |
2979 | // version counter pointer |
2980 | // PyObjectSlot |
2981 | // SizesAndStrides size/pointer |
2982 | // SizesAndStrides sizes (pre-allocated 0) |
2983 | // SizesAndStrides sizes (pre-allocated 1) |
2984 | // SizesAndStrides sizes (pre-allocated 2) |
2985 | // SizesAndStrides sizes (pre-allocated 3) |
2986 | // SizesAndStrides sizes (pre-allocated 4) |
2987 | // SizesAndStrides strides (pre-allocated 0) |
2988 | // SizesAndStrides strides (pre-allocated 1) |
2989 | // SizesAndStrides strides (pre-allocated 2) |
2990 | // SizesAndStrides strides (pre-allocated 3) |
2991 | // SizesAndStrides strides (pre-allocated 4) |
2992 | // storage offset |
2993 | // numel |
2994 | // data type, device, is_contiguous, storage_access_should_throw_, bitfields |
2995 | // DispatchKeySet |
2996 | // |
2997 | |
2998 | // Various preprocessor macros we use to check that the |
2999 | // TensorImpl size hasn't changed unexpectedly. We undef |
3000 | // these later. |
3001 | #ifndef __NVCC__ |
3002 | #define C10_NVCC 0 |
3003 | #else |
3004 | #define C10_NVCC __NVCC__ |
3005 | #endif |
3006 | |
3007 | #ifndef __CUDA_VER_MAJOR__ |
3008 | #define C10_CUDA_VERSION_MAJOR 0 |
3009 | #else |
3010 | #define C10_CUDA_VERSION_MAJOR __CUDA_VER_MAJOR__ |
3011 | #endif |
3012 | |
3013 | #ifndef CUDA_VERSION |
3014 | #define C10_CUDA_VERSION 0 |
3015 | #else |
3016 | #define C10_CUDA_VERSION CUDA_VERSION |
3017 | #endif |
3018 | |
3019 | #ifndef __clang_major__ |
3020 | #define C10_CLANG_MAJOR_VERSION 0 |
3021 | #else |
3022 | #define C10_CLANG_MAJOR_VERSION __clang_major__ |
3023 | #endif |
3024 | |
3025 | #ifndef __GNUC__ |
3026 | #define C10_GCC_VERSION 0 |
3027 | #else |
3028 | #define C10_GCC_VERSION __GNUC__ |
3029 | #endif |
3030 | |
3031 | #ifndef __GNUC_MINOR__ |
3032 | #define C10_GCC_VERSION_MINOR 0 |
3033 | #else |
3034 | #define C10_GCC_VERSION_MINOR __GNUC_MINOR__ |
3035 | #endif |
3036 | |
3037 | // We use a templatized class to both contain the logic of checking the sizes |
3038 | // as well as to provide compile-time information that might be useful in |
3039 | // figuring out why sizes may have changed. |
3040 | // All the compile time information is given by the template fields that are |
3041 | // always printed by the compiler when the static_assert fails. |
3042 | template < |
3043 | size_t cplusplus = __cplusplus, |
3044 | size_t clang_ver_major = C10_CLANG_MAJOR_VERSION, |
3045 | size_t gcc_ver = C10_GCC_VERSION, |
3046 | size_t gcc_ver_minor = C10_GCC_VERSION_MINOR, |
3047 | size_t nvcc = C10_NVCC, |
3048 | size_t cuda_version = C10_CUDA_VERSION, |
3049 | size_t cuda_version_major = C10_CUDA_VERSION_MAJOR, |
3050 | size_t ptr_size = sizeof(void*)> |
3051 | class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl { |
3052 | // Names of (non-bitfield) fields in TensorImpl; used to provide |
3053 | // compile-time info about fields whose size changes unexpectedly. |
3054 | enum class FieldNameEnum { |
3055 | storage_, |
3056 | autograd_meta_, |
3057 | , |
3058 | version_counter_, |
3059 | pyobj_slot_, |
3060 | sizes_and_strides_, |
3061 | storage_offset_, |
3062 | numel_, |
3063 | data_type_, |
3064 | device_opt_, |
3065 | key_set_, |
3066 | TOTAL_SIZE |
3067 | }; |
3068 | |
3069 | // Provides compile-time equality check that reveals what numbers |
3070 | // were used and on which quantity |
template <size_t Actual, size_t Expected, FieldNameEnum FieldName>
3072 | constexpr static bool are_equal() { |
3073 | static_assert( |
3074 | Actual == Expected, |
3075 | "Actual and Expected sizes of a field did not match!" ); |
3076 | return true; |
3077 | } |
3078 | |
3079 | // Provides compile-time <= check that reveals what numbers |
3080 | // were used and on which quantity |
template <size_t Actual, size_t Expected, FieldNameEnum FieldName>
3082 | constexpr static bool is_le() { |
3083 | static_assert( |
3084 | Actual <= Expected, |
3085 | "Actual and Expected sizes of a field did not match!" ); |
3086 | return true; |
3087 | } |
3088 | |
3089 | public: |
3090 | // Compile-time check that TensorImpl field sizes are as expected |
3091 | // |
3092 | // Observed total sizes and associated versions |
3093 | // If you find a flag that predicts when unique_ptr has 16 bytes |
3094 | // on 64-bit systems or when sizes_and_strides_ is 84 vs 88 bytes |
3095 | // on 32-bit systems you get a cookie! |
3096 | // Length | LLVM | GCC | C++ | CUDA |
3097 | // 192 | ? | 11.2 | 201703 | 11040 |
3098 | // 208 | ? | 11.2 | 201703 | 11040 |
3099 | // 208 | ? | 11.2 | 201402 | 11040 |
3100 | // 192 | ? | 11.2 | 201402 | 11040 |
3101 | // 160 | 12 | 4.2 | 201703 | 0 |
3102 | // |
3103 | // To keep things clean, we split on systems here. |
3104 | |
3105 | #if UINTPTR_MAX == 0xFFFFFFFF |
3106 | // This is a 32-bit system |
3107 | static constexpr bool check_sizes() { |
3108 | constexpr size_t tsize = 20 * sizeof(int64_t); |
3109 | |
3110 | // clang-format off |
3111 | are_equal<sizeof(storage_), 4, FieldNameEnum::storage_>(); |
3112 | are_equal<sizeof(autograd_meta_), 4, FieldNameEnum::autograd_meta_>(); |
3113 | are_equal<sizeof(extra_meta_), 4, FieldNameEnum::extra_meta_>(); |
3114 | are_equal<sizeof(version_counter_), 4, FieldNameEnum::version_counter_>(); |
3115 | are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>(); |
3116 | is_le<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>(); |
3117 | are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>(); |
3118 | are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>(); |
3119 | are_equal<sizeof(data_type_), 2, FieldNameEnum::data_type_>(); |
3120 | are_equal<sizeof(device_opt_), 3, FieldNameEnum::device_opt_>(); |
3121 | are_equal<sizeof(key_set_), 8, FieldNameEnum::key_set_>(); |
3122 | is_le<sizeof(TensorImpl), tsize, FieldNameEnum::TOTAL_SIZE>(); |
3123 | // clang-format on |
3124 | |
3125 | return true; |
3126 | } |
3127 | #else |
3128 | // This is a 64-bit system |
3129 | static constexpr bool check_sizes() { |
3130 | constexpr size_t tsize = 26 * sizeof(int64_t); |
3131 | |
3132 | // clang-format off |
3133 | are_equal<sizeof(storage_), 8, FieldNameEnum::storage_>(); |
// On some systems involving NVCC, the size of unique_ptr is 16 bytes. We
// haven't figured out how to detect those via preprocessor macros yet, so we
// use <= comparisons for the relevant fields.
3137 | is_le<sizeof(autograd_meta_), 16, FieldNameEnum::autograd_meta_>(); |
3138 | is_le<sizeof(extra_meta_), 16, FieldNameEnum::extra_meta_>(); |
3139 | are_equal<sizeof(version_counter_), 8, FieldNameEnum::version_counter_>(); |
3140 | are_equal<sizeof(pyobj_slot_), 16, FieldNameEnum::pyobj_slot_>(); |
3141 | are_equal<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>(); |
3142 | are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>(); |
3143 | are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>(); |
3144 | are_equal<sizeof(data_type_), 2, FieldNameEnum::data_type_>(); |
3145 | are_equal<sizeof(device_opt_), 3, FieldNameEnum::device_opt_>(); |
3146 | are_equal<sizeof(key_set_), 8, FieldNameEnum::key_set_>(); |
3147 | is_le<sizeof(TensorImpl), tsize, FieldNameEnum::TOTAL_SIZE>(); |
3148 | // clang-format on |
3149 | |
3150 | return true; |
3151 | } |
3152 | #endif |
3153 | }; |
3154 | |
3155 | // We use a class to encapsulate size-checking logic with |
3156 | // templates to capture sizes and flags. We call this within |
3157 | // a static assert to prove there is no run-time behaviour. |
// Since the methods we call either return true or fail their
// own static_asserts, we should never see the error message
// below; we still have to provide one, though, for C++ < 17.
3161 | static_assert( |
3162 | C10_TensorImpl_Size_Check_Dummy_Class<>::check_sizes(), |
3163 | "You should not see this message." ); |
3164 | |
3165 | // Clean up after ourselves |
3166 | #undef C10_NVCC |
3167 | #undef C10_CUDA_VERSION_MAJOR |
3168 | #undef C10_CUDA_VERSION |
3169 | #undef C10_CLANG_MAJOR_VERSION |
3170 | #undef C10_GCC_VERSION |
3171 | #undef C10_GCC_VERSION_MINOR |
3172 | |
3173 | } // namespace c10 |
3174 | |
3175 | C10_CLANG_DIAGNOSTIC_POP() |
3176 | |