1#pragma once
2
3#include <c10/core/Backend.h>
4#include <c10/core/CopyBytes.h>
5#include <c10/core/DispatchKeySet.h>
6#include <c10/core/InferenceMode.h>
7#include <c10/core/MemoryFormat.h>
8#include <c10/core/Storage.h>
9#include <c10/core/SymBool.h>
10#include <c10/core/SymIntArrayRef.h>
11#include <c10/core/TensorOptions.h>
12#include <c10/core/WrapDimMinimal.h>
13#include <c10/core/impl/LocalDispatchKeySet.h>
14#include <c10/core/impl/PyObjectSlot.h>
15#include <c10/core/impl/SizesAndStrides.h>
16#include <c10/util/DimVector.h>
17#include <c10/util/Exception.h>
18#include <c10/util/Flags.h>
19#include <c10/util/Logging.h>
20#include <c10/util/Optional.h>
21#include <c10/util/accumulate.h>
22#include <c10/util/irange.h>
23#include <c10/util/python_stub.h>
24#include <c10/util/safe_numerics.h>
25
26#include <algorithm>
27#include <atomic>
28#include <limits>
29#include <memory>
30#include <numeric>
31#include <utility>
32
// A global boolean variable to control whether we free memory when a Tensor
// is shrunk to a smaller size. If it is set, a Tensor keeps the memory
// allocated for the maximum capacity it has been resized to so far, even
// after being shrunk.
//
// This parameter is respected by "upper-case" methods which call Resize()
// (e.g., CopyFrom, ResizeLike); it is NOT respected by Tensor::resize_
// or ShrinkTo, both of which guarantee never to free memory.
40C10_DECLARE_bool(caffe2_keep_on_shrink);
41
42// Since we can have high variance in blob memory allocated across different
43// inputs in the same run, we will shrink the blob only if the memory gain
44// is larger than this flag in bytes. This only applies to functions which
45// respect caffe2_keep_on_shrink.
46C10_DECLARE_int64(caffe2_max_keep_on_shrink_memory);
47
48C10_CLANG_DIAGNOSTIC_PUSH()
49#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion")
50C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
51#endif
52
53namespace at {
54class Tensor;
55class TensorBase;
56} // namespace at
57
58namespace c10 {
59class Scalar;
60struct Storage;
61} // namespace c10
62
63namespace c10 {
64
65/**
 * A utility function to convert an ArrayRef<int> to a vector<int64_t>.
67 */
68inline std::vector<int64_t> ToVectorint64_t(const ArrayRef<int>& src) {
69 return std::vector<int64_t>(src.begin(), src.end());
70}
71
72/**
 * Return the product of all dimensions starting from dimension k (inclusive).
74 */
75inline int64_t size_from_dim_(int k, IntArrayRef dims) {
76 int64_t r = 1;
77 for (const auto i : c10::irange(k, dims.size())) {
78 r *= dims[i];
79 }
80 return r;
81}
82
83// Product of all dims up to k (not including dims[k])
84inline int64_t size_to_dim_(int k, IntArrayRef dims) {
85 TORCH_CHECK((unsigned)k <= dims.size());
86 int64_t r = 1;
87 for (const auto i : c10::irange(k)) {
88 r *= dims[i];
89 }
90 return r;
91}
92
93// Product of all dims between k and l (not including dims[k] and dims[l])
94inline int64_t size_between_dim_(int k, int l, IntArrayRef dims) {
95 TORCH_CHECK((unsigned)l < dims.size() && (unsigned)k < dims.size());
96 int64_t r = 1;
97 if (k < l) {
98 for (int i = k + 1; i < l; ++i) {
99 r *= dims[i];
100 }
101 } else {
102 for (int i = l + 1; i < k; ++i) {
103 r *= dims[i];
104 }
105 }
106 return r;
107}
108
109// Wrap around axis_index if it is negative, s.t., -1 is the last dim
110inline int canonical_axis_index_(int axis_index, int ndims) {
111 TORCH_CHECK(axis_index >= -ndims);
112 TORCH_CHECK(axis_index < ndims);
113 if (axis_index < 0) {
114 return axis_index + ndims;
115 }
116 return axis_index;
117}
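
// A small worked example of the helpers above (the values follow directly
// from the definitions, shown here as a usage sketch):
//
//   std::vector<int64_t> dims = {2, 3, 4, 5};
//   int64_t tail = size_from_dim_(2, dims);            // 4 * 5 == 20
//   int64_t head = size_to_dim_(2, dims);              // 2 * 3 == 6
//   int64_t mid = size_between_dim_(0, 3, dims);       // 3 * 4 == 12
//   int last = canonical_axis_index_(-1, /*ndims=*/4); // == 3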
118
119using PlacementDtor = void (*)(void*, size_t);
120
/*
 * A Context that will call an extra placement deleter during
 * destruction.
 *
 * Accepts an already constructed DataPtr and stores it as a member.
 * During destruction, we'll call the extra deleter on the underlying
 * data pointer before the DataPtr is destructed.
 * `data_ptr_` owns the memory.
 */
130struct C10_API PlacementDeleteContext {
131 DataPtr data_ptr_;
132 PlacementDtor placement_dtor_;
133 size_t size_;
134 PlacementDeleteContext(
135 DataPtr&& data_ptr,
136 PlacementDtor placement_dtor,
137 size_t size)
138 : data_ptr_(std::move(data_ptr)),
139 placement_dtor_(placement_dtor),
140 size_(size) {}
141 static DataPtr makeDataPtr(
142 DataPtr&& data_ptr,
143 PlacementDtor placement_dtor,
144 size_t size,
145 Device device);
146 ~PlacementDeleteContext() {
147 placement_dtor_(data_ptr_.get(), size_);
148 // original memory will be freed when data_ptr_ is destructed
149 }
150};
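
// A minimal usage sketch for PlacementDeleteContext (hypothetical caller code,
// not part of this header): wrap an allocation so that per-element destructors
// run before the memory is handed back to the allocator. `raw_ptr`,
// `num_elements` and `device` are illustrative names, and the sketch assumes
// the deleter interprets `size` as an element count, as typical TypeMeta
// placement-delete functions do.
//
//   void destroy_strings(void* ptr, size_t n) {
//     auto* p = static_cast<std::string*>(ptr);
//     for (size_t i = 0; i < n; ++i) {
//       std::destroy_at(p + i);
//     }
//   }
//
//   c10::DataPtr guarded = c10::PlacementDeleteContext::makeDataPtr(
//       std::move(raw_ptr), destroy_strings, num_elements, device);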
151
152struct TensorImpl;
153
154struct C10_API AutogradMetaInterface {
155 virtual void set_requires_grad(
156 bool requires_grad,
157 at::TensorImpl* self_impl) = 0;
158 virtual bool requires_grad() const = 0;
159 virtual at::Tensor& mutable_grad() = 0;
160 virtual const at::Tensor& grad() const = 0;
161 virtual const at::Tensor& fw_grad(uint64_t level, const at::TensorBase& self)
162 const = 0;
163 virtual void set_fw_grad(
164 const at::TensorBase& new_grad,
165 const at::TensorBase& self,
166 uint64_t level,
167 bool is_inplace_op) = 0;
168 virtual ~AutogradMetaInterface();
169};
170
171namespace impl {
172
// Unfortunately, the definition of AutogradMeta lives in a separate
// compilation unit from TensorImpl (libtorch.so versus libc10.so),
// which means that we cannot construct an AutogradMeta from TensorImpl,
// not even from the cpp file. So we have to indirect it through a factory
// function, which will be initialized when we load libtorch.so.
178
179struct C10_API AutogradMetaFactory {
180 virtual ~AutogradMetaFactory() = default;
181 virtual std::unique_ptr<AutogradMetaInterface> make() const = 0;
  // This method is admittedly a hack: the factory has to hand out an
  // undefined Tensor because Tensor (as opposed to TensorImpl) is not
  // defined in this header.
184 virtual const at::Tensor& undefined_tensor() const = 0;
185};
186
187C10_API void SetAutogradMetaFactory(AutogradMetaFactory* factory);
188C10_API AutogradMetaFactory* GetAutogradMetaFactory();
189
190struct C10_API AutogradMetaFactoryRegisterer {
191 explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory* factory) {
192 SetAutogradMetaFactory(factory);
193 }
194};
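
// Registration sketch (illustrative, not the actual libtorch code): a library
// that provides a concrete AutogradMetaFactory installs it at load time via a
// file-scope registerer, so GetAutogradMetaFactory() returns it once that
// library has been loaded. `MyAutogradMetaFactory` is a hypothetical subclass.
//
//   static MyAutogradMetaFactory my_factory;
//   static c10::impl::AutogradMetaFactoryRegisterer my_factory_registerer(
//       &my_factory);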
195
196} // namespace impl
197
198struct C10_API NamedTensorMetaInterface {
199 virtual ~NamedTensorMetaInterface() = default;
200 virtual std::unique_ptr<NamedTensorMetaInterface> clone() const {
201 TORCH_INTERNAL_ASSERT(
202 false, "Not implemented: NamedTensorMetaInterface::clone");
203 };
204 virtual int64_t slow_dim() const {
205 TORCH_INTERNAL_ASSERT(
206 false, "Not implemented: NamedTensorMetaInterface::slow_dim");
207 };
208};
209
210// For ease of copy pasting
211#if 0
212is_contiguous
213is_channels_last_contiguous
214is_channels_last_3d_contiguous
215is_channels_last
216is_channels_last_3d
217is_non_overlapping_and_dense
218#endif
219
220struct C10_API ExtraMeta {
221 SymDimVector sizes_ = {0};
222 SymDimVector strides_ = {1};
223 SymInt numel_ = 1;
224 SymInt storage_offset_ = 0;
225 SymBool is_contiguous_{true};
226 SymBool is_channels_last_contiguous_{false};
227 SymBool is_channels_last_3d_contiguous_{false};
228 SymBool is_channels_last_{false};
229 SymBool is_channels_last_3d_{false};
230 SymBool is_non_overlapping_and_dense_{true};
231 std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta_ = nullptr;
232
233 ExtraMeta() = default;
234
235 ExtraMeta(
236 SymDimVector sizes,
237 SymDimVector strides,
238 SymInt numel,
239 SymInt storage_offset,
240 SymBool is_contiguous,
241 SymBool is_channels_last_contiguous,
242 SymBool is_channels_last_3d_contiguous,
243 SymBool is_channels_last,
244 SymBool is_channels_last_3d,
245 SymBool is_non_overlapping_and_dense,
246 std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta)
247 : sizes_(std::move(sizes)),
248 strides_(std::move(strides)),
249 numel_(std::move(numel)),
250 storage_offset_(std::move(storage_offset)),
251 is_contiguous_(std::move(is_contiguous)),
252 is_channels_last_contiguous_(std::move(is_channels_last_contiguous)),
253 is_channels_last_3d_contiguous_(
254 std::move(is_channels_last_3d_contiguous)),
255 is_channels_last_(std::move(is_channels_last)),
256 is_channels_last_3d_(std::move(is_channels_last_3d)),
257 is_non_overlapping_and_dense_(std::move(is_non_overlapping_and_dense)),
258 named_tensor_meta_(std::move(named_tensor_meta)) {}
259
260 std::unique_ptr<ExtraMeta> clone() const {
261 return std::make_unique<ExtraMeta>(
262 sizes_,
263 strides_,
264 numel_,
265 storage_offset_,
266 is_contiguous_,
267 is_channels_last_contiguous_,
268 is_channels_last_3d_contiguous_,
269 is_channels_last_,
270 is_channels_last_3d_,
271 is_non_overlapping_and_dense_,
272 named_tensor_meta_ ? named_tensor_meta_->clone() : nullptr);
273 }
274};
275
276// NOTE [ Version Counter Sharing ]
277//
278// Every Tensor has a version counter. Version counters are incremented whenever
279// the data or size of a tensor changes through in-place Variable operations.
280// Version counters are used to detect modifications to saved variables which
281// would result in incorrect gradient calculations. Version counters may be
282// shared between Variables:
283//
284// 1. A view shares the version counter of the base Variable,
285// 2. `x.detach()` shares the version counter of `x`,
286// 3. Unpacked saved variables share the version counter of the source.
287//
288// Version counters are not shared in these scenarios:
289//
290// 1. When we replace a `Variable`'s underlying `Tensor` by calling
291// `set_data(...)`,
292// 2. `x.data` does not share the version counter of `x`. (See discussion at
293// https://github.com/pytorch/pytorch/issues/5396)
294//
295// Question: Why do we put the version counter in TensorImpl instead of
296// AutogradMeta?
297//
298// Answer: After the Variable/Tensor merge, a tensor will not have AutogradMeta
299// when its `requires_grad_` is false, but when we use this tensor in the
300// forward pass of a function that requires saving this tensor for backward, we
301// need to keep track of this tensor's version to make sure it's always valid in
302// the autograd graph.
303//
304// To achieve this goal, we put the version counter in TensorImpl instead of
305// AutogradMeta, and have it always be available. This allows us to have the
306// optimization of not carrying AutogradMeta when a tensor doesn't require
307// gradient.
308//
309// A hypothetical alternative way to achieve this goal is to initialize
310// AutogradMeta and create the version counter for the non-requires-grad tensor
311// only when it's saved for backward. However, since saving a tensor for
312// backward happens in the forward pass, and our invariant is that forward pass
313// needs to be thread-safe, lazy-initializing AutogradMeta when saving a tensor
314// can introduce race conditions when we are running the forward pass in
315// multi-thread scenarios, thus making the forward pass not thread-safe anymore,
316// which breaks the invariant.
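//
// As a concrete illustration of sharing case (1) above (a sketch; it goes
// through TensorImpl::version_counter(), which is defined later in this file):
//
//   at::Tensor base = at::ones({3});
//   at::Tensor view = base.slice(/*dim=*/0, /*start=*/1); // shares base's counter
//   uint32_t before =
//       base.unsafeGetTensorImpl()->version_counter().current_version();
//   view.add_(1); // in-place update through the view bumps the shared counter
//   // base's current_version() is now before + 1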
317struct C10_API VariableVersion {
318 private:
319 struct VersionCounter : intrusive_ptr_target {
320 VersionCounter(uint32_t version) : version_(version) {}
321 std::atomic<uint32_t> version_;
322 };
323 c10::intrusive_ptr<VersionCounter> version_counter_;
324
325 public:
  // Note [Disabled VariableVersion]
  // VariableVersion struct has an intrusive_ptr pointing to a VersionCounter
  // struct with an atomic variable. Thus `VariableVersion(/*version=*/0)` is
  // not as cheap as we expected. In some cases constructing a VariableVersion
  // with version 0 is not necessary, so we add a cheap constructor which
  // doesn't allocate the intrusive_ptr.
  // Example use cases are:
  //  - Inference tensors don't track version counters, so they always
  //    have a disabled VariableVersion.
  //  - In the SavedVariable class we override version_counter_ inside its
  //    constructor so that we can use the cheap constructor there.
338 enum Disabled { DISABLED };
  // It's okay to return true even for an inference tensor, which
  // doesn't have a version counter enabled.
  // We want to be permissive here since in many cases (e.g. make_variable)
  // we can std::move a TensorImpl if there are no other uses, which saves us
  // an additional TensorImpl allocation.
344 bool unique() const {
345 return version_counter_ ? 1 == version_counter_.use_count() : true;
346 }
347 // NOTE: As of C++11 and 14, default-constructing a std::atomic variable
348 // leaves it in a persistently undefined state. See
349 // https://cplusplus.github.io/LWG/issue2334.
350 VariableVersion(uint32_t version)
351 : version_counter_(c10::make_intrusive<VersionCounter>(version)) {}
352 VariableVersion(Disabled = DISABLED) {}
353
354 bool enabled() const {
355 return version_counter_;
356 }
357
  // Note [Inplace update inference tensor]
  // 1. Inplace update to an inference tensor is forbidden in normal mode.
  //   For example:
  //     inference_tensor.copy_(normal_tensor_requires_grad)
  //   This inplace op makes inference_tensor have requires_grad=True and
  //   have a grad_fn. This is bad because views of `inference_tensor`
  //   created in InferenceMode won't be able to know the grad_fn since
  //   their ViewMeta were not recorded. To match the NoGradMode behavior
  //   that "inplace update to a view created in NoGradMode raises an error",
  //   we just ban inplace updates to inference tensors since we can't tell
  //   if an inference tensor is a view created in InferenceMode.
  //
  //   Note that views of a normal tensor created in InferenceMode have proper
  //   ViewMeta so that they're aware of the grad_fn correctly.
  //
  // 2. Inplace update to an inference tensor in inference mode doesn't bump
  //    the version counter.
  //    * It either doesn't call bump() by skipping the ADInplaceOrView kernel,
  //      - e.g. inference_tensor.add_(1)
  //    * or bump() is a no-op for inference tensors.
  //      - e.g. inference_tensor.add_(normal_tensor)
379 void bump() {
380 // TODO: Replace the link to the documentation once it's available.
381 TORCH_CHECK(
382 version_counter_ || InferenceMode::is_enabled(),
        "Inplace update to inference tensor outside InferenceMode is not allowed. "
        "You can make a clone to get a normal tensor before doing inplace update. "
        "See https://github.com/pytorch/rfcs/pull/17 for more details.");
386 if (version_counter_) {
387 ++version_counter_->version_;
388 }
389 }
390
391 void set_version(int64_t i) {
392 TORCH_CHECK(
393 version_counter_,
394 "Tried to call torch.autograd._unsafe_set_version() on a tensor "
395 "that does not have a version counter. Was it created in inference mode?");
396 TORCH_CHECK(i >= 0, "Cannot set a version_counter to a value below 0: ", i);
397 version_counter_->version_ = i;
398 }
399
  // An inference tensor doesn't have a version counter, so it shouldn't be
  // accessed.
402 uint32_t current_version() const {
403 TORCH_CHECK(
404 version_counter_, "Inference tensors do not track version counter.");
405 return version_counter_->version_;
406 }
407};
408
409// Forward declaration of TensorImpl needed for forward declaration of
410// C10_TensorImpl_Size_Check_Dummy_Class
411struct C10_API TensorImpl;
412
413// Forward declaration needed because TensorImpl needs to be friends with
414// C10_TensorImpl_Size_Check_Dummy_Class in order to check the size
415// of its private fields.
416template <
417 size_t cplusplus,
418 size_t clang_ver_major,
419 size_t gcc_ver,
420 size_t gcc_ver_minor,
421 size_t nvcc,
422 size_t cuda_version,
423 size_t cuda_version_major,
424 size_t ptr_size>
425class C10_TensorImpl_Size_Check_Dummy_Class;
426
427/**
428 * NOTE: Some TensorImpl methods are small and not overridden in the
429 * PyTorch codebase itself, but may theoretically need to be
430 * overridden by third-party TensorImpl subclasses. This macro allows
431 * users that need maximum performance and don't need these extension
432 * points to disable them with a build-time flag. (In particular,
433 * XLA's XLATensorImpl currently overrides these methods, so we can't
434 * enable this flag by default.)
435 */
436#ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
437#define TENSORIMPL_MAYBE_VIRTUAL
438#else
439#define TENSORIMPL_MAYBE_VIRTUAL virtual
440#endif
441
442/**
443 * The low-level representation of a tensor, which contains a pointer
444 * to a storage (which contains the actual data) and metadata (e.g., sizes and
445 * strides) describing this particular view of the data as a tensor.
446 *
447 * Some basic characteristics about our in-memory representation of
448 * tensors:
449 *
450 * - It contains a pointer to a storage struct (Storage/StorageImpl)
451 * which contains the pointer to the actual data and records the
452 * data type and device of the view. This allows multiple tensors
 *   to alias the same underlying data, which lets us efficiently
 *   implement different *views* of a tensor.
455 *
456 * - The tensor struct itself records view-specific metadata about
457 * the tensor, e.g., sizes, strides and offset into storage.
458 * Each view of a storage can have a different size or offset.
459 *
460 * - This class is intrusively refcounted. It is refcounted so that
461 * we can support prompt deallocation of large tensors; it is
462 * intrusively refcounted so that we can still perform reference
463 * counted operations on raw pointers, which is often more convenient
464 * when passing tensors across language boundaries.
465 *
466 * - For backwards-compatibility reasons, a tensor may be in an
467 * uninitialized state. A tensor may be uninitialized in the following
468 * two ways:
469 *
470 * - A tensor may be DTYPE UNINITIALIZED. A tensor of this
471 * form has an uninitialized dtype. This situation most
472 * frequently arises when a user writes Tensor x(CPU). The dtype
473 * is subsequently initialized when mutable_data<T>() is
474 * invoked for the first time.
475 *
476 * - A tensor may be STORAGE UNINITIALIZED. A tensor of this form
477 * has non-zero size, but has a storage with a null data pointer.
478 * This situation most frequently arises when a user calls
479 * Resize() or FreeMemory(). This is because Caffe2 historically
480 * does lazy allocation: allocation of data doesn't occur until
481 * mutable_data<T>() is invoked. A tensor with zero size is
482 * always storage initialized, because no allocation is necessary
483 * in this case.
484 *
485 * All combinations of these two uninitialized states are possible.
486 * Consider the following transcript in idiomatic Caffe2 API:
487 *
488 * Tensor x(CPU); // x is storage-initialized, dtype-UNINITIALIZED
489 * x.Resize(4); // x is storage-UNINITIALIZED, dtype-UNINITIALIZED
490 * x.mutable_data<float>(); // x is storage-initialized, dtype-initialized
491 * x.FreeMemory(); // x is storage-UNINITIALIZED, dtype-initialized.
492 *
493 * All other fields on tensor are always initialized. In particular,
494 * size is always valid. (Historically, a tensor declared as Tensor x(CPU)
495 * also had uninitialized size, encoded as numel == -1, but we have now
496 * decided to default to zero size, resulting in numel == 0).
497 *
498 * Uninitialized storages MUST be uniquely owned, to keep our model
499 * simple. Thus, we will reject operations which could cause an
500 * uninitialized storage to become shared (or a shared storage to
501 * become uninitialized, e.g., from FreeMemory).
502 *
503 * In practice, tensors which are storage-UNINITIALIZED and
504 * dtype-UNINITIALIZED are *extremely* ephemeral: essentially,
505 * after you do a Resize(), you basically always call mutable_data()
506 * immediately afterwards. Most functions are not designed to
507 * work if given a storage-UNINITIALIZED, dtype-UNINITIALIZED tensor.
508 *
509 * We intend to eliminate all uninitialized states, so that every
510 * tensor is fully initialized in all fields. Please do not write new code
511 * that depends on these uninitialized states.
512 */
513struct C10_API TensorImpl : public c10::intrusive_ptr_target {
514 TensorImpl() = delete;
515 ~TensorImpl() override;
516 // Note [Enum ImplType]
517 // This enum is temporary. In the followup refactor we should
518 // think about how to specialize TensorImpl creation for view
519 // tensors. Currently we only special case its key_set_ but
520 // there's also potential to share version_counter_ directly
  // without creating it first and then overriding it in as_view.
522 enum ImplType { VIEW };
523
524 /**
525 * Construct a 1-dim 0-size tensor backed by the given storage.
526 */
527 TensorImpl(
528 Storage&& storage,
529 DispatchKeySet,
530 const caffe2::TypeMeta data_type);
531
532 // See Note [Enum ImplType]
533 TensorImpl(
534 ImplType,
535 Storage&& storage,
536 DispatchKeySet,
537 const caffe2::TypeMeta data_type);
538
539 /**
540 * Construct a 1-dim 0 size tensor that doesn't have a storage.
541 */
542 TensorImpl(
543 DispatchKeySet,
544 const caffe2::TypeMeta data_type,
545 c10::optional<c10::Device> device_opt);
546
547 // Legacy constructors so I don't have to go update call sites.
548 // TODO: When Variable is added, delete these constructors
549 TensorImpl(
550 Storage&& storage,
551 DispatchKey dispatch_key,
552 const caffe2::TypeMeta data_type)
553 : TensorImpl(
554 std::move(storage),
555 DispatchKeySet(dispatch_key),
556 data_type) {}
557 TensorImpl(
558 DispatchKey dispatch_key,
559 const caffe2::TypeMeta data_type,
560 c10::optional<c10::Device> device_opt)
561 : TensorImpl(DispatchKeySet(dispatch_key), data_type, device_opt) {}
562
563 private:
564 // This constructor is private, because the data_type is redundant with
565 // storage. Still, we pass it in separately because it's easier to write
566 // the initializer list if we're not worried about storage being moved out
567 // from under us.
568 TensorImpl(
569 Storage&& storage,
570 DispatchKeySet,
571 const caffe2::TypeMeta data_type,
572 c10::optional<c10::Device>);
573
574 public:
575 TensorImpl(const TensorImpl&) = delete;
576 TensorImpl& operator=(const TensorImpl&) = delete;
577 TensorImpl(TensorImpl&&) = delete;
578 TensorImpl& operator=(TensorImpl&&) = delete;
579
580 /**
581 * Release (decref) storage, and any other external allocations. This
582 * override is for `intrusive_ptr_target` and is used to implement weak
583 * tensors.
584 */
585 void release_resources() override;
586
587 public:
588 /**
589 * Return the DispatchKeySet corresponding to this Tensor, specifying
590 * all of the DispatchKeys that this Tensor identifies as. This is the
591 * information used to dispatch operations on this tensor.
592 */
593 DispatchKeySet key_set() const {
594 return key_set_;
595 }
596
  // NOTE: The general recipe for customizable methods is that the fastpath
  // function (e.g., sizes()) does an unlikely policy test, and if it doesn't
  // trigger, it runs the fast path implementation with no checks, going
  // directly to on-TensorImpl fields. In particular, you never need to
  // check ExtraMeta if the policy doesn't trigger, as a non-trivial ExtraMeta
  // implies the policy will always match.
  //
  // The default implementations of methods are "safe": they do extra tests
  // to make sure the internal state is consistent whether or not you are
  // using symbolic shapes. If you don't want the tests, directly
  // override the custom method (e.g., sizes_custom()) to do your preferred
  // behavior.
609
610 public:
611 /**
612 * Return a reference to the sizes of this tensor. This reference remains
613 * valid as long as the tensor is live and not resized.
614 */
615 IntArrayRef sizes() const {
616 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
617 return sizes_custom();
618 }
619 return sizes_and_strides_.sizes_arrayref();
620 }
621
622 SymIntArrayRef sym_sizes() const {
623 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
624 return sym_sizes_custom();
625 }
626 // Sizes guaranteed to be non-negative, so unchecked cast is OK
627 return c10::fromIntArrayRefKnownNonNegative(
628 sizes_and_strides_.sizes_arrayref());
629 }
630
631 IntArrayRef sizes_default() const {
632 // TODO: force backtrace to be printed on this error
633 TORCH_CHECK(
634 !has_symbolic_sizes_strides_,
635 "Cannot call sizes() on tensor with symbolic sizes/strides");
636 return sizes_and_strides_.sizes_arrayref();
637 }
638
639 SymIntArrayRef sym_sizes_default() const {
640 if (has_symbolic_sizes_strides_) {
641 return extra_meta_->sizes_;
642 } else {
643 // Sizes guaranteed to be non-negative, so unchecked cast is OK
644 return c10::fromIntArrayRefKnownNonNegative(sizes_default());
645 }
646 }
647
648 // From https://stackoverflow.com/a/3057522/23845
649 // TODO: does C++14 have a stdlib template for this?
650 template <typename T>
651 struct identity {
652 typedef T type;
653 };
654
655 template <typename T>
656 ArrayRef<T> generic_sizes() {
657 return _generic_sizes(identity<T>());
658 }
659
660 ArrayRef<int64_t> _generic_sizes(identity<int64_t>) {
661 return sizes();
662 }
663 ArrayRef<c10::SymInt> _generic_sizes(identity<c10::SymInt>) {
664 return sym_sizes();
665 }
666
667 template <typename T>
668 ArrayRef<T> generic_strides() {
669 return _generic_strides(identity<T>());
670 }
671
672 ArrayRef<int64_t> _generic_strides(identity<int64_t>) {
673 return strides();
674 }
675 ArrayRef<c10::SymInt> _generic_strides(identity<c10::SymInt>) {
676 return sym_strides();
677 }
678
679 template <typename T>
680 T generic_storage_offset() {
681 return _generic_storage_offset(identity<T>());
682 }
683
684 int64_t _generic_storage_offset(identity<int64_t>) {
685 return storage_offset();
686 }
687 c10::SymInt _generic_storage_offset(identity<c10::SymInt>) {
688 return sym_storage_offset();
689 }
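
  // A minimal sketch of why the generic_* accessors above exist: code
  // templated on the index type can handle both concrete and symbolic shapes
  // with a single implementation. `compute_numel` is a hypothetical helper,
  // not part of this class.
  //
  //   template <typename T>
  //   T compute_numel(TensorImpl* impl) {
  //     T n = 1;
  //     for (const auto& s : impl->generic_sizes<T>()) {
  //       n = n * s;
  //     }
  //     return n;
  //   }
  //
  //   // compute_numel<int64_t>(impl) or compute_numel<c10::SymInt>(impl)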
690
691 /**
692 * The number of elements in a tensor.
693 *
694 * WARNING: Previously, if you were using the Caffe2 API, you could
695 * test numel() == -1 to see if a tensor was uninitialized. This
696 * is no longer true; numel always accurately reports the product
697 * of sizes of a tensor.
698 */
699 int64_t numel() const {
700 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
701 return numel_custom();
702 }
703 return numel_;
704 }
705
706 c10::SymInt sym_numel() const {
707 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
708 return sym_numel_custom();
709 }
710 return c10::SymInt(SymInt::UNCHECKED, numel_);
711 }
712
713 int64_t numel_default() const {
714 TORCH_CHECK(
715 !has_symbolic_sizes_strides_,
716 "Cannot call numel() on tensor with symbolic sizes/strides");
717 return numel_;
718 }
719
720 c10::SymInt sym_numel_default() const {
721 if (has_symbolic_sizes_strides_) {
722 return extra_meta_->numel_;
723 } else {
724 return c10::SymInt(SymInt::UNCHECKED, numel_);
725 }
726 }
727
728 /**
   * Return the number of dimensions of this tensor. Note that a 0-dimensional
   * tensor represents a Scalar, i.e., one that has a single element.
731 */
732 int64_t dim() const {
733 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
734 return dim_custom();
735 }
736 return sizes_and_strides_.size();
737 }
738
739 int64_t dim_default() const {
740 if (has_symbolic_sizes_strides_) {
741 return extra_meta_->sizes_.size();
742 } else {
743 return sizes_and_strides_.size();
744 }
745 }
746
747 /**
748 * Return the offset in number of elements into the storage that this
749 * tensor points to. Most tensors have storage_offset() == 0, but,
750 * for example, an index into a tensor will have a non-zero storage_offset().
751 *
752 * WARNING: This is NOT computed in bytes.
753 */
754 int64_t storage_offset() const {
755 // TODO: maybe this should be toggled by strides
756 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
757 return storage_offset_custom();
758 }
759 return storage_offset_;
760 }
761
762 c10::SymInt sym_storage_offset() const {
763 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
764 return sym_storage_offset_custom();
765 }
766 return c10::SymInt(SymInt::UNCHECKED, storage_offset_);
767 }
768
769 int64_t storage_offset_default() const {
770 TORCH_CHECK(
771 !has_symbolic_sizes_strides_,
772 "Cannot call storage_offset() on tensor with symbolic sizes/strides");
773 return storage_offset_;
774 }
775
776 c10::SymInt sym_storage_offset_default() const {
777 if (has_symbolic_sizes_strides_) {
778 return extra_meta_->storage_offset_;
779 } else {
780 return c10::SymInt(SymInt::UNCHECKED, storage_offset_);
781 }
782 }
783
784 /**
785 * Return a reference to the strides of this tensor. This reference remains
786 * valid as long as the tensor is live and not restrided.
787 */
788 IntArrayRef strides() const {
789 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
790 return strides_custom();
791 }
792 return sizes_and_strides_.strides_arrayref();
793 }
794
795 c10::SymIntArrayRef sym_strides() const {
796 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
797 return sym_strides_custom();
798 }
799 return c10::fromIntArrayRefKnownNonNegative(strides_default());
800 }
801
802 IntArrayRef strides_default() const {
803 TORCH_CHECK(
804 !has_symbolic_sizes_strides_,
805 "Cannot call strides() on tensor with symbolic sizes/strides");
806 return sizes_and_strides_.strides_arrayref();
807 }
808
809 c10::SymIntArrayRef sym_strides_default() const {
810 if (has_symbolic_sizes_strides_) {
811 return extra_meta_->strides_;
812 } else {
813 return c10::fromIntArrayRefKnownNonNegative(strides_default());
814 }
815 }
816
817 /**
818 * Whether or not a tensor is laid out in contiguous memory.
819 *
820 * Tensors with non-trivial strides are not contiguous. See
821 * compute_contiguous() for the exact definition of whether or not
   * a tensor is contiguous.
823 */
824 bool is_contiguous(
825 at::MemoryFormat memory_format = at::MemoryFormat::Contiguous) const {
826 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
827 return is_contiguous_custom(memory_format);
828 }
829 return is_contiguous_default(memory_format);
830 }
831
832 // These are factored into separate functions in case subclasses
833 // want to use them
834 bool is_contiguous_default(at::MemoryFormat memory_format) const {
835 if (has_symbolic_sizes_strides_) {
836 if (memory_format == at::MemoryFormat::ChannelsLast) {
837 return extra_meta_->is_channels_last_contiguous_.guard_bool(
838 __FILE__, __LINE__);
839 } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
840 return extra_meta_->is_channels_last_3d_contiguous_.guard_bool(
841 __FILE__, __LINE__);
842 }
843 return extra_meta_->is_contiguous_.guard_bool(__FILE__, __LINE__);
844 }
845
846 if (memory_format == at::MemoryFormat::ChannelsLast) {
847 return is_channels_last_contiguous_;
848 } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
849 return is_channels_last_3d_contiguous_;
850 }
851 return is_contiguous_;
852 }
853
854 bool is_strides_like_default(at::MemoryFormat memory_format) const {
855 if (has_symbolic_sizes_strides_) {
856 if (memory_format == at::MemoryFormat::ChannelsLast) {
857 return extra_meta_->is_channels_last_.guard_bool(__FILE__, __LINE__);
858 } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
859 return extra_meta_->is_channels_last_3d_.guard_bool(__FILE__, __LINE__);
860 } else {
861 return false;
862 }
863 }
864
865 if (memory_format == at::MemoryFormat::ChannelsLast) {
866 return is_channels_last_;
867 } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
868 return is_channels_last_3d_;
869 } else {
870 return false;
871 }
872 }
873
874 bool is_non_overlapping_and_dense_default() const {
875 if (has_symbolic_sizes_strides_) {
876 return extra_meta_->is_non_overlapping_and_dense_.guard_bool(
877 __FILE__, __LINE__);
878 } else {
879 return is_non_overlapping_and_dense_;
880 }
881 }
882
883 // NB: these dim accessor functions don't have _default(), as you can use
884 // sizes_default/strides_default
885 /**
886 * Return the size of a tensor at some dimension, wrapping the dimension if
887 * necessary.
888 *
889 * NOTE: if you know wrapping is unnecessary, do sizes()[d] instead; it will
890 * be faster
891 */
892 int64_t size(int64_t d) const {
893 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
894 return size_custom(d);
895 }
896 d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
897 return sizes_and_strides_.size_at_unchecked(d);
898 }
899
900 c10::SymInt sym_size(int64_t d) const {
901 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
902 return sym_size_custom(d);
903 }
904 d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
905 const auto sizes = this->sym_sizes();
906 return sizes[d];
907 }
908
909 /**
910 * Return the stride of a tensor at some dimension, wrapping the dimension
911 * if necessary.
912 *
   * NOTE: if you know wrapping is unnecessary, do strides()[d] instead; it
   * will be faster
915 */
916 int64_t stride(int64_t d) const {
917 d = maybe_wrap_dim(d, dim(), false);
918 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
919 // TODO: provide stride_custom, symmetrically with size_custom.
920 // There is presently no user for it; only NestedTensor is using
921 // size_custom overrideability
922 return strides_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds)
923 }
924 // Intentionally don't call default, which also handles symbolic
925 return sizes_and_strides_.stride_at_unchecked(d);
926 }
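
  // Worked example of the wrapping behavior above, assuming a contiguous
  // tensor of shape {2, 3, 4}:
  //
  //   impl->size(-1);    // == impl->size(2) == 4
  //   impl->stride(-1);  // == 1 for this contiguous layout
  //   impl->sizes()[2];  // same value as size(2), but no wrapping: d must be
  //                      // already non-negative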
927
928 enum class SizesStridesPolicy : uint8_t {
929 // Default behavior, e.g., dense tensor.
930 //
931 // Can override: nothing
932 Default = 0,
933 // Customizable strides behavior, e.g., sparse tensor,
934 // mkldnn tensor.
935 //
936 // Can override: strides(), is_contiguous()
937 CustomStrides = 1,
938 // Customizable sizes behavior, e.g., nested tensor
939 //
940 // Can override: strides(), is_contiguous(), sizes(), dim(), numel()
941 CustomSizes = 2
942 };
943
944 protected:
945 inline bool matches_policy(SizesStridesPolicy policy) const {
946 return sizes_strides_policy_ >= static_cast<uint8_t>(policy);
947 }
948
949 inline bool matches_custom(SizesStridesPolicy policy) const {
950 return custom_sizes_strides_ >= static_cast<uint8_t>(policy);
951 }
952
953 inline bool matches_python_custom(SizesStridesPolicy policy) const {
954 auto r = python_custom_sizes_strides_ >= static_cast<uint8_t>(policy);
955 if (r) {
956 TORCH_INTERNAL_ASSERT(is_python_dispatch())
957 }
958 return r;
959 }
960
961 /**
962 * Customization points for the functions above. sizes_strides_policy_
963 * must be set to enable these.
964 *
965 * NB: dim is overrideable separately from sizes because it is possible
966 * for a tensor to have rank, but not well defined sizes.
967 */
968 // sizes_strides_policy_ >= CustomStrides
969 virtual bool is_contiguous_custom(at::MemoryFormat memory_format) const;
970 virtual bool is_strides_like_custom(at::MemoryFormat memory_format) const;
971 virtual bool is_non_overlapping_and_dense_custom() const;
972 // sizes_strides_policy_ >= CustomSizes
  // Currently this method only exists to be overridden by subclasses such as
  // NestedTensorImpl.
975 virtual int64_t size_custom(int64_t d) const {
976 // TODO: We could add support to Python dispatch here.
977 // TODO: We could call into aten::size.int instead of
978 // sizes_custom()[d] and enable use of the dispatcher.
979 d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
980 return sizes_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds)
981 }
982
983 virtual c10::SymInt sym_size_custom(int64_t d) const {
984 // TODO: We could add support to Python dispatch here.
985 // TODO: We could call into aten::size.int instead of
986 // sym_sizes_custom()[d] and enable use of the dispatcher.
987 d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
988 return sym_sizes_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds)
989 }
990
991 virtual IntArrayRef sizes_custom() const;
992 virtual IntArrayRef strides_custom() const;
993 virtual int64_t numel_custom() const;
994 virtual int64_t storage_offset_custom() const;
995 virtual int64_t dim_custom() const;
996 virtual Device device_custom() const;
997 virtual Layout layout_custom() const;
998
999 virtual c10::SymIntArrayRef sym_sizes_custom() const;
1000 virtual c10::SymIntArrayRef sym_strides_custom() const;
1001 virtual c10::SymInt sym_numel_custom() const;
1002 virtual c10::SymInt sym_storage_offset_custom() const;
1003
1004 public:
1005 /**
1006 * True if this tensor has storage. See storage() for details.
1007 */
1008#ifdef DEBUG
1009 // Allow subclasses to check that their storage_ is never getting set in debug
1010 // builds.
1011 virtual
1012#else
1013 TENSORIMPL_MAYBE_VIRTUAL
1014#endif
1015 bool
1016 has_storage() const
1017 // NOTE: we devirtualize this because it arguably shouldn't be an
1018 // error just to ask subclasses if they have storage.
1019 // This used to throw for most subclasses, but OpaqueTensorImpl
1020 // wanted it to successfully return false, so we went ahead and made
1021 // it a non-error.
1022#ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
1023 {
1024 return storage_;
1025 }
1026#else
1027 ;
1028#endif
1029
1030 /**
1031 * Return the underlying storage of a Tensor. Multiple tensors may share
1032 * a single storage. A Storage is an impoverished, Tensor-like class
   * which supports far fewer operations than Tensor.
1034 *
1035 * Avoid using this method if possible; try to use only Tensor APIs to perform
1036 * operations.
1037 */
1038 TENSORIMPL_MAYBE_VIRTUAL const Storage& storage() const {
1039 if (C10_UNLIKELY(storage_access_should_throw_)) {
1040 throw_storage_access_error();
1041 }
1042 return storage_;
1043 }
1044
1045 /**
1046 * Return the underlying storage, unsafely assuming this is a basic strided
1047 * tensor. In cases where `storage` access would throw, this returns a
1048 * default-constructed Storage.
1049 */
1050 inline const Storage& unsafe_storage() const {
1051 return storage_;
1052 }
1053
1054 bool unique_version() const {
1055 return version_counter_.unique();
1056 }
1057
1058 protected:
1059 virtual Layout layout_impl() const {
1060 TORCH_CHECK(
1061 false, "layout_impl is only implemented for TensorImpl subclasses.");
1062 }
1063
1064 public:
1065 // Whether a tensor is sparse COO or not.
1066 bool is_sparse() const {
    // NB: This method is not virtual and avoids dispatches for performance
1068 // reasons.
1069 return key_set_.has_all(c10::sparse_ks);
1070 }
1071
1072 // Whether a tensor is sparse CSR or not.
1073 bool is_sparse_csr() const {
1074 return layout() == kSparseCsr;
1075 }
1076
1077 bool is_quantized() const {
    // NB: This method is not virtual and avoids dispatches for performance
1079 // reasons.
1080 constexpr auto quantized_ks = DispatchKeySet(DispatchKey::Quantized);
1081 return key_set_.has_all(quantized_ks);
1082 }
1083
1084 bool is_meta() const {
    // NB: This method is not virtual and avoids dispatches for performance
1086 // reasons.
1087 if (C10_UNLIKELY(device_policy_)) {
1088 return device_custom().is_meta();
1089 }
1090 return device_opt_.has_value() && device_opt_->type() == kMeta;
1091 }
1092
1093 bool is_cpu() const {
    // NB: This method is not virtual and avoids dispatches for performance
1095 // reasons.
1096 if (C10_UNLIKELY(device_policy_)) {
1097 return device_custom().is_cpu();
1098 }
1099 // Note: we cannot rely on dispatch keys to determine the device type
1100 // of a tensor, because "wrapper" tensors (like FunctionalTensorWrapper)
1101 // don't include backend dispatch keys.
1102 return device_opt_.has_value() && device_opt_->type() == kCPU;
1103 }
1104
1105 bool is_cuda() const {
    // NB: This method is not virtual and avoids dispatches for performance
1107 // reasons.
1108 if (C10_UNLIKELY(device_policy_)) {
1109 return device_custom().is_cuda();
1110 }
1111 return device_opt_.has_value() && device_opt_->type() == kCUDA;
1112 }
1113
1114 bool is_xpu() const {
    // NB: This method is not virtual and avoids dispatches for performance
1116 // reasons.
1117 if (C10_UNLIKELY(device_policy_)) {
1118 return device_custom().is_xpu();
1119 }
1120 return device_opt_.has_value() && device_opt_->type() == kXPU;
1121 }
1122
1123 bool is_ipu() const {
1124 if (C10_UNLIKELY(device_policy_)) {
1125 return device_custom().is_ipu();
1126 }
1127 return device_opt_.has_value() && device_opt_->type() == kIPU;
1128 }
1129
1130 bool is_xla() const {
1131 if (C10_UNLIKELY(device_policy_)) {
1132 return device_custom().is_xla();
1133 }
1134 return device_opt_.has_value() && device_opt_->type() == kXLA;
1135 }
1136
1137 bool is_hpu() const {
1138 if (C10_UNLIKELY(device_policy_)) {
1139 return device_custom().is_hpu();
1140 }
1141 return device_opt_.has_value() && device_opt_->type() == kHPU;
1142 }
1143
1144 bool is_lazy() const {
1145 if (C10_UNLIKELY(device_policy_)) {
1146 return device_custom().is_lazy();
1147 }
1148 return device_opt_.has_value() && device_opt_->type() == kLazy;
1149 }
1150
1151 bool is_hip() const {
    // NB: This method is not virtual and avoids dispatches for performance
1153 // reasons.
1154 if (C10_UNLIKELY(device_policy_)) {
1155 return device_custom().is_hip();
1156 }
1157 return device_opt_.has_value() && device_opt_->type() == kHIP;
1158 }
1159
1160 bool is_ve() const {
    // NB: This method is not virtual and avoids dispatches for performance
1162 // reasons.
1163 if (C10_UNLIKELY(device_policy_)) {
1164 return device_custom().is_ve();
1165 }
1166 return device_opt_.has_value() && device_opt_->type() == kVE;
1167 }
1168
1169 bool is_mkldnn() const {
1170 return key_set_.has_all(c10::mkldnn_ks);
1171 }
1172
1173 bool is_vulkan() const {
1174 if (C10_UNLIKELY(device_policy_)) {
1175 return device_custom().is_vulkan();
1176 }
1177 return device_opt_.has_value() && device_opt_->type() == kVulkan;
1178 }
1179
1180 bool is_metal() const {
1181 if (C10_UNLIKELY(device_policy_)) {
1182 return device_custom().is_metal();
1183 }
1184 return device_opt_.has_value() && device_opt_->type() == kMetal;
1185 }
1186
1187 bool is_mps() const {
1188 if (C10_UNLIKELY(device_policy_)) {
1189 return device_custom().is_mps();
1190 }
1191 return device_opt_.has_value() && device_opt_->type() == kMPS;
1192 }
1193
1194 bool is_ort() const {
1195 if (C10_UNLIKELY(device_policy_)) {
1196 return device_custom().is_ort();
1197 }
1198 return device_opt_.has_value() && device_opt_->type() == kORT;
1199 }
1200
1201 bool is_nested() const {
1202 return key_set_.has(DispatchKey::NestedTensor);
1203 }
1204
  // TODO: remove this once we don't automatically enable Autograd dispatch
  // keys in TensorImpl constructor.
  // DON'T USE THIS API!! It's only created for testing purposes in
  // file aten/src/ATen/core/boxing/impl/test_helpers.h
1210 void remove_autograd_key() {
1211 key_set_ = key_set_ - autograd_dispatch_keyset;
1212 }
1213
  // An inference tensor doesn't have the Autograd or ADInplaceOrView keys.
  // Invariant:
  //   An inference tensor has version_counter_.enabled() == false
1217 bool is_inference() {
1218 bool no_ADInplaceOrView = !key_set_.has_any(c10::inplace_or_view_ks);
1219 bool no_Autograd = !key_set_.has_any(c10::autograd_dispatch_keyset);
1220 TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
1221 no_ADInplaceOrView == no_Autograd,
1222 "ADInplaceOrView and Autograd keys must be on/off at the same time.");
1223 return no_ADInplaceOrView && no_Autograd;
1224 }
1225
1226 int64_t get_device() const {
1227 if (C10_UNLIKELY(device_policy_)) {
1228 return device_custom().index();
1229 }
1230 return device_default().index();
1231 }
1232
1233 Device device() const {
1234 if (C10_UNLIKELY(device_policy_)) {
1235 return device_custom();
1236 }
1237 return device_default();
1238 }
1239
1240 protected:
1241 c10::Device device_default() const {
1242 TORCH_CHECK(device_opt_.has_value(), "tensor does not have a device");
1243 // See NOTE [c10::optional operator usage in CUDA]
1244 return *device_opt_;
1245 }
1246
1247 public:
1248 Layout layout() const {
1249 if (C10_UNLIKELY(layout_policy_)) {
1250 return layout_custom();
1251 }
1252
    // NB: This method is not virtual and avoids dispatches for perf.
    // strided is also the most common layout type, so we check for
    // the strided case first.
1256 // This keyset must also be kept in sync with the logic in
1257 // is_sparse() / is_sparse_csr() / is_mkldnn()
1258 constexpr auto sparse_and_sparsecsr_and_mkldnn_ks =
1259 c10::sparse_ks | c10::sparse_csr_ks | c10::mkldnn_ks;
1260 if (!key_set_.has_any(sparse_and_sparsecsr_and_mkldnn_ks)) {
1261 return kStrided;
1262 } else if (is_sparse()) {
1263 return kSparse;
1264 } else if (key_set_.has_any(c10::sparse_csr_ks)) {
1265 // Typically, the tensor dispatch keys define the tensor layout
      // uniquely. This allows using a non-virtual layout method for
      // better performance. However, when a tensor's layout depends,
      // say, on tensor attributes, one must use this execution path
      // where the corresponding tensor impl class overrides the virtual
      // layout_impl() method.
1271 //
1272 // TODO: implement layout() as native function/method so that
1273 // __torch_dispatch__ users will be able to redefine the
1274 // layout() method.
1275 return layout_impl();
1276 } else {
1277 TORCH_INTERNAL_ASSERT(
1278 is_mkldnn(), "There is an error in the layout calculation logic.");
1279 return kMkldnn;
1280 }
1281 }
1282
1283 /**
1284 * True if a tensor was auto-wrapped from a C++ or Python number.
1285 * For example, when you write 't + 2', 2 is auto-wrapped into a Tensor
1286 * with `is_wrapped_number_` set to true.
1287 *
1288 * Wrapped numbers do not participate in the result type computation for
1289 * mixed-type operations if there are any Tensors that are not wrapped
1290 * numbers. This is useful, because we want 't + 2' to work with
1291 * any type of tensor, not just LongTensor (which is what integers
1292 * in Python represent).
1293 *
1294 * Otherwise, they behave like their non-wrapped equivalents.
1295 * See [Result type computation] in TensorIterator.h.
1296 *
1297 * Why did we opt for wrapped numbers, as opposed to just having
1298 * an extra function add(Tensor, Scalar)? This helps greatly reduce
   * the amount of code we have to write for add, since a Tensor-Scalar
   * addition is really just a Tensor-Tensor addition when the RHS is
   * 0-dim (except for promotion behavior).
1302 */
1303 bool is_wrapped_number() const {
1304 return is_wrapped_number_;
1305 }
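
  // A short illustration of the wrapped-number rule above (sketch):
  //
  //   at::Tensor t = at::ones({2}, at::kHalf);
  //   auto r = t + 2.0; // 2.0 becomes a wrapped 0-dim double tensor, but
  //                     // since it is a wrapped number it does not promote
  //                     // the result: r stays kHalf rather than kDouble.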
1306
1307 /**
1308 * Set whether or not a tensor was auto-wrapped from a C++ or Python
1309 * number. You probably don't want to call this, unless you are
1310 * writing binding code.
1311 */
1312 void set_wrapped_number(bool value) {
1313 TORCH_INTERNAL_ASSERT(dim() == 0);
1314 is_wrapped_number_ = value;
1315 }
1316
1317 /**
1318 * Returns true if Tensor supports as_strided and as_strided_backward.
1319 * This is used in autograd to perform inplace update on view Tensors.
1320 * See Note [View + Inplace update for base tensor] and
1321 * [View + Inplace update for view tensor] for details.
   * Note this method only returns true for the XLA backend, where it
   * simulates strided Tensors to support most view ops, but it cannot
   * fully support the general `as_strided` case.
   * It can be expanded as needed in the future, e.g., sparse Tensor.
1326 */
1327 inline bool support_as_strided() const {
1328 if (is_nested()) {
1329 return false;
1330 }
1331 if (key_set_.has(DispatchKey::Functionalize)) {
1332 return false;
1333 }
1334 return device().supports_as_strided();
1335 }
1336
1337 // ~~~~~ Autograd API ~~~~~
1338 // Some methods below are defined in TensorImpl.cpp because Tensor is an
1339 // incomplete type.
1340
1341 /**
1342 * Set whether or not a tensor requires gradient.
1343 */
1344 void set_requires_grad(bool requires_grad);
1345
1346 /**
1347 * True if a tensor requires gradient. Tensors which require gradient
1348 * have history tracked for any operations performed on them, so that
1349 * we can automatically differentiate back to them. A tensor that
1350 * requires gradient and has no history is a "leaf" tensor, which we
1351 * accumulate gradients into.
1352 */
1353 bool requires_grad() const;
1354
1355 /**
1356 * Return a mutable reference to the gradient. This is conventionally
1357 * used as `t.grad() = x` to set a gradient to a completely new tensor.
1358 */
1359 at::Tensor& mutable_grad();
1360
1361 /**
1362 * Return the accumulated gradient of a tensor. This gradient is written
1363 * into when performing backwards, when this tensor is a leaf tensor.
1364 */
1365 const at::Tensor& grad() const;
1366
1367 /**
1368 * Whether or not the imaginary part of the tensor should be negated
1369 */
1370 inline bool is_conj() const {
1371 constexpr auto conjugate_ks = DispatchKeySet(DispatchKey::Conjugate);
1372 return key_set_.has_all(conjugate_ks);
1373 }
1374
1375 /**
1376 * Set whether or not to take the conjugate of the tensor (flip the imaginary
1377 * bit).
1378 */
1379 void _set_conj(bool value) {
1380 if (value) {
1381 key_set_ = key_set_.add(DispatchKey::Conjugate);
1382 TORCH_INTERNAL_ASSERT(isComplexType(typeMetaToScalarType(dtype())));
1383 } else {
1384 key_set_ = key_set_.remove(DispatchKey::Conjugate);
1385 }
1386 }
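
  // Sketch of how the conjugate bit typically gets set in practice (via lazy
  // conjugation at the ATen level rather than by calling _set_conj directly):
  //
  //   at::Tensor z = at::randn({2}, at::kComplexFloat);
  //   at::Tensor zc = z.conj(); // lazily conjugated view of z
  //   // zc.unsafeGetTensorImpl()->is_conj() == true; the imaginary part is
  //   // only materialized when a kernel resolves the conjugation.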
1387
1388 /**
1389 * XXX: do not use, private api!
1390 * Update the backend component related keys to the backend component
1391 * corresponding to this device.
1392 */
1393 void _change_backend_component_keys(c10::Device device);
1394
1395 /**
1396 * Whether or not the tensor is a zerotensor
1397 */
1398 inline bool _is_zerotensor() const {
1399 constexpr auto zerotensor_ks = DispatchKeySet(DispatchKey::ZeroTensor);
1400 return key_set_.has_all(zerotensor_ks);
1401 }
1402
1403 /**
1404 Set whether or not the tensor is a zero tensor
1405 */
1406 void _set_zero(bool value) {
1407 if (value) {
1408 TORCH_INTERNAL_ASSERT(
1409 false,
1410 "Please call `torch._efficientzerotensor` if you want to create a tensor with no storage.");
1411 } else {
1412 key_set_ = key_set_.remove(DispatchKey::ZeroTensor);
1413 }
1414 }
1415
1416 /**
1417 * Whether or not the tensor should be negated
1418 */
1419 inline bool is_neg() const {
1420 constexpr auto negative_ks = DispatchKeySet(DispatchKey::Negative);
1421 return key_set_.has_all(negative_ks);
1422 }
1423
1424 /**
   * Set whether or not the tensor should be negated (flip the negation
   * bit).
1427 */
1428 void _set_neg(bool value) {
1429 if (value) {
1430 key_set_ = key_set_.add(DispatchKey::Negative);
1431 } else {
1432 key_set_ = key_set_.remove(DispatchKey::Negative);
1433 }
1434 }
1435
1436 /**
1437 * Return the accumulated gradient of a tensor. This gradient is computed
1438 * using forward mode AD.
1439 *
1440 * This is an internal API that should never be used by end users.
1441 *
1442 * The API is as follows:
   * - "level" specifies the level of forward AD nesting for which the
1444 * gradient should be returned. Note that since levels are not fully
1445 * supported yet, this argument should be 0. See documentation for
1446 * torch::autograd::enter_dual_level for more details about forward AD
1447 * nesting.
1448 * - "self" should represent the Tensor whose forward grad is accessed. It
   *   is required when dealing with views.
1450 */
1451 const at::Tensor& _fw_grad(uint64_t level, const at::TensorBase& self) const;
1452
1453 /**
1454 * Sets the forward gradient for this Tensor.
1455 * The given Tensor might not be used directly and its content will be copied.
1456 *
1457 * This is an internal API that should never be used by end users.
1458 *
1459 * The API is as follows:
1460 * - "new_grad" is a Tensor containing the new value of the gradient that
1461 * should be set
1462 * - "self" should represent the Tensor whose forward grad is accessed. It
   *   is required when dealing with views.
   * - "level" specifies the level of forward AD nesting for which the
1465 * gradient should be set. Note that since levels are not fully supported
1466 * yet, this argument should be 0. See documentation for
1467 * torch::autograd::enter_dual_level for more details about forward AD
1468 * nesting.
   * - "is_inplace_op" is a boolean flag that tells whether this gradient was
   *   generated by an inplace operation or an out-of-place one. This allows
1471 * better error checking.
1472 */
1473 void _set_fw_grad(
1474 const at::TensorBase& new_grad,
1475 const at::TensorBase& self,
1476 uint64_t level,
1477 bool is_inplace_op);
1478
1479 /**
1480 * Return a typed data pointer to the actual data which this tensor refers to.
1481 * This checks that the requested type (from the template parameter) matches
1482 * the internal type of the tensor.
1483 *
1484 * It is invalid to call data() on a dtype-uninitialized tensor, even if
1485 * the size is 0.
1486 *
1487 * WARNING: If a tensor is not contiguous, you MUST use strides when
1488 * performing index calculations to determine the location of elements in
1489 * the tensor. We recommend using 'TensorAccessor' to handle this computation
1490 * for you; this class is available from 'Tensor'.
1491 */
1492 template <typename T>
1493 inline T* data() const {
1494 TORCH_CHECK(
1495 data_type_.Match<T>(),
1496 "Tensor type mismatch, caller expects elements to be ",
1497 caffe2::TypeMeta::TypeName<T>(),
1498 ", while tensor contains ",
1499 data_type_.name(),
1500 ". ");
1501 return data_ptr_impl<T>();
1502 }
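
  // A minimal sketch of strided element access through data<T>(), per the
  // WARNING above (assumes a 2-d float tensor whose dtype check passes):
  //
  //   float* p = impl->data<float>();
  //   auto strides = impl->strides();
  //   // element (i, j) lives at p[i * strides[0] + j * strides[1]],
  //   // NOT necessarily at p[i * impl->sizes()[1] + j]; the two only agree
  //   // when the tensor is contiguous.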
1503
1504 /**
1505 * More efficient helper for Tensor::data_ptr(). Like data<T>(), but
1506 * does not do a type check. Unlike the untemplated data(), does
1507 * check has_storage() and storage_initialized().
1508 */
1509 template <typename T>
1510 inline T* data_ptr_impl() const {
1511 TORCH_CHECK(
1512 has_storage(),
1513 "Cannot access data pointer of Tensor that doesn't have storage");
1514 TORCH_CHECK(
1515 storage_initialized(),
1516 "The tensor has a non-zero number of elements, but its data is not allocated yet. "
1517 "Caffe2 uses a lazy allocation, so you will need to call "
1518 "mutable_data() or raw_mutable_data() to actually allocate memory.");
1519 // Caller does the type check.
1520 return storage_.unsafe_data<T>() + storage_offset_;
1521 }
1522
1523 /**
1524 * Return a void* data pointer to the actual data which this tensor refers to.
1525 *
1526 * It is invalid to call data() on a dtype-uninitialized tensor, even if the
1527 * size is 0.
1528 *
   * WARNING: The data pointed to by this tensor may not be contiguous; do NOT
1530 * assume that itemsize() * numel() is sufficient to compute the bytes that
1531 * can be validly read from this tensor.
1532 */
1533 inline void* data() const {
1534 TORCH_CHECK(
1535 has_storage(),
1536 "Cannot access data pointer of Tensor that doesn't have storage");
1537 TORCH_CHECK(
1538 dtype_initialized(),
1539 "Cannot access data pointer of Tensor that doesn't have initialized dtype "
1540 "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data<T>() on x)");
1541 // Computing an offset into an empty tensor would be UB, since an empty
1542 // tensor's storage will be nullptr, and adding a nonzero offset to nullptr
1543 // is UB. So we skip the offset computation in this case.
1544 if (is_empty()) {
1545 return nullptr;
1546 }
1547 return static_cast<void*>(
1548 static_cast<char*>(storage_.data()) +
1549 data_type_.itemsize() * storage_offset_);
1550 }
1551
1552 /**
1553 * Like data<T>(), but performs no checks. You are responsible for ensuring
1554 * that all invariants required by data() are upheld here.
1555 */
1556 template <typename T>
1557 inline T* unsafe_data() const {
1558 return storage_.unsafe_data<T>() + storage_offset_;
1559 }
1560
1561 /**
1562 * Returns the TypeMeta of a tensor, which describes what data type
1563 * it is (e.g., int, float, ...)
1564 */
1565 const caffe2::TypeMeta dtype() const {
1566 return data_type_;
1567 }
1568
1569 /**
1570 * Return the size of a single element of this tensor in bytes.
1571 */
1572 size_t itemsize() const {
1573 TORCH_CHECK(
1574 dtype_initialized(),
1575 "Cannot report itemsize of Tensor that doesn't have initialized dtype "
1576 "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data<T>() on x)");
1577 return data_type_.itemsize();
1578 }
1579
1580 protected:
1581 /**
1582 * Returns the human-readable name of the actual type of this object (e.g.,
1583 * TensorImpl, BatchedTensorImpl, etc.). Used for error messages.
1584 */
1585 virtual const char* tensorimpl_type_name() const {
1586 return "TensorImpl";
1587 }
1588
1589 private:
1590 [[noreturn]] void throw_storage_access_error() const;
1591
1592 public:
1593 /**
   * True if a tensor has no elements (i.e., numel() == 0).
1595 */
1596 inline bool is_empty() const {
1597 return numel() == 0;
1598 }
1599
1600 // if we are going to use sym sizes, we should be setting sym strides at the
1601 // same time, otherwise it's very easy to misuse this API
1602 void set_sizes_and_strides(
1603 c10::SymIntArrayRef sizes,
1604 c10::SymIntArrayRef strides,
1605 c10::optional<c10::SymInt> storage_offset = c10::nullopt);
1606 // This is renamed to avoid breaking overload BC
1607 void generic_set_sizes_contiguous(c10::SymIntArrayRef sizes);
1608 void generic_set_sizes_contiguous(c10::IntArrayRef sizes) {
1609 set_sizes_contiguous(sizes);
1610 }
1611
1612 /**
1613 * Change the size at some dimension. This DOES NOT update strides;
1614 * thus, most changes to size will not preserve contiguity. You probably
1615 * also want to call set_stride() when you call this.
1616 *
1617 * TODO: This should be jettisoned in favor of `set_sizes_and_strides`,
1618 * which is harder to misuse.
1619 */
1620 virtual void set_size(int64_t dim, int64_t new_size) {
1621 TORCH_CHECK(
1622 allow_tensor_metadata_change(),
1623 "set_size ",
1624 err_msg_tensor_metadata_change_not_allowed);
1625 TORCH_CHECK(
1626 !matches_policy(SizesStridesPolicy::CustomSizes),
1627 "set_size() called on tensor with dynamic shapes or customized size behavior")
1628 sizes_and_strides_.size_at(dim) = new_size;
1629 refresh_numel();
1630 refresh_contiguous();
1631 }
1632
1633 /**
1634 * Change the stride at some dimension.
1635 *
1636 * TODO: This should be jettisoned in favor of `set_sizes_and_strides`,
1637 * which is harder to misuse.
1638 */
1639 virtual void set_stride(int64_t dim, int64_t new_stride) {
1640 TORCH_CHECK(
1641 allow_tensor_metadata_change(),
1642 "set_stride ",
1643 err_msg_tensor_metadata_change_not_allowed);
1644 TORCH_CHECK(
1645 !has_symbolic_sizes_strides_,
1646 "set_stride() called on tensor with symbolic shape")
1647 sizes_and_strides_.stride_at_unchecked(dim) = new_stride;
1648 refresh_contiguous();
1649 }
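
  // Example (illustrative sketch): set_size() leaves strides untouched, so a
  // manual metadata edit usually needs both calls, assuming `impl` allows
  // metadata changes.
  //
  //   impl.set_size(/*dim=*/0, /*new_size=*/3);
  //   impl.set_stride(/*dim=*/0, /*new_stride=*/1);  // keep strides consistent
  //
  // Prefer set_sizes_and_strides(), which updates both in one call.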
1650
1651 /**
1652 * Set the offset into the storage of this tensor.
1653 *
   * WARNING: This does NOT check whether the tensor is still in bounds for the
   * new location in the storage; the caller is responsible for checking this
   * (and resizing if necessary).
1657 */
1658 virtual void set_storage_offset(int64_t storage_offset) {
1659 TORCH_CHECK(
1660 allow_tensor_metadata_change(),
1661 "set_storage_offset ",
1662 err_msg_tensor_metadata_change_not_allowed);
1663 // TODO: this should probably consult policy
1664 TORCH_CHECK(
1665 !has_symbolic_sizes_strides_,
1666 "set_storage_offset() called on tensor with symbolic shape")
1667 storage_offset_ = storage_offset;
1668 }
1669
1670 /**
1671 * Like set_sizes_and_strides but assumes contiguous strides.
1672 *
1673 * WARNING: This function does not check if the requested
1674 * sizes/strides are in bounds for the storage that is allocated;
1675 * this is the responsibility of the caller
1676 */
1677 void set_sizes_contiguous(IntArrayRef new_size) {
1678 TORCH_CHECK(
1679 allow_tensor_metadata_change(),
1680 "set_sizes_contiguous ",
1681 err_msg_tensor_metadata_change_not_allowed);
1682 TORCH_CHECK(
1683 !matches_policy(SizesStridesPolicy::CustomStrides),
1684 "tried to directly modify sizes for customized tensor");
1685 sizes_and_strides_.set_sizes(new_size);
1686
1687 refresh_numel();
1688 empty_tensor_restride(
1689 MemoryFormat::Contiguous); // calls refresh_contiguous()
1690 }
1691
1692 /**
1693 * Set the sizes and strides of a tensor.
1694 *
1695 * WARNING: This function does not check if the requested
1696 * sizes/strides are in bounds for the storage that is allocated;
1697 * this is the responsibility of the caller
1698 */
1699 void set_sizes_and_strides(
1700 IntArrayRef new_size,
1701 IntArrayRef new_stride,
1702 c10::optional<int64_t> storage_offset = c10::nullopt) {
1703 TORCH_CHECK(
1704 allow_tensor_metadata_change(),
1705 "set_sizes_and_strides ",
1706 err_msg_tensor_metadata_change_not_allowed);
1707 TORCH_CHECK(
1708 !has_symbolic_sizes_strides_,
1709 "set_sizes_and_strides() called on tensor with symbolic shape")
1710 TORCH_CHECK(
1711 new_size.size() == new_stride.size(),
1712 "dimensionality of sizes (",
1713 new_size.size(),
1714 ") must match dimensionality of strides (",
1715 new_stride.size(),
1716 ")");
1717 const auto new_dim = new_size.size();
1718
1719 sizes_and_strides_.set_sizes(new_size);
1720
1721 if (new_dim > 0) {
1722 for (size_t dim = new_dim - 1;; dim--) {
1723 if (new_stride[dim] >= 0) {
1724 sizes_and_strides_.stride_at_unchecked(dim) = new_stride[dim];
1725 } else {
1726 // XXX: This behavior is surprising and may need to be removed to
1727 // support negative strides. Some pytorch functions rely on it:
1728 // for example, torch.cat (run TestTorch.test_cat_empty).
1729 if (dim == new_dim - 1) {
1730 sizes_and_strides_.stride_at_unchecked(dim) = 1;
1731 } else {
1732 // Keep stride monotonically increasing to match NumPy.
1733 sizes_and_strides_.stride_at_unchecked(dim) =
1734 std::max<int64_t>(
1735 sizes_and_strides_.size_at_unchecked(dim + 1), 1) *
1736 sizes_and_strides_.stride_at_unchecked(dim + 1);
1737 }
1738 }
1739 if (dim == 0)
1740 break;
1741 }
1742 }
1743
1744 refresh_numel();
1745 refresh_contiguous();
1746
1747 if (storage_offset.has_value()) {
1748 storage_offset_ = *storage_offset;
1749 }
1750 }
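
  // Example (illustrative sketch): per the XXX note above, a negative stride
  // asks this method to fill in a contiguous-style stride, assuming `impl`
  // allows metadata changes.
  //
  //   impl.set_sizes_and_strides({2, 3, 4}, {12, 4, 1});   // explicit strides
  //   impl.set_sizes_and_strides({2, 3, 4}, {-1, -1, -1}); // computed {12, 4, 1}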
1751
1752 /**
1753 * Set whether a tensor allows changes to its metadata (e.g. sizes / strides /
1754 * storage / storage_offset). See NOTE [ Metadata Change for a Detached Tensor
1755 * ] for details.
1756 */
  void set_allow_tensor_metadata_change(bool value) {
    // TODO: at some point, we should kill this field completely.
    // NB: the requested value is currently ignored; the flag is
    // unconditionally re-enabled here.
    allow_tensor_metadata_change_ = true;
  }
1761
1762 /**
1763 * True if a tensor allows changes to its metadata (e.g. sizes / strides /
1764 * storage / storage_offset). See NOTE [ Metadata Change for a Detached Tensor
1765 * ] for details.
1766 */
1767 bool allow_tensor_metadata_change() const {
1768 return allow_tensor_metadata_change_;
1769 }
1770
1771 /**
1772 * Set the pointer to autograd metadata.
1773 */
1774 void set_autograd_meta(
1775 std::unique_ptr<c10::AutogradMetaInterface> autograd_meta);
1776
1777 /**
1778 * Return the pointer to autograd metadata. May return nullptr if the
1779 * tensor does not track gradients.
1780 */
1781 c10::AutogradMetaInterface* autograd_meta() const;
1782
1783 /**
1784 * Set the pointer to named tensor metadata.
1785 */
1786 void set_named_tensor_meta(
1787 std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta) {
1788 TORCH_WARN_ONCE(
1789 "Named tensors and all their associated APIs are an experimental feature ",
1790 "and subject to change. Please do not use them for anything important ",
1791 "until they are released as stable.");
1792#ifdef DEBUG
1793 if (named_tensor_meta) {
1794 TORCH_INTERNAL_ASSERT(named_tensor_meta->slow_dim() == dim());
1795 }
1796#endif
1797 if (named_tensor_meta) {
1798 if (!extra_meta_) {
1799 extra_meta_ = std::make_unique<ExtraMeta>();
1800 }
1801 extra_meta_->named_tensor_meta_ = std::move(named_tensor_meta);
1802 key_set_ = key_set_.add(DispatchKey::Named);
1803 } else {
1804 if (extra_meta_) {
1805 extra_meta_->named_tensor_meta_ = nullptr;
1806 }
1807 key_set_ = key_set_.remove(DispatchKey::Named);
1808 }
1809 }
1810
1811 void set_python_dispatch(bool k) {
1812 if (k) {
1813 key_set_ = key_set_.add(c10::python_ks);
1814 } else {
1815 key_set_ = key_set_ - c10::python_ks;
1816 }
1817 }
1818
1819 bool is_python_dispatch() const {
1820 return key_set_.has_all(c10::python_ks);
1821 }
1822
1823 /**
1824 * Return the pointer to named tensor metadata.
1825 */
1826 const c10::NamedTensorMetaInterface* named_tensor_meta() const {
1827 if (!extra_meta_) {
1828 return nullptr;
1829 }
1830 return extra_meta_->named_tensor_meta_.get();
1831 }
1832
1833 c10::NamedTensorMetaInterface* named_tensor_meta() {
1834 if (!extra_meta_) {
1835 return nullptr;
1836 }
1837 return extra_meta_->named_tensor_meta_.get();
1838 }
1839
1840 bool has_named_tensor_meta() const {
1841 if (!extra_meta_) {
1842 return false;
1843 }
1844 return extra_meta_->named_tensor_meta_ != nullptr;
1845 }
1846
1847 // NOTE [ TensorImpl Shallow-Copying ]
1848 //
1849 // TensorImpl shallow-copying is used when we want to have two Variables share
1850 // the same tensor metadata (e.g. sizes / strides / storage pointer /
1851 // storage_offset), but each with a different autograd history. Example call
1852 // sites:
1853 //
1854 // 1. `var_detached = var.detach()` uses `shallow_copy_and_detach()` to create
1855 // `var_detached` that shares the same tensor metadata with `var`, but with a
1856 // completely new autograd history.
1857 // 2. `var.set_data(tensor)` uses `shallow_copy_from()` to copy tensor
1858 // metadata from `tensor` into `var`, while keeping `var`'s original
1859 // AutogradMeta.
1860 //
1861 // Functions that shallow-copy a TensorImpl (such as
1862 // `shallow_copy_and_detach()` / `shallow_copy_from()` /
1863 // `copy_tensor_metadata()`) copy the tensor metadata fields (e.g. sizes /
1864 // strides / storage pointer / storage_offset) by value. However, the
1865 // following fields are not copied:
1866 //
1867 // 1. the AutogradMeta pointer, because it is unique for each Variable.
1868 // 2. the version counter, because the destination TensorImpl's version
1869 // counter is either set to the passed-in `version_counter` (in
1870 // `shallow_copy_and_detach()` and `copy_tensor_metadata()`), or it is kept
1871 // intact (in `shallow_copy_from()`). See NOTE [ Version Counter Sharing ] for
1872 // details.
1873 //
1874 // In `shallow_copy_and_detach()` and `copy_tensor_metadata()`, the passed-in
1875 // `allow_tensor_metadata_change` determines whether the TensorImpl
1876 // shallow-copy allows changes to its metadata (e.g. sizes / strides / storage
1877 // / storage_offset). See NOTE [ Metadata Change for a Detached Tensor ] for
1878 // details.
1879 //
1880 // In `shallow_copy_from()`, we don't check the destination TensorImpl's
1881 // `allow_tensor_metadata_change_`, because `shallow_copy_from()` is used for
1882 // implementing functions such as `var.set_data(tensor)`, which changes
1883 // `var`'s tensor metadata and expects its `allow_tensor_metadata_change_` to
1884 // be ignored.
1885
1886 /**
1887 * One TensorImpl can be copied to another TensorImpl if they have the same
   * DispatchKeySet. The only two special cases (for legacy reasons) are:
   * dense backends (e.g., CPU and CUDA) are compatible with each other, and
   * sparse backends (e.g., SparseCPU and SparseCUDA) are compatible with each
   * other.
1891 */
1892 inline bool has_compatible_shallow_copy_type(DispatchKeySet from) {
1893 auto is_dense = [](DispatchKeySet ts) {
1894 constexpr auto dense_backends = DispatchKeySet(
1895 {BackendComponent::CPUBit,
1896 BackendComponent::CUDABit,
1897 BackendComponent::MPSBit,
1898 BackendComponent::HIPBit,
1899 BackendComponent::XPUBit});
1900 constexpr auto dense_k = DispatchKeySet(DispatchKey::Dense);
1901 return ts.has_any(dense_k) && ts.has_any(dense_backends);
1902 };
1903 auto is_sparse = [](DispatchKeySet ts) {
1904 constexpr auto sparse_backends = DispatchKeySet(
1905 {BackendComponent::CPUBit,
1906 BackendComponent::CUDABit,
1907 BackendComponent::HIPBit,
1908 BackendComponent::XPUBit});
1909 constexpr auto sparse_k = DispatchKeySet(DispatchKey::Sparse);
1910 return ts.has_any(sparse_k) && ts.has_any(sparse_backends);
1911 };
1912 return (key_set_ == from) || (is_dense(key_set_) && is_dense(from)) ||
1913 (is_sparse(key_set_) && is_sparse(from));
1914 }
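
  // Example (illustrative sketch): given the lambdas above, a dense CPU
  // TensorImpl reports compatibility with a dense CUDA key set, but not with
  // a sparse one (`cpu_impl`, `cuda_dense_keys`, and `sparse_cpu_keys` are
  // hypothetical names).
  //
  //   cpu_impl.has_compatible_shallow_copy_type(cuda_dense_keys);  // true
  //   cpu_impl.has_compatible_shallow_copy_type(sparse_cpu_keys);  // false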
1915
1916 private:
1917 template <typename VariableVersion>
1918 c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach_core(
1919 VariableVersion&& version_counter,
1920 bool allow_tensor_metadata_change) const;
1921
1922 public:
1923 /**
1924 * Return a TensorImpl that is a shallow-copy of this TensorImpl.
1925 *
1926 * For usage of `version_counter` and `allow_tensor_metadata_change`,
1927 * see NOTE [ TensorImpl Shallow-Copying ].
1928 */
1929 virtual c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
1930 const c10::VariableVersion& version_counter,
1931 bool allow_tensor_metadata_change) const;
1932
1933 /**
1934 * Return a TensorImpl that is a shallow-copy of this TensorImpl.
1935 *
1936 * For usage of `version_counter` and `allow_tensor_metadata_change`,
1937 * see NOTE [ TensorImpl Shallow-Copying ].
1938 */
1939 virtual c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
1940 c10::VariableVersion&& version_counter,
1941 bool allow_tensor_metadata_change) const;
1942
1943 /**
1944 * Shallow-copies data from another TensorImpl into this TensorImpl.
1945 *
1946 * For why this function doesn't check this TensorImpl's
1947 * `allow_tensor_metadata_change_`, see NOTE [ TensorImpl Shallow-Copying ].
1948 */
1949 virtual void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) {
1950 copy_tensor_metadata(
1951 /*src_impl=*/impl.get(),
1952 /*dest_impl=*/this,
1953 /*version_counter=*/version_counter(),
1954 /*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
1955 refresh_numel();
1956 refresh_contiguous();
1957 }
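
  // Example (illustrative sketch of the call shapes; `impl` is a TensorImpl
  // and `src` an intrusive_ptr<TensorImpl>, both hypothetical; actual callers
  // choose the version counter per NOTE [ Version Counter Sharing ]):
  //
  //   // detach()-style: new impl sharing tensor metadata with `impl`
  //   auto detached = impl.shallow_copy_and_detach(
  //       impl.version_counter(), /*allow_tensor_metadata_change=*/false);
  //   // set_data()-style: copy metadata from `src` into `impl`,
  //   // keeping `impl`'s AutogradMeta
  //   impl.shallow_copy_from(src);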
1958
  // Inference tensors don't have a version counter,
  // so set_version_counter is a no-op for them.
1961 void set_version_counter(const c10::VariableVersion& version_counter) {
1962 TORCH_CHECK(
1963 !(is_inference() && version_counter.enabled()),
1964 "Cannot set version_counter for inference tensor");
1965 version_counter_ = version_counter;
1966 }
1967
1968 void set_version_counter(c10::VariableVersion&& version_counter) {
1969 TORCH_CHECK(
1970 !(is_inference() && version_counter.enabled()),
1971 "Cannot set version_counter for inference tensor");
1972 version_counter_ = std::move(version_counter);
1973 }
1974
1975 const c10::VariableVersion& version_counter() const noexcept {
1976 return version_counter_;
1977 }
1978
1979 void bump_version() {
1980 version_counter_.bump();
1981 }
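
  // Example (illustrative sketch): in-place mutations are expected to bump the
  // version so autograd can detect stale saved tensors, assuming `impl` is not
  // an inference tensor.
  //
  //   auto v0 = impl.version_counter().current_version();
  //   impl.bump_version();
  //   // impl.version_counter().current_version() is now v0 + 1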
1982
1983 impl::PyObjectSlot* pyobj_slot() {
1984 return &pyobj_slot_;
1985 }
1986
1987 const impl::PyObjectSlot* pyobj_slot() const {
1988 return &pyobj_slot_;
1989 }
1990
1991 private:
1992 // See NOTE [c10::optional operator usage in CUDA]
1993 // We probably don't want to expose this publicly until
1994 // the note is addressed.
1995 c10::optional<c10::Device> device_opt() const {
1996 return device_opt_;
1997 }
1998
1999 public:
2000 /**
2001 * The device type of a Tensor, e.g., DeviceType::CPU or DeviceType::CUDA.
2002 */
2003 DeviceType device_type() const {
2004 // TODO: A useful internal assert would be to show that device_opt_ is null
2005 // only if you are an undefined tensor
2006 TORCH_CHECK(
2007 device_opt_.has_value(),
2008 "device_type cannot be run on undefined Tensor");
2009 // See NOTE [c10::optional operator usage in CUDA]
2010 return (*device_opt_).type();
2011 }
2012
2013 /**
2014 * @brief Extends the outer-most dimension of this tensor by num elements,
2015 * preserving the existing data.
2016 *
2017 * The underlying data may be reallocated in order to accommodate the new
   * elements, in which case this tensor's capacity is grown by a factor of
   * growthPct. This ensures that Extend runs in amortized O(1) time
2020 * complexity.
2021 *
2022 * This op is auto-asynchronous if the underlying device (CUDA) supports it.
2023 */
2024 void Extend(int64_t num, float growthPct);
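
  // Example (illustrative sketch): appending `batch` rows to a Caffe2-style
  // 2-D buffer (`impl`, `batch`, and `feature_dim` are hypothetical).
  //
  //   impl.Resize(0, feature_dim);
  //   impl.mutable_data<float>();              // sets dtype, allocates lazily
  //   impl.Extend(batch, /*growthPct=*/40.0f); // grows outer dim, amortized
  //   float* rows = impl.mutable_data<float>();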
2025
2026 /**
2027 * @brief Reserve space for the underlying tensor.
2028 *
   * This must be called after Resize(), since we only specify the first
   * dimension. This does not copy over the old data to the newly allocated
   * space.
2031 */
2032 void ReserveSpace(int64_t outer_dim);
2033
2034 /**
2035 * @brief Resizes a tensor.
2036 *
2037 * Resize takes in a vector of ints specifying the dimensions of the tensor.
2038 * You can pass in an empty vector to specify that it is a scalar (i.e.
2039 * containing one single item).
2040 *
2041 * The underlying storage may be deleted after calling Resize: if the new
2042 * shape leads to a different number of items in the tensor, the old memory
2043 * is deleted and new memory will be allocated next time you call
2044 * mutable_data(). However, if the shape is different but the total number of
2045 * items is the same, the underlying storage is kept.
2046 *
2047 * This method respects caffe2_keep_on_shrink. Consult the internal logic
2048 * of this method to see exactly under what circumstances this flag matters.
2049 */
2050 template <typename... Ts>
2051 void Resize(Ts... dim_source) {
2052 bool size_changed = SetDims(dim_source...);
2053 if (size_changed) {
2054 HandleResize();
2055 }
2056 }
2057
2058 template <typename T>
2059 void Resize(const std::vector<T>& dim_source) {
2060 Resize(ArrayRef<T>(dim_source));
2061 }
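
  // Example (illustrative sketch): both calling styles funnel into the same
  // SetDims() overloads; memory is only allocated lazily by mutable_data().
  //
  //   impl.Resize(2, 3);                        // variadic form
  //   impl.Resize(std::vector<int64_t>{2, 3});  // vector form
  //   float* p = impl.mutable_data<float>();    // allocation happens here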
2062
2063 /**
2064 * Resizes the tensor without touching underlying storage.
   * This requires the total size of the tensor to remain constant.
2066 */
2067 void Reshape(const std::vector<int64_t>& dims);
2068
2069 /**
2070 * Release whatever memory the tensor was holding but keep size and type
2071 * information. Subsequent call to mutable_data will trigger new memory
2072 * allocation.
2073 */
2074 void FreeMemory();
2075
2076 /**
2077 * @brief Shares the data with another tensor.
2078 *
   * To share data between two tensors, the two tensors must already hold the
   * same number of elements. The reason we do not implicitly do a Resize to
   * make the two tensors have the same shape is that we want to allow tensors
   * of different shapes but the same number of items to share data. This
   * allows one to e.g. have an n-dimensional Tensor and a flattened version
2084 * sharing the same underlying storage.
2085 *
2086 * The source tensor should already have its data allocated.
2087 */
2088 // To be deprecated
2089 void ShareData(const TensorImpl& src);
2090
2091 void ShareExternalPointer(
2092 DataPtr&& data_ptr,
2093 const caffe2::TypeMeta data_type,
2094 size_t size_bytes);
2095
2096 /**
   * Returns a mutable raw pointer to the underlying storage. Since we will need
   * to know the type of the data for allocation, a TypeMeta object is passed in
   * to specify the necessary information. This is conceptually equivalent to
   * calling mutable_data<T>() where the TypeMeta parameter meta is derived from
   * the type T. This function differs from mutable_data<T>() in that the type T
   * can be specified at runtime via the TypeMeta object.
2103 *
2104 * If the existing data does not match the desired type, it will be deleted
2105 * and a new storage will be created.
2106 */
2107 inline void* raw_mutable_data(const caffe2::TypeMeta meta) {
2108 // For 0-size tensors it's fine to return any pointer (including nullptr)
2109 if (data_type_ == meta && storage_initialized()) {
2110 return static_cast<void*>(
2111 static_cast<char*>(storage_.data()) +
2112 storage_offset_ * meta.itemsize());
2113 } else {
2114 bool had_special_dtor = data_type_.placementDelete() != nullptr;
2115 storage_offset_ = 0;
2116 data_type_ = meta;
2117 // NB: device is not changed
2118
2119 // We can reuse the existing buffer if the current data does not have
2120 // a special destructor and the new data doesn't have a special
2121 // constructor.
2122 if (numel_ == 0 ||
2123 (meta.placementNew() == nullptr && !had_special_dtor &&
2124 (storage_.nbytes() >= (numel_ * data_type_.itemsize())))) {
2125 TORCH_INTERNAL_ASSERT(
            storage_offset_ == 0); // storage_offset_ was reset above
2127 return storage_.data();
2128 }
2129 const Allocator* allocator = storage_.allocator();
2130 // Storage might have nullptr allocator in rare cases, for example, if
2131 // an external memory segment has been wrapped with Tensor and we don't
2132 // know how to reallocate it. However, in order to preserve legacy C2
2133 // behavior, we allow reallocating the memory using default allocator.
2134 if (allocator == nullptr) {
2135 allocator = GetAllocator(storage_.device_type());
2136 }
2137 if (meta.placementNew()) {
2138 // For types that need placement new, we will call it, as well as
2139 // making sure that when the data is freed, it calls the right
2140 // destruction procedure.
2141 auto size = numel_;
2142 auto dtor = data_type_.placementDelete();
2143 auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize());
2144 storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr(
2145 std::move(data_ptr), dtor, size, storage_.device()));
2146 data_type_.placementNew()(storage_.data(), numel_);
2147 } else {
2148 // For fundamental type, new and delete is easier.
2149 storage_.set_data_ptr_noswap(
2150 allocator->allocate(numel_ * data_type_.itemsize()));
2151 }
2152 storage_.set_nbytes(numel_ * data_type_.itemsize());
2153 TORCH_INTERNAL_ASSERT(
2154 storage_offset_ == 0); // because we just reallocated
2155 device_opt_ = storage_.device();
2156 return storage_.data();
2157 }
2158 }
2159
2160 /**
2161 * Returns a typed pointer of the underlying storage.
2162 *
   * For fundamental types, we reuse the existing storage if there
   * is sufficient capacity.
2165 */
2166 template <typename T>
2167 inline T* mutable_data() {
2168 if (storage_initialized() && data_type_.Match<T>()) {
2169 return static_cast<T*>(storage_.data()) + storage_offset_;
2170 }
    // Check it here statically - otherwise TypeMeta would throw a runtime
    // error when attempting to invoke TypeMeta::ctor()
    static_assert(
        std::is_default_constructible<T>::value,
        "Tensor can't hold non-default-constructible types");
2176 return static_cast<T*>(raw_mutable_data(caffe2::TypeMeta::Make<T>()));
2177 }
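
  // Example (illustrative sketch): the typed and TypeMeta-driven paths are
  // interchangeable; the latter lets the element type be chosen at runtime.
  //
  //   float* a = impl.mutable_data<float>();
  //   void* b = impl.raw_mutable_data(caffe2::TypeMeta::Make<float>());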
2178
2179 /**
2180 * True if a tensor is storage initialized. A tensor may become
2181 * storage UNINITIALIZED after a Resize() or FreeMemory()
2182 */
2183 bool storage_initialized() const {
2184 TORCH_CHECK(
2185 has_storage(),
2186 "cannot call storage_initialized on tensor that does not have storage");
2187 return storage_.data() || numel_ == 0;
2188 }
2189
2190 /**
2191 * True if a tensor is dtype initialized. A tensor allocated with
2192 * Caffe2-style constructors is dtype uninitialized until the
2193 * first time mutable_data<T>() is called.
2194 */
2195 bool dtype_initialized() const noexcept {
2196 return data_type_ != caffe2::TypeMeta();
2197 }
2198
2199 void set_storage_keep_dtype(at::Storage storage) {
2200 TORCH_CHECK(
2201 allow_tensor_metadata_change(),
2202 "set_storage ",
2203 err_msg_tensor_metadata_change_not_allowed);
2204 storage_ = std::move(storage);
2205 device_opt_ = storage_.device();
2206 }
2207
2208 void set_storage_and_dtype(
2209 at::Storage storage,
2210 const caffe2::TypeMeta data_type) {
2211 set_storage_keep_dtype(std::move(storage));
2212 data_type_ = data_type;
2213 }
2214
2215 void empty_tensor_restride_symint(MemoryFormat memory_format);
2216
2217 /**
2218 * Set the strides of the tensor to match memory_format
2219 *
   * WARNING: This function doesn't rearrange data; it assumes the tensor is
   * memory-contiguous.
2222 */
2223 void empty_tensor_restride(MemoryFormat memory_format) {
2224 if (has_symbolic_sizes_strides_) {
2225 empty_tensor_restride_symint(memory_format);
2226 return;
2227 }
2228#ifdef DEBUG
2229 TORCH_INTERNAL_ASSERT(
2230 compute_numel() == numel_,
2231 "If you are seeing this error, that means empty_tensor_restride was "
2232 "called before setting correct numel");
2233#endif
2234 switch (memory_format) {
2235 case MemoryFormat::Contiguous: {
2236 // dim_ is a virtual call, don't repeat it
2237 const auto dim_ = dim();
2238 sizes_and_strides_.resize(dim_);
2239 if (dim_ > 0) {
2240 const auto last_idx = dim_ - 1;
2241 sizes_and_strides_.stride_at_unchecked(last_idx) = 1;
2242 for (auto i = last_idx - 1; i >= 0; --i) {
2243 sizes_and_strides_.stride_at_unchecked(i) =
2244 sizes_and_strides_.stride_at_unchecked(i + 1) *
2245 std::max<int64_t>(
2246 sizes_and_strides_.size_at_unchecked(i + 1), 1);
2247 }
2248 }
2249 break;
2250 }
2251 case MemoryFormat::ChannelsLast: {
2252 TORCH_CHECK(
2253 dim() == 4, "required rank 4 tensor to use channels_last format");
2254 set_sizes_and_strides(sizes(), get_channels_last_strides_2d(sizes()));
2255 break;
2256 }
2257 case MemoryFormat::ChannelsLast3d: {
2258 TORCH_CHECK(
2259 dim() == 5,
2260 "required rank 5 tensor to use channels_last_3d format");
2261 set_sizes_and_strides(sizes(), get_channels_last_strides_3d(sizes()));
2262 break;
2263 }
2264 case MemoryFormat::Preserve:
2265 TORCH_CHECK(false, "unsupported memory format ", memory_format);
2266 // Cleaning warning messages, no need to break as TORCH_CHECK(false)
2267 // terminates flow.
2268 // break;
2269 case MemoryFormat::NumOptions:
2270 TORCH_INTERNAL_ASSERT(false, "invalid memory format ", memory_format);
2271 }
2272 // recompute contiguous flag, as currently NHWC/NCHW flags are not mutually
2273 // exclusive see #24090
2274 refresh_contiguous();
2275 }
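
  // Worked example (illustrative): for sizes {2, 3, 4}, MemoryFormat::Contiguous
  // yields strides {12, 4, 1}. A size-1 dimension contributes a factor of
  // std::max(size, 1) == 1, so sizes {2, 1, 4} yield strides {4, 4, 1}.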
2276
2277 bool is_strides_like(at::MemoryFormat memory_format) const {
2278 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
2279 return is_strides_like_custom(memory_format);
2280 }
2281 return is_strides_like_default(memory_format);
2282 }
2283
2284 bool is_strides_like_channels_last() const {
2285 return is_strides_like(at::MemoryFormat::ChannelsLast);
2286 }
2287
2288 bool is_strides_like_channels_last_3d() const {
2289 return is_strides_like(at::MemoryFormat::ChannelsLast3d);
2290 }
2291
2292 bool is_non_overlapping_and_dense() const {
2293 if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
2294 return is_non_overlapping_and_dense_custom();
2295 }
2296 return is_non_overlapping_and_dense_default();
2297 }
2298
2299 bool has_symbolic_sizes_strides() const {
2300 return has_symbolic_sizes_strides_;
2301 }
2302
2303 private:
2304 void HandleResize();
2305
2306 // The Caffe2 Resize() method supports being called both as Resize({2,2}) as
2307 // well as variadic with Resize(2, 2). These overloads provide all of the
2308 // supported calling configurations, while being overloads (and not templates)
2309 // so that implicit conversions still work.
2310 //
2311 // SetDims on ArrayRef is internally implemented as a template, so we can
2312 // handle both ArrayRefs of different types (there are some uses of
2313 // Resize in Caffe2 which pass in int, not int64_t.)
2314
2315 template <
2316 typename T,
2317 typename = typename std::enable_if<std::is_integral<T>::value>::type>
2318 bool SetDimsTemplate(ArrayRef<T> src) {
2319 TORCH_CHECK(
2320 !has_symbolic_sizes_strides_,
2321 "SetDims() called on tensor with symbolic shape")
2322
2323 auto old_numel = numel_;
2324 sizes_and_strides_.resize(src.size());
2325 int64_t new_numel = 1;
2326 for (const auto i : c10::irange(src.size())) {
2327 new_numel *= src[i];
2328 sizes_and_strides_.size_at_unchecked(i) = src[i];
2329 }
2330 numel_ = new_numel;
2331 empty_tensor_restride(MemoryFormat::Contiguous);
2332 return numel_ != old_numel;
2333 }
2334
2335 bool SetDims(ArrayRef<int64_t> s) {
2336 return SetDimsTemplate(s);
2337 }
2338
2339 bool SetDims(ArrayRef<int> s) {
2340 return SetDimsTemplate(s);
2341 }
2342
2343 bool SetDims(ArrayRef<size_t> s) {
2344 return SetDimsTemplate(s);
2345 }
2346
2347 bool SetDims() {
2348 return SetDims(IntArrayRef{});
2349 }
2350
2351 bool SetDims(const int64_t d0) {
2352 return SetDims(IntArrayRef{d0});
2353 }
2354
2355 bool SetDims(const int64_t d0, const int64_t d1) {
2356 return SetDims(IntArrayRef{d0, d1});
2357 }
2358
2359 bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) {
2360 return SetDims(IntArrayRef{d0, d1, d2});
2361 }
2362
2363 bool SetDims(
2364 const int64_t d0,
2365 const int64_t d1,
2366 const int64_t d2,
2367 const int64_t d3) {
2368 return SetDims(IntArrayRef{d0, d1, d2, d3});
2369 }
2370
2371 /**
2372 * Compute the number of elements based on the sizes of a tensor.
2373 */
2374 // NB: This is ONLY called when sizes_and_strides_ is used directly; if
2375 // we are virtualizing, then numel calls are virtualized as well, and this
2376 // should never get called
2377 int64_t compute_numel() const {
2378 TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!has_symbolic_sizes_strides_);
2379#if C10_HAS_BUILTIN_OVERFLOW() && !defined(C10_MOBILE)
2380 // Use overflow checks if supported by the compiler
2381 return safe_compute_numel();
2382#else
2383 return c10::multiply_integers(sizes_and_strides_.sizes_arrayref());
2384#endif
2385 }
2386
2387 /**
2388 * Compute the number of elements based on the sizes of a
2389 * tensor. Catches integer overflow that may occur when a tensor
2390 * using a sparse layout has multiple dimensions with large sizes.
2391 */
2392 int64_t safe_compute_numel() const {
2393 TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!has_symbolic_sizes_strides_);
2394 uint64_t n = 1;
2395 bool overflows =
2396 c10::safe_multiplies_u64(sizes_and_strides_.sizes_arrayref(), &n);
2397 constexpr auto numel_max = std::min(
2398 static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
2399 static_cast<uint64_t>(std::numeric_limits<size_t>::max()));
2400
2401 overflows |= (n > numel_max);
2402 TORCH_CHECK(!overflows, "numel: integer multiplication overflow");
2403 return static_cast<int64_t>(n);
2404 }
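
  // Worked example (illustrative): sizes {1 << 31, 1 << 31, 1 << 31} describe
  // 2^93 elements, which overflows the uint64_t accumulation;
  // safe_multiplies_u64 reports the overflow and the TORCH_CHECK above fires
  // instead of silently wrapping around.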
2405
2406 SymInt compute_sym_numel() const {
2407 TORCH_INTERNAL_ASSERT_DEBUG_ONLY(has_symbolic_sizes_strides_);
2408 SymInt numel = 1;
2409 for (const auto& s : extra_meta_->sizes_) {
2410 numel *= s;
2411 }
2412 return numel;
2413 }
2414
2415 /**
2416 * Compute whether or not a tensor is contiguous based on the sizes and
2417 * strides of a tensor.
2418 */
2419 bool compute_contiguous(identity<bool>) const;
2420
2421 bool compute_channels_last_contiguous_2d(identity<bool>) const;
2422
2423 bool compute_channels_last_contiguous_3d(identity<bool>) const;
2424
2425 bool compute_strides_like_channels_last_2d(identity<bool>) const;
2426
2427 bool compute_strides_like_channels_last_3d(identity<bool>) const;
2428
2429 bool compute_non_overlapping_and_dense(identity<bool>) const;
2430
2431 SymBool compute_contiguous(identity<SymBool>) const;
2432
2433 SymBool compute_channels_last_contiguous_2d(identity<SymBool>) const;
2434
2435 SymBool compute_channels_last_contiguous_3d(identity<SymBool>) const;
2436
2437 SymBool compute_strides_like_channels_last_2d(identity<SymBool>) const;
2438
2439 SymBool compute_strides_like_channels_last_3d(identity<SymBool>) const;
2440
2441 SymBool compute_non_overlapping_and_dense(identity<SymBool>) const;
2442
2443 protected:
2444 /**
2445 * Recompute the cached numel of a tensor. Call this if you modify
2446 * sizes.
2447 *
2448 * For tensors with sparse layouts, use safe_refresh_numel() instead
2449 * because it will catch integer overflow that may occur for tensors
2450 * with sparse layouts and large dimensions.
2451 *
2452 * NB: We may uselessly recompute cached numel even in situations where
   * it is never used (e.g., if CustomSizes is set for Python). However,
2454 * we still must keep it up to date in case the Python overload
2455 * returns None (in which case we will consult the field here). This also
2456 * implies that sizes/strides will never be complete garbage; in the
2457 * very worst case scenario, it will reflect a 1-dim zero size tensor.
2458 */
2459 void refresh_numel() {
2460 if (has_symbolic_sizes_strides_) {
2461 extra_meta_->numel_ = compute_sym_numel();
2462 } else {
2463 numel_ = compute_numel();
2464 }
2465 }
2466
2467 /**
2468 * Recompute the cached numel of a tensor. Call this if you modify
2469 * sizes. Use only for tensors with sparse layouts because only
2470 * sparse tensor are likely to have sizes that may lead to integer
2471 * overflow when computing numel.
2472 */
2473 void safe_refresh_numel() {
2474 if (has_symbolic_sizes_strides_) {
2475 // NB: sym numel is done with symbolic integers, which handle overflow
2476 // checking
2477 extra_meta_->numel_ = compute_sym_numel();
2478 } else {
2479 numel_ = safe_compute_numel();
2480 }
2481 }
2482
2483 private:
2484 // NB: the TypeId argument prevents confusion where you pass a true/false
2485 // literal and pick the wrong overload
2486
2487 void _set_is_contiguous(identity<bool>, bool b) {
2488 is_contiguous_ = b;
2489 }
2490
2491 void _set_is_contiguous(identity<SymBool>, SymBool b) {
2492 extra_meta_->is_contiguous_ = std::move(b);
2493 }
2494
2495 void _set_is_channels_last_contiguous(identity<bool>, bool b) {
2496 is_channels_last_contiguous_ = b;
2497 }
2498
2499 void _set_is_channels_last_contiguous(identity<SymBool>, SymBool b) {
2500 extra_meta_->is_channels_last_contiguous_ = std::move(b);
2501 }
2502
2503 void _set_is_channels_last_3d_contiguous(identity<bool>, bool b) {
2504 is_channels_last_3d_contiguous_ = b;
2505 }
2506
2507 void _set_is_channels_last_3d_contiguous(identity<SymBool>, SymBool b) {
2508 extra_meta_->is_channels_last_3d_contiguous_ = std::move(b);
2509 }
2510
2511 void _set_is_channels_last(identity<bool>, bool b) {
2512 is_channels_last_ = b;
2513 }
2514
2515 void _set_is_channels_last(identity<SymBool>, SymBool b) {
2516 extra_meta_->is_channels_last_ = std::move(b);
2517 }
2518
2519 void _set_is_channels_last_3d(identity<bool>, bool b) {
2520 is_channels_last_3d_ = b;
2521 }
2522
2523 void _set_is_channels_last_3d(identity<SymBool>, SymBool b) {
2524 extra_meta_->is_channels_last_3d_ = std::move(b);
2525 }
2526
2527 void _set_is_non_overlapping_and_dense(identity<bool>, bool b) {
2528 is_non_overlapping_and_dense_ = b;
2529 }
2530
2531 void _set_is_non_overlapping_and_dense(identity<SymBool>, SymBool b) {
2532 extra_meta_->is_non_overlapping_and_dense_ = std::move(b);
2533 }
2534
2535 // These are little wrappers over the real compute_ functions that
2536 // can make use of other contiguity fields to short circuit.
2537 // They need to be implemented separately for SymBool, as SymBool does
2538 // not short circuit.
  // TODO: should the SymBool cases avoid the short circuit? Need to reason
  // about whether it's correct, and whether the simpler expressions are better
  // for analysis (maybe not!)
2542
2543 bool compute_is_non_overlapping_and_dense_dim4(identity<bool> type_id) {
2544 return is_contiguous_ || is_channels_last_contiguous_ ||
2545 compute_non_overlapping_and_dense(type_id);
2546 }
2547
2548 SymBool compute_is_non_overlapping_and_dense_dim4(identity<SymBool> type_id);
2549
2550 bool compute_channels_last_contiguous_3d_dim5(identity<bool> type_id) {
2551 return !is_channels_last_contiguous_ &&
2552 compute_channels_last_contiguous_3d(type_id);
2553 }
2554
2555 SymBool compute_channels_last_contiguous_3d_dim5(identity<SymBool> type_id);
2556
2557 bool compute_channels_last_2d_dim5(identity<bool> type_id) {
2558 return !is_channels_last_3d_contiguous_ &&
2559 compute_strides_like_channels_last_2d(type_id);
2560 }
2561
2562 SymBool compute_channels_last_2d_dim5(identity<SymBool> type_id);
2563
2564 bool compute_channels_last_3d_dim5(identity<bool> type_id) {
2565 return !is_channels_last_ && compute_strides_like_channels_last_3d(type_id);
2566 }
2567
2568 SymBool compute_channels_last_3d_dim5(identity<SymBool> type_id);
2569
2570 bool compute_is_non_overlapping_and_dense_dim5(identity<bool> type_id) {
2571 return is_contiguous_ || is_channels_last_contiguous_ ||
2572 is_channels_last_3d_contiguous_ ||
2573 compute_non_overlapping_and_dense(type_id);
2574 }
2575
2576 SymBool compute_is_non_overlapping_and_dense_dim5(identity<SymBool> type_id);
2577
2578 bool compute_is_non_overlapping_and_dense_anydim(identity<bool> type_id) {
2579 return is_contiguous_ || compute_non_overlapping_and_dense(type_id);
2580 }
2581
2582 SymBool compute_is_non_overlapping_and_dense_anydim(
2583 identity<SymBool> type_id);
2584
2585 template <typename T>
2586 void _refresh_contiguous() {
2587 auto type_id = identity<T>();
2588 // Note:
2589 // Dim 0, 1, 2 will never be a channels last 2d/3d format
    // Dim 3+ may be a channels last 2d format (only Dim 4 at this point).
    // Dim 4+ may be a channels last 3d format (only Dim 5 at this point).
2593 switch (dim()) {
2594 case 4: {
2595 _set_is_contiguous(type_id, compute_contiguous(type_id));
2596 _set_is_channels_last_contiguous(
2597 type_id, compute_channels_last_contiguous_2d(type_id));
2598 _set_is_channels_last_3d_contiguous(type_id, false);
2599 _set_is_channels_last(
2600 type_id, compute_strides_like_channels_last_2d(type_id));
2601 _set_is_channels_last_3d(type_id, false);
2602 _set_is_non_overlapping_and_dense(
2603 type_id, compute_is_non_overlapping_and_dense_dim4(type_id));
2604 break;
2605 }
2606 case 5: {
2607 _set_is_contiguous(type_id, compute_contiguous(type_id));
2608 _set_is_channels_last_contiguous(
2609 type_id, compute_channels_last_contiguous_2d(type_id));
2610 _set_is_channels_last_3d_contiguous(
2611 type_id, compute_channels_last_contiguous_3d_dim5(type_id));
2612 _set_is_channels_last(type_id, compute_channels_last_2d_dim5(type_id));
2613 _set_is_channels_last_3d(
2614 type_id, compute_channels_last_3d_dim5(type_id));
2615 _set_is_non_overlapping_and_dense(
2616 type_id, compute_is_non_overlapping_and_dense_dim5(type_id));
2617 break;
2618 }
2619 default:
        // is_channels_last_ and is_channels_last_3d_ are suggested
        // memory formats. Being channels_last_contiguous doesn't necessarily
        // mean the tensor is strided like channels_last: the strides on the
        // channel dimension may suggest the desired memory layout, but they
        // don't affect how the memory is actually stored.
2625 _set_is_contiguous(type_id, compute_contiguous(type_id));
2626 _set_is_channels_last_contiguous(type_id, false);
2627 _set_is_channels_last_3d_contiguous(type_id, false);
2628 _set_is_channels_last(type_id, false);
2629 _set_is_channels_last_3d(type_id, false);
2630 _set_is_non_overlapping_and_dense(
2631 type_id, compute_is_non_overlapping_and_dense_anydim(type_id));
2632 break;
2633 }
2634 }
2635
2636 protected:
2637 /**
2638 * Recompute the cached contiguity of a tensor. Call this if you modify sizes
2639 * or strides.
2640 */
2641 void refresh_contiguous() {
2642 if (has_symbolic_sizes_strides_) {
2643 _refresh_contiguous<SymBool>();
2644 } else {
2645 _refresh_contiguous<bool>();
2646 }
2647 }
2648
2649 /**
2650 * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer /
2651 * storage_offset) from one TensorImpl to another TensorImpl.
2652 *
2653 * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE
2654 * [ TensorImpl Shallow-Copying ].
2655 */
2656 static void copy_tensor_metadata(
2657 const TensorImpl* src_impl,
2658 TensorImpl* dest_impl,
2659 const c10::VariableVersion& version_counter,
2660 bool allow_tensor_metadata_change);
2661
2662 /**
2663 * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer /
2664 * storage_offset) from one TensorImpl to another TensorImpl.
2665 *
2666 * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE
2667 * [ TensorImpl Shallow-Copying ].
2668 */
2669 static void copy_tensor_metadata(
2670 const TensorImpl* src_impl,
2671 TensorImpl* dest_impl,
2672 c10::VariableVersion&& version_counter,
2673 bool allow_tensor_metadata_change);
2674
2675 private:
2676 static void copy_tensor_metadata_except_version_counter(
2677 const TensorImpl* src_impl,
2678 TensorImpl* dest_impl,
2679 bool allow_tensor_metadata_change);
2680
2681 protected:
2682 // Error message to show when the user tries to change tensor metadata on
2683 // Tensor created from .data or .detach().
2684 //
2685 // See NOTE [ Metadata Change for a Detached Tensor ] for details.
2686 static const char* const err_msg_tensor_metadata_change_not_allowed;
2687
2688 static void copy_generic_tensor_metadata(
2689 const TensorImpl* src_impl,
2690 TensorImpl* dest_impl);
2691
2692 public:
2693 void set_storage_access_should_throw() {
2694 storage_access_should_throw_ = true;
2695 }
2696
2697 public:
2698 void set_custom_sizes_strides(SizesStridesPolicy policy) {
2699 custom_sizes_strides_ = static_cast<uint8_t>(policy);
2700 refresh_sizes_strides_policy();
2701 }
2702
2703 void set_python_custom_sizes_strides(SizesStridesPolicy policy) {
2704 python_custom_sizes_strides_ = static_cast<uint8_t>(policy);
2705 refresh_sizes_strides_policy();
2706 }
2707
2708 void set_custom_device(bool custom_device) {
2709 custom_device_ = custom_device;
2710 refresh_device_policy();
2711 }
2712
2713 void set_custom_layout(bool custom_layout) {
2714 custom_layout_ = custom_layout;
2715 refresh_layout_policy();
2716 }
2717
2718 void set_python_custom_device(bool custom_device) {
2719 python_custom_device_ = custom_device;
2720 refresh_device_policy();
2721 }
2722
2723 void set_python_custom_layout(bool custom_layout) {
2724 python_custom_layout_ = custom_layout;
2725 refresh_layout_policy();
2726 }
2727
2728 protected:
2729 void refresh_sizes_strides_policy() {
2730 if (has_symbolic_sizes_strides_) {
2731 sizes_strides_policy_ =
2732 static_cast<uint8_t>(SizesStridesPolicy::CustomSizes);
2733 } else {
2734 sizes_strides_policy_ =
2735 std::max(custom_sizes_strides_, python_custom_sizes_strides_);
2736 }
2737 }
2738
2739 void refresh_device_policy() {
2740 device_policy_ = custom_device_ || python_custom_device_;
2741 }
2742
2743 void refresh_layout_policy() {
2744 layout_policy_ = custom_layout_ || python_custom_layout_;
2745 }
2746
2747 protected:
2748 Storage storage_;
2749
2750 private:
2751 // This pointer points to an AutogradMeta struct that stores autograd-specific
2752 // fields (such as grad_ / grad_fn_ / grad_accumulator_). This pointer always
2753 // has unique ownership (meaning only one TensorImpl can own it at a time).
2754 //
2755 // autograd_meta_ can be nullptr, as an optimization. When this occurs, it is
2756 // equivalent to having an autograd_meta_ pointing to a default constructed
2757 // AutogradMeta; intuitively, tensors which don't require grad will have this
2758 // field set to null.
2759 //
2760 // This means accessors on autograd_meta_ have to be careful to test if they
2761 // got a nullptr, and handle default behavior appropriately in that case.
2762 //
2763 // Note that we don't enforce the invariant that if the AutogradMeta is
2764 // default constructed, it is nullptr (to do this, we'd have to continuously
2765 // check if an AutogradMeta became, by mutation, equal to the default
2766 // constructed form. (This might be useful, but it seems rare enough that
2767 // a requires_grad=True variable will turn back into the requires_grad=False
2768 // version.) So there are three representable states:
2769 //
2770 // 1. autograd_meta_ == nullptr
2771 // 2. autograd_meta_ is default constructed (semantically, same as (1))
2772 // 3. autograd_meta_ has nontrivial information content
2773 //
2774 std::unique_ptr<c10::AutogradMetaInterface> autograd_meta_ = nullptr;
2775
2776 protected:
2777 std::unique_ptr<c10::ExtraMeta> extra_meta_ = nullptr;
2778
2779 c10::VariableVersion version_counter_;
2780
2781 impl::PyObjectSlot pyobj_slot_;
2782
2783 c10::impl::SizesAndStrides sizes_and_strides_;
2784
2785 int64_t storage_offset_ = 0;
2786 // If sizes and strides are empty, the numel is 1!! However, most of the
2787 // time, we will immediately set sizes to {0} and reset numel to 0.
2788 // (Can't do that in the default initializers, because there's no way to
2789 // spell "allocate a one-element array" for strides_).
2790 int64_t numel_ = 1;
2791
2792 // INVARIANT: When storage is non-null, this type meta must
2793 // agree with the type meta in storage
2794 caffe2::TypeMeta data_type_;
2795
2796 // NOTE [c10::optional operator usage in CUDA]
2797 // Our optional definition doesn't compile in .cu file if `value()` or
2798 // `operator->` are used. Instead, we always use `operator*`.
2799 // See https://github.com/pytorch/pytorch/issues/18496 for more info.
2800 // If this is too burdensome to maintain, we can just
2801 // manually implement this with an additional bool.
2802
2803 // INVARIANT: When storage is non-null, this Device must
2804 // agree with the type meta in storage.
2805 //
2806 // INVARIANT: device_opt_ is only nullopt for undefined tensors
2807 // (which do not have a device.)
2808 c10::optional<c10::Device> device_opt_;
2809
2810 // default member initializers for bit-fields only available with -std=c++2a
2811 // or -std=gnu++2a
2812 inline void init_bitfields() {
2813 is_contiguous_ = true;
2814 is_channels_last_ = false;
2815 is_channels_last_contiguous_ = false;
2816 is_channels_last_3d_ = false;
2817 is_channels_last_3d_contiguous_ = false;
2818 is_non_overlapping_and_dense_ = true;
2819 is_wrapped_number_ = false;
2820 allow_tensor_metadata_change_ = true;
2821 reserved_ = false;
2822 sizes_strides_policy_ = static_cast<uint8_t>(SizesStridesPolicy::Default);
2823 custom_sizes_strides_ = static_cast<uint8_t>(SizesStridesPolicy::Default);
2824 python_custom_sizes_strides_ =
2825 static_cast<uint8_t>(SizesStridesPolicy::Default);
2826 python_custom_device_ = false;
2827 python_custom_layout_ = false;
2828 custom_device_ = false;
2829 custom_layout_ = false;
2830 device_policy_ = false;
2831 layout_policy_ = false;
2832 storage_access_should_throw_ = false;
2833 has_symbolic_sizes_strides_ = false;
2834 }
2835
2836 // Tensor is contiguous
2837 bool is_contiguous_ : 1;
2838
2839 // Tensor is a subclass that does not permit storage access.
2840 bool storage_access_should_throw_ : 1;
2841
  // Tensor is stored in the channels last 2d memory format, when the dimension
  // order is (N)CHW and C-strides < W-strides < H-strides (< N-strides).
  // (If the size of any dimension is equal to 1, that dimension's stride
  // is not taken into account.)
2846 bool is_channels_last_ : 1;
2847
  // A channels-last contiguous tensor is a channels-last tensor that occupies
  // a contiguous memory block.
2850 bool is_channels_last_contiguous_ : 1;
2851
  // Tensor is stored in the channels last 3d memory format, when the dimension
  // order is (N)CDHW and C-strides < W-strides < H-strides < D-strides
  // (< N-strides). (If the size of any dimension is equal to 1, that
  // dimension's stride is not taken into account.)
2856 bool is_channels_last_3d_ : 1;
2857
  // A channels-last 3d contiguous tensor is a channels-last 3d tensor that
  // occupies a contiguous memory block.
2860 bool is_channels_last_3d_contiguous_ : 1;
2861
  // A dense tensor is a tensor that stores its values in a contiguous block of
  // memory. A non-overlapping tensor is a tensor whose elements occupy
  // distinct, non-overlapping memory locations.
2865 bool is_non_overlapping_and_dense_ : 1;
2866
2867 bool is_wrapped_number_ : 1;
2868
2869 // NOTE [ Metadata Change for a Detached Tensor ]
2870 //
2871 // Normally, a user is allowed to change the tensor metadata
2872 // (e.g. sizes / strides / storage / storage_offset) of a tensor.
2873 // However, if the tensor is created by `t1_detached = t1.data` in Python
2874 // or `t1_detached = t1.detach()` in Python/C++, those changes to the
2875 // tensor metadata of `t1_detached` will not be propagated back to the
2876 // original tensor `t1`. In order to make such changes explicitly illegal,
2877 // we created the `allow_tensor_metadata_change_` flag, to prevent users
2878 // from changing metadata of the detached tensor and expecting the original
2879 // tensor to also be updated.
2880 //
2881 // NOTE: For a full list of tensor metadata fields, please see
2882 // `copy_tensor_metadata()` in TensorImpl and its subclasses to find
2883 // which fields are copied by value.
2884 bool allow_tensor_metadata_change_ : 1;
2885
  // We decided to keep reserved_, and it will
  // live in Tensor after the split.
2888 // The logic is that if Extend() or ReserveSpace() were ever called,
2889 // then subsequent Resize()s will not free up Storage.
2890 bool reserved_ : 1;
2891
2892 // Call _custom() virtual methods for
2893 // strides()/is_contiguous()/sizes()/dim()/numel()
  // This is a combination of custom_sizes_strides_,
  // python_custom_sizes_strides_, and has_symbolic_sizes_strides_
2896 uint8_t sizes_strides_policy_ : 2;
2897
2898 // Whether or not sizes_and_strides_ contains a symbolic value.
2899 bool has_symbolic_sizes_strides_ : 1;
2900
2901 // Call _custom() virtual method for
2902 // strides()/is_contiguous()/sizes()/dim()/numel()
2903 uint8_t custom_sizes_strides_ : 2;
2904
2905 // Combo of custom_ and python_custom_
2906 bool device_policy_ : 1;
2907 bool layout_policy_ : 1;
2908
2909 // Call _custom() virtual method for device()
2910 bool custom_device_ : 1;
2911
2912 // Call _custom() virtual method for layout()
2913 bool custom_layout_ : 1;
2914
2915 // Call into Python for
2916 // strides()/is_contiguous()/sizes()/dim()/numel()
2917 uint8_t python_custom_sizes_strides_ : 2;
2918
2919 // Call into Python for device()
2920 bool python_custom_device_ : 1;
2921
2922 // Call into Python for layout()
2923 bool python_custom_layout_ : 1;
2924
2925 // The set of DispatchKeys which describe this tensor. NB: this
2926 // does NOT include Autograd (historically, it did, but
2927 // not anymore!)
2928 //
2929 // INVARIANT: extra_meta_->named_tensor_meta_ != nullptr <==>
2930 // key_set_.has(DispatchKey::Named)
2931 DispatchKeySet key_set_;
2932
2933 private:
2934 // C10_TensorImpl_Size_Check_Dummy_Class needs to be friends with
2935 // TensorImpl so it can inspect the size of private fields
2936 template <
2937 size_t cplusplus,
2938 size_t clang_ver_major,
2939 size_t gcc_ver,
2940 size_t gcc_ver_minor,
2941 size_t nvcc,
2942 size_t cuda_version,
2943 size_t cuda_version_major,
2944 size_t ptr_size>
2945 friend class C10_TensorImpl_Size_Check_Dummy_Class;
2946};
2947
2948// Note [TensorImpl size constraints]
2949// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2950// Changed the size of TensorImpl? If the size went down, good for
2951// you! Adjust the documentation below and the expected size.
2952// Did it go up? Read on...
2953//
2954// Struct size matters. In some production systems at Facebook, we have
2955// 400M live tensors during a training run. Do the math: every 64-bit
2956// word you add to Tensor is an extra 3.2 gigabytes in RAM.
2957//
2958// If you are a Facebook employee, you can check if the run in question
2959// has tipped you over the point using the command here:
2960// https://fburl.com/q5enpv98
2961//
2962// For reference, we OOMed at 160 bytes (20 words) per TensorImpl.
// This does not count overhead from the strides' out-of-line allocation or
// StorageImpl space, and it is from before we inlined sizes and strides
// directly into TensorImpl as SmallVectors.
2966//
2967// Our memory usage on 32-bit systems is suboptimal, but we're not checking
2968// for it at the moment (to help avoid rage inducing cycles when the
2969// 32-bit number is wrong).
2970//
2971// Current breakdown:
2972//
2973// vtable pointer
2974// strong refcount TODO: pack these into one word
2975// weak refcount
2976// storage pointer
2977// autograd metadata pointer
2978// named tensor metadata pointer
2979// version counter pointer
2980// PyObjectSlot
2981// SizesAndStrides size/pointer
2982// SizesAndStrides sizes (pre-allocated 0)
2983// SizesAndStrides sizes (pre-allocated 1)
2984// SizesAndStrides sizes (pre-allocated 2)
2985// SizesAndStrides sizes (pre-allocated 3)
2986// SizesAndStrides sizes (pre-allocated 4)
2987// SizesAndStrides strides (pre-allocated 0)
2988// SizesAndStrides strides (pre-allocated 1)
2989// SizesAndStrides strides (pre-allocated 2)
2990// SizesAndStrides strides (pre-allocated 3)
2991// SizesAndStrides strides (pre-allocated 4)
2992// storage offset
2993// numel
2994// data type, device, is_contiguous, storage_access_should_throw_, bitfields
2995// DispatchKeySet
2996//
2997
2998// Various preprocessor macros we use to check that the
2999// TensorImpl size hasn't changed unexpectedly. We undef
3000// these later.
3001#ifndef __NVCC__
3002#define C10_NVCC 0
3003#else
3004#define C10_NVCC __NVCC__
3005#endif
3006
3007#ifndef __CUDA_VER_MAJOR__
3008#define C10_CUDA_VERSION_MAJOR 0
3009#else
3010#define C10_CUDA_VERSION_MAJOR __CUDA_VER_MAJOR__
3011#endif
3012
3013#ifndef CUDA_VERSION
3014#define C10_CUDA_VERSION 0
3015#else
3016#define C10_CUDA_VERSION CUDA_VERSION
3017#endif
3018
3019#ifndef __clang_major__
3020#define C10_CLANG_MAJOR_VERSION 0
3021#else
3022#define C10_CLANG_MAJOR_VERSION __clang_major__
3023#endif
3024
3025#ifndef __GNUC__
3026#define C10_GCC_VERSION 0
3027#else
3028#define C10_GCC_VERSION __GNUC__
3029#endif
3030
3031#ifndef __GNUC_MINOR__
3032#define C10_GCC_VERSION_MINOR 0
3033#else
3034#define C10_GCC_VERSION_MINOR __GNUC_MINOR__
3035#endif
3036
3037// We use a templatized class to both contain the logic of checking the sizes
3038// as well as to provide compile-time information that might be useful in
3039// figuring out why sizes may have changed.
3040// All the compile time information is given by the template fields that are
3041// always printed by the compiler when the static_assert fails.
3042template <
3043 size_t cplusplus = __cplusplus,
3044 size_t clang_ver_major = C10_CLANG_MAJOR_VERSION,
3045 size_t gcc_ver = C10_GCC_VERSION,
3046 size_t gcc_ver_minor = C10_GCC_VERSION_MINOR,
3047 size_t nvcc = C10_NVCC,
3048 size_t cuda_version = C10_CUDA_VERSION,
3049 size_t cuda_version_major = C10_CUDA_VERSION_MAJOR,
3050 size_t ptr_size = sizeof(void*)>
3051class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl {
3052 // Names of (non-bitfield) fields in TensorImpl; used to provide
3053 // compile-time info about fields whose size changes unexpectedly.
3054 enum class FieldNameEnum {
3055 storage_,
3056 autograd_meta_,
3057 extra_meta_,
3058 version_counter_,
3059 pyobj_slot_,
3060 sizes_and_strides_,
3061 storage_offset_,
3062 numel_,
3063 data_type_,
3064 device_opt_,
3065 key_set_,
3066 TOTAL_SIZE
3067 };
3068
3069 // Provides compile-time equality check that reveals what numbers
3070 // were used and on which quantity
  template <size_t Actual, size_t Expected, FieldNameEnum FieldName>
3072 constexpr static bool are_equal() {
3073 static_assert(
3074 Actual == Expected,
3075 "Actual and Expected sizes of a field did not match!");
3076 return true;
3077 }
3078
3079 // Provides compile-time <= check that reveals what numbers
3080 // were used and on which quantity
  template <size_t Actual, size_t Expected, FieldNameEnum FieldName>
3082 constexpr static bool is_le() {
3083 static_assert(
3084 Actual <= Expected,
        "Actual size of a field exceeded the Expected size!");
3086 return true;
3087 }
3088
3089 public:
3090 // Compile-time check that TensorImpl field sizes are as expected
3091 //
3092 // Observed total sizes and associated versions
3093 // If you find a flag that predicts when unique_ptr has 16 bytes
3094 // on 64-bit systems or when sizes_and_strides_ is 84 vs 88 bytes
3095 // on 32-bit systems you get a cookie!
3096 // Length | LLVM | GCC | C++ | CUDA
3097 // 192 | ? | 11.2 | 201703 | 11040
3098 // 208 | ? | 11.2 | 201703 | 11040
3099 // 208 | ? | 11.2 | 201402 | 11040
3100 // 192 | ? | 11.2 | 201402 | 11040
3101 // 160 | 12 | 4.2 | 201703 | 0
3102 //
3103 // To keep things clean, we split on systems here.
3104
3105#if UINTPTR_MAX == 0xFFFFFFFF
3106 // This is a 32-bit system
3107 static constexpr bool check_sizes() {
3108 constexpr size_t tsize = 20 * sizeof(int64_t);
3109
3110 // clang-format off
3111 are_equal<sizeof(storage_), 4, FieldNameEnum::storage_>();
3112 are_equal<sizeof(autograd_meta_), 4, FieldNameEnum::autograd_meta_>();
3113 are_equal<sizeof(extra_meta_), 4, FieldNameEnum::extra_meta_>();
3114 are_equal<sizeof(version_counter_), 4, FieldNameEnum::version_counter_>();
3115 are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>();
3116 is_le<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
3117 are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
3118 are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();
3119 are_equal<sizeof(data_type_), 2, FieldNameEnum::data_type_>();
3120 are_equal<sizeof(device_opt_), 3, FieldNameEnum::device_opt_>();
3121 are_equal<sizeof(key_set_), 8, FieldNameEnum::key_set_>();
3122 is_le<sizeof(TensorImpl), tsize, FieldNameEnum::TOTAL_SIZE>();
3123 // clang-format on
3124
3125 return true;
3126 }
3127#else
3128 // This is a 64-bit system
3129 static constexpr bool check_sizes() {
3130 constexpr size_t tsize = 26 * sizeof(int64_t);
3131
3132 // clang-format off
3133 are_equal<sizeof(storage_), 8, FieldNameEnum::storage_>();
3134 // On some systems involving NVCC the size of unique_ptr is 16 bytes. We haven't
3135 // figured out how to detect those via macro preprocessors yet, so we use <=
3136 // comparisons for the relevant fields.
3137 is_le<sizeof(autograd_meta_), 16, FieldNameEnum::autograd_meta_>();
3138 is_le<sizeof(extra_meta_), 16, FieldNameEnum::extra_meta_>();
3139 are_equal<sizeof(version_counter_), 8, FieldNameEnum::version_counter_>();
3140 are_equal<sizeof(pyobj_slot_), 16, FieldNameEnum::pyobj_slot_>();
3141 are_equal<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
3142 are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
3143 are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();
3144 are_equal<sizeof(data_type_), 2, FieldNameEnum::data_type_>();
3145 are_equal<sizeof(device_opt_), 3, FieldNameEnum::device_opt_>();
3146 are_equal<sizeof(key_set_), 8, FieldNameEnum::key_set_>();
3147 is_le<sizeof(TensorImpl), tsize, FieldNameEnum::TOTAL_SIZE>();
3148 // clang-format on
3149
3150 return true;
3151 }
3152#endif
3153};
3154
3155// We use a class to encapsulate size-checking logic with
3156// templates to capture sizes and flags. We call this within
3157// a static assert to prove there is no run-time behaviour.
3158// Since the methods we call return either true or fail their
3159// own static_asserts, we should never see the error messages
// below. We still have to provide a message, though, for C++ < 17.
3161static_assert(
3162 C10_TensorImpl_Size_Check_Dummy_Class<>::check_sizes(),
3163 "You should not see this message.");
3164
3165// Clean up after ourselves
3166#undef C10_NVCC
3167#undef C10_CUDA_VERSION_MAJOR
3168#undef C10_CUDA_VERSION
3169#undef C10_CLANG_MAJOR_VERSION
3170#undef C10_GCC_VERSION
3171#undef C10_GCC_VERSION_MINOR
3172
3173} // namespace c10
3174
3175C10_CLANG_DIAGNOSTIC_POP()
3176