1 | // required for old g++ to compile PRId64 macros, see |
2 | // https://github.com/pytorch/pytorch/issues/3571 |
3 | // for context |
4 | #ifndef __STDC_FORMAT_MACROS |
5 | #define __STDC_FORMAT_MACROS |
6 | #endif |
7 | |
// An external backend might generate this file within its own code tree
// and check all the source files in that tree with clang-format,
// so disable clang-format here since the backend might have a different config.
11 | // clang-format off |
12 | |
// NOTE: This condition is true for all PyTorch internal libraries; it
// just excludes external projects such as torch_xla which
// reuse some of the PyTorch codegen machinery.
16 | #if defined(CAFFE2_BUILD_MAIN_LIB) || \ |
17 | defined(TORCH_CUDA_BUILD_MAIN_LIB) || \ |
18 | defined(TORCH_HIP_BUILD_MAIN_LIB) || \ |
19 | defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \ |
20 | defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB) |
21 | #define TORCH_ASSERT_ONLY_METHOD_OPERATORS |
22 | #endif |
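// Roughly speaking, when TORCH_ASSERT_ONLY_METHOD_OPERATORS is defined this
// translation unit is expected to include only the fine-grained per-operator
// headers from ATen/ops/ (as done below) rather than monolithic headers such
// as ATen/Functions.h, which keeps header dependencies and rebuild times down.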
23 | |
24 | // @generated by torchgen/gen.py from RegisterDispatchKey.cpp |
25 | |
26 | #include <c10/core/TensorImpl.h> |
27 | #include <c10/core/Allocator.h> |
28 | #include <ATen/DeviceGuard.h> |
29 | #include <ATen/NamedTensorUtils.h> |
30 | #include <ATen/Utils.h> |
31 | #include <ATen/WrapDimUtils.h> |
32 | #include <ATen/Dispatch.h> |
33 | #include <c10/util/ExclusivelyOwned.h> |
34 | #include <c10/util/Half.h> |
35 | #include <c10/core/UndefinedTensorImpl.h> |
36 | #include <c10/util/Optional.h> |
37 | #include <ATen/Tensor.h> |
38 | #include <ATen/native/Resize.h> |
39 | |
40 | #include <cstddef> |
41 | #include <functional> |
42 | #include <memory> |
43 | #include <utility> |
44 | |
45 | #include <ATen/Config.h> |
46 | #include <ATen/core/op_registration/adaption.h> |
47 | #include <torch/library.h> |
48 | |
49 | |
50 | #include <ATen/ops/as_strided_native.h> |
51 | #include <ATen/ops/empty.h> |
52 | #include <ATen/ops/empty_strided.h> |
53 | #include <ATen/ops/_copy_from_and_resize.h> |
54 | #include <ATen/ops/_copy_from.h> |
55 | #include <ATen/ops/_fused_sdp_choice_native.h> |
56 | #include <ATen/ops/_native_decoder_only_multi_head_attention_native.h> |
57 | #include <ATen/ops/_native_multi_head_attention_native.h> |
58 | #include <ATen/ops/_nested_from_padded_and_nested_example_native.h> |
59 | #include <ATen/ops/_nested_select_backward_native.h> |
60 | #include <ATen/ops/_nested_sum_backward_native.h> |
61 | #include <ATen/ops/_nested_tensor_offsets_native.h> |
62 | #include <ATen/ops/_nested_tensor_size_native.h> |
63 | #include <ATen/ops/_nested_tensor_softmax_with_shape_native.h> |
64 | #include <ATen/ops/_nested_tensor_strides_native.h> |
65 | #include <ATen/ops/_softmax_backward_data_native.h> |
66 | #include <ATen/ops/_softmax_native.h> |
67 | #include <ATen/ops/_test_autograd_multiple_dispatch_native.h> |
68 | #include <ATen/ops/_to_copy_native.h> |
69 | #include <ATen/ops/_transform_bias_rescale_qkv_native.h> |
70 | #include <ATen/ops/_transformer_decoder_only_layer_fwd_native.h> |
71 | #include <ATen/ops/_transformer_encoder_layer_fwd_native.h> |
72 | #include <ATen/ops/add_native.h> |
73 | #include <ATen/ops/bmm_native.h> |
74 | #include <ATen/ops/chunk_native.h> |
75 | #include <ATen/ops/clone_native.h> |
76 | #include <ATen/ops/copy_native.h> |
77 | #include <ATen/ops/detach_native.h> |
78 | #include <ATen/ops/div_native.h> |
79 | #include <ATen/ops/embedding_native.h> |
80 | #include <ATen/ops/empty_like_native.h> |
81 | #include <ATen/ops/fill_native.h> |
82 | #include <ATen/ops/gelu_native.h> |
83 | #include <ATen/ops/is_same_size_native.h> |
84 | #include <ATen/ops/linear_backward_native.h> |
85 | #include <ATen/ops/linear_native.h> |
86 | #include <ATen/ops/matmul_backward_native.h> |
87 | #include <ATen/ops/matmul_native.h> |
88 | #include <ATen/ops/mul_native.h> |
89 | #include <ATen/ops/native_dropout_backward_native.h> |
90 | #include <ATen/ops/native_dropout_native.h> |
91 | #include <ATen/ops/native_layer_norm_native.h> |
92 | #include <ATen/ops/neg_native.h> |
93 | #include <ATen/ops/ones_like_native.h> |
94 | #include <ATen/ops/relu_native.h> |
95 | #include <ATen/ops/select_native.h> |
96 | #include <ATen/ops/squeeze_native.h> |
97 | #include <ATen/ops/sum_native.h> |
98 | #include <ATen/ops/tanh_native.h> |
99 | #include <ATen/ops/to_padded_tensor_native.h> |
100 | #include <ATen/ops/transpose_native.h> |
101 | #include <ATen/ops/unsqueeze_native.h> |
102 | #include <ATen/ops/values_native.h> |
103 | #include <ATen/ops/view_native.h> |
104 | |
105 | // See template file RegisterDispatchDefinitions.ini |
106 | namespace at { |
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may already have been
// defined in the at namespace.
110 | namespace { |
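// Shared helper for out= overloads: checks that the provided out tensor has
// the expected dtype and device, resizes it to the computed sizes, and, only
// if a resize actually occurred, applies the suggested strides or memory
// format.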
111 | void resize_out(const Tensor &out, IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) { |
112 | TORCH_CHECK(options.dtype() == out.dtype(), |
113 | "Expected out tensor to have dtype " , options.dtype(), ", but got " , out.dtype(), " instead" ); |
114 | TORCH_CHECK(options.device() == out.device(), |
115 | "Expected out tensor to have device " , options.device(), ", but got " , out.device(), " instead" ); |
116 | const bool resized = at::native::resize_output(out, sizes); |
117 | // Only restride if a resize occurred; otherwise we ignore the (advisory) |
118 | // strides from the meta function and directly use the output tensor's |
119 | // preexisting strides |
120 | if (resized) { |
121 | if (!strides.empty()) { |
122 | TORCH_INTERNAL_ASSERT(!options.memory_format_opt().has_value()); |
123 | // TODO: avoid the redispatch here |
124 | out.as_strided_(sizes, strides); |
125 | } else if (options.memory_format_opt().has_value()) { |
126 | out.unsafeGetTensorImpl()->empty_tensor_restride(*options.memory_format_opt()); |
127 | } |
128 | } |
129 | } |
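// Shared helper for in-place overloads: checks that self already has the
// dtype, device and sizes that the operation would produce.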
130 | void check_inplace(const Tensor &self, IntArrayRef sizes, const TensorOptions &options) { |
131 | // These checks are needed on those operators that: |
132 | // 1) don't use 'TensorIterator' (e.g. 'addmm' and 'baddbmm') |
133 | // 2) have particular typing rules (e.g. 'cumsum' and 'cumprod') |
134 | // For other operators (e.g. 'add'), 'TensorIterator' already checks |
135 | // these things separately. |
136 | TORCH_CHECK(options.dtype() == self.dtype(), |
137 | "Bad in-place call: " , |
138 | "input tensor dtype " , self.dtype(), " and output tensor dtype " , options.dtype(), " should match" ); |
139 | TORCH_CHECK(options.device() == self.device(), |
140 | "Bad in-place call: " , |
141 | "input tensor device " , self.device(), " and output tensor device " , options.device(), " should match" ); |
142 | TORCH_CHECK(sizes == self.sizes(), |
143 | "Bad in-place call: " , |
144 | "input tensor size " , self.sizes(), " and output tensor size " , sizes, " should match" ); |
145 | } |
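// Each anonymous-namespace wrapper below corresponds to one operator overload
// registered for the NestedTensorCPU dispatch key and simply forwards to the
// matching kernel in at::native. As the generated comments note, device
// checks and DeviceGuards are elided for these nested-tensor ops.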
146 | namespace { |
147 | ::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU__native_dropout(const at::Tensor & input, double p, c10::optional<bool> train) { |
148 | // No device check |
149 | // DeviceGuard omitted |
150 | return at::native::native_dropout_nested(input, p, train); |
151 | } |
152 | } // anonymous namespace |
153 | namespace { |
154 | at::Tensor wrapper_NestedTensorCPU__native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale) { |
155 | // No device check |
156 | // DeviceGuard omitted |
157 | return at::native::native_dropout_backward(grad_output, mask, scale); |
158 | } |
159 | } // anonymous namespace |
160 | namespace { |
161 | at::Tensor wrapper_NestedTensorCPU_Tensor_add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
162 | // No device check |
163 | // DeviceGuard omitted |
164 | return at::native::NestedTensor_add_Tensor(self, other, alpha); |
165 | } |
166 | } // anonymous namespace |
167 | namespace { |
168 | at::Tensor & wrapper_NestedTensorCPU_Tensor_add_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
169 | // No device check |
170 | // DeviceGuard omitted |
171 | return at::native::NestedTensor_add__Tensor(self, other, alpha); |
172 | } |
173 | } // anonymous namespace |
174 | namespace { |
175 | at::Tensor wrapper_NestedTensorCPU__bmm(const at::Tensor & self, const at::Tensor & mat2) { |
176 | // No device check |
177 | // DeviceGuard omitted |
178 | return at::native::bmm_nested(self, mat2); |
179 | } |
180 | } // anonymous namespace |
181 | namespace { |
182 | ::std::vector<at::Tensor> wrapper_NestedTensorCPU__chunk(const at::Tensor & self, int64_t chunks, int64_t dim) { |
183 | // No device check |
184 | // DeviceGuard omitted |
185 | return at::native::chunk_nested_tensor(self, chunks, dim); |
186 | } |
187 | } // anonymous namespace |
188 | namespace { |
189 | at::Tensor & wrapper_NestedTensorCPU__copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking) { |
190 | // No device check |
191 | // DeviceGuard omitted |
192 | return at::native::copy_nested_(self, src, non_blocking); |
193 | } |
194 | } // anonymous namespace |
195 | namespace { |
196 | at::Tensor wrapper_NestedTensorCPU_Tensor_div(const at::Tensor & self, const at::Tensor & other) { |
197 | // No device check |
198 | // DeviceGuard omitted |
199 | return at::native::NestedTensor_div_Tensor(self, other); |
200 | } |
201 | } // anonymous namespace |
202 | namespace { |
203 | at::Tensor wrapper_NestedTensorCPU_Scalar_div(const at::Tensor & self, const at::Scalar & other) { |
204 | // No device check |
205 | // DeviceGuard omitted |
206 | return at::native::NestedTensor_div_Scalar(self, other); |
207 | } |
208 | } // anonymous namespace |
209 | namespace { |
210 | at::Tensor wrapper_NestedTensorCPU__embedding(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse) { |
211 | // No device check |
212 | // DeviceGuard omitted |
213 | return at::native::NestedTensor_embedding(weight, indices, padding_idx.expect_int(), scale_grad_by_freq, sparse); |
214 | } |
215 | } // anonymous namespace |
216 | namespace { |
217 | at::Tensor wrapper_NestedTensorCPU__empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
218 | // No device check |
219 | // DeviceGuard omitted |
220 | return at::native::empty_like_nested(self, dtype, layout, device, pin_memory, memory_format); |
221 | } |
222 | } // anonymous namespace |
223 | namespace { |
224 | at::Tensor & wrapper_NestedTensorCPU_Scalar_fill_(at::Tensor & self, const at::Scalar & value) { |
225 | // No device check |
226 | // DeviceGuard omitted |
227 | return at::native::fill_nested_(self, value); |
228 | } |
229 | } // anonymous namespace |
230 | namespace { |
231 | at::Tensor & wrapper_NestedTensorCPU_Tensor_fill_(at::Tensor & self, const at::Tensor & value) { |
232 | // No device check |
233 | // DeviceGuard omitted |
234 | return at::native::fill_nested_(self, value); |
235 | } |
236 | } // anonymous namespace |
237 | namespace { |
238 | bool wrapper_NestedTensorCPU__is_same_size(const at::Tensor & self, const at::Tensor & other) { |
239 | // No device check |
240 | // DeviceGuard omitted |
241 | return at::native::nested_is_same_size(self, other); |
242 | } |
243 | } // anonymous namespace |
244 | namespace { |
245 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU__native_layer_norm(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) { |
246 | // No device check |
247 | // DeviceGuard omitted |
248 | return at::native::nested_layer_norm(input, C10_AS_INTARRAYREF_SLOW(normalized_shape), weight, bias, eps); |
249 | } |
250 | } // anonymous namespace |
251 | namespace { |
252 | at::Tensor wrapper_NestedTensorCPU__linear(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias) { |
253 | // No device check |
254 | // DeviceGuard omitted |
255 | return at::native::nested_linear(input, weight, bias); |
256 | } |
257 | } // anonymous namespace |
258 | namespace { |
259 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU__linear_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, ::std::array<bool,3> output_mask) { |
260 | // No device check |
261 | // DeviceGuard omitted |
262 | return at::native::nested_linear_backward(self, grad_output, weight, output_mask); |
263 | } |
264 | } // anonymous namespace |
265 | namespace { |
266 | at::Tensor wrapper_NestedTensorCPU__matmul(const at::Tensor & self, const at::Tensor & other) { |
267 | // No device check |
268 | // DeviceGuard omitted |
269 | return at::native::matmul_nested(self, other); |
270 | } |
271 | } // anonymous namespace |
272 | namespace { |
273 | at::Tensor & wrapper_NestedTensorCPU_out_matmul_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { |
274 | // No device check |
275 | // DeviceGuard omitted |
276 | return at::native::matmul_out_nested(self, other, out); |
277 | } |
278 | } // anonymous namespace |
279 | namespace { |
280 | ::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU__matmul_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & other, ::std::array<bool,2> mask) { |
281 | // No device check |
282 | // DeviceGuard omitted |
283 | return at::native::matmul_backward_nested(grad, self, other, mask); |
284 | } |
285 | } // anonymous namespace |
286 | namespace { |
287 | at::Tensor wrapper_NestedTensorCPU_Tensor_mul(const at::Tensor & self, const at::Tensor & other) { |
288 | // No device check |
289 | // DeviceGuard omitted |
290 | return at::native::NestedTensor_mul_Tensor(self, other); |
291 | } |
292 | } // anonymous namespace |
293 | namespace { |
294 | at::Tensor & wrapper_NestedTensorCPU_Tensor_mul_(at::Tensor & self, const at::Tensor & other) { |
295 | // No device check |
296 | // DeviceGuard omitted |
297 | return at::native::NestedTensor_mul__Tensor(self, other); |
298 | } |
299 | } // anonymous namespace |
300 | namespace { |
301 | at::Tensor wrapper_NestedTensorCPU_Scalar_mul(const at::Tensor & self, const at::Scalar & other) { |
302 | // No device check |
303 | // DeviceGuard omitted |
304 | return at::native::NestedTensor_mul_Scalar(self, other); |
305 | } |
306 | } // anonymous namespace |
307 | namespace { |
308 | at::Tensor & wrapper_NestedTensorCPU_Scalar_mul_(at::Tensor & self, const at::Scalar & other) { |
309 | // No device check |
310 | // DeviceGuard omitted |
311 | return at::native::NestedTensor_mul__Scalar(self, other); |
312 | } |
313 | } // anonymous namespace |
314 | namespace { |
315 | at::Tensor wrapper_NestedTensorCPU__ones_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
316 | // No device check |
317 | // DeviceGuard omitted |
318 | return at::native::ones_like(self, dtype, layout, device, pin_memory, memory_format); |
319 | } |
320 | } // anonymous namespace |
321 | namespace { |
322 | at::Tensor wrapper_NestedTensorCPU__neg(const at::Tensor & self) { |
323 | // No device check |
324 | // DeviceGuard omitted |
325 | return at::native::NestedTensor_neg(self); |
326 | } |
327 | } // anonymous namespace |
328 | namespace { |
329 | at::Tensor & wrapper_NestedTensorCPU__neg_(at::Tensor & self) { |
330 | // No device check |
331 | // DeviceGuard omitted |
332 | return at::native::NestedTensor_neg_(self); |
333 | } |
334 | } // anonymous namespace |
335 | namespace { |
336 | at::Tensor wrapper_NestedTensorCPU__relu(const at::Tensor & self) { |
337 | // No device check |
338 | // DeviceGuard omitted |
339 | return at::native::NestedTensor_relu(self); |
340 | } |
341 | } // anonymous namespace |
342 | namespace { |
343 | at::Tensor & wrapper_NestedTensorCPU__relu_(at::Tensor & self) { |
344 | // No device check |
345 | // DeviceGuard omitted |
346 | return at::native::NestedTensor_relu_(self); |
347 | } |
348 | } // anonymous namespace |
349 | namespace { |
350 | at::Tensor wrapper_NestedTensorCPU__gelu(const at::Tensor & self, c10::string_view approximate) { |
351 | // No device check |
352 | // DeviceGuard omitted |
353 | return at::native::NestedTensor_gelu(self, approximate); |
354 | } |
355 | } // anonymous namespace |
356 | namespace { |
357 | at::Tensor & wrapper_NestedTensorCPU__gelu_(at::Tensor & self, c10::string_view approximate) { |
358 | // No device check |
359 | // DeviceGuard omitted |
360 | return at::native::NestedTensor_gelu_(self, approximate); |
361 | } |
362 | } // anonymous namespace |
363 | namespace { |
364 | at::Tensor wrapper_NestedTensorCPU_int_select(const at::Tensor & self, int64_t dim, c10::SymInt index) { |
365 | // No device check |
366 | // DeviceGuard omitted |
367 | return at::native::select_nested(self, dim, index.expect_int()); |
368 | } |
369 | } // anonymous namespace |
370 | namespace { |
371 | at::Tensor wrapper_NestedTensorCPU___nested_select_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, c10::SymInt index) { |
372 | // No device check |
373 | // DeviceGuard omitted |
374 | return at::native::_nested_select_backward_symint(grad_output, self, dim, index); |
375 | } |
376 | } // anonymous namespace |
377 | namespace { |
378 | at::Tensor wrapper_NestedTensorCPU__detach(const at::Tensor & self) { |
379 | // No device check |
380 | // DeviceGuard omitted |
381 | return at::native::detach(self); |
382 | } |
383 | } // anonymous namespace |
384 | namespace { |
385 | at::Tensor wrapper_NestedTensorCPU___softmax(const at::Tensor & self, int64_t dim, bool half_to_float) { |
386 | // No device check |
387 | // DeviceGuard omitted |
388 | return at::native::softmax_nested(self, dim, half_to_float); |
389 | } |
390 | } // anonymous namespace |
391 | namespace { |
392 | at::Tensor wrapper_NestedTensorCPU___softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) { |
393 | // No device check |
394 | // DeviceGuard omitted |
395 | return at::native::nested_softmax_backward(grad_output, output, dim, input_dtype); |
396 | } |
397 | } // anonymous namespace |
398 | namespace { |
399 | at::Tensor wrapper_NestedTensorCPU__squeeze(const at::Tensor & self) { |
400 | // No device check |
401 | // DeviceGuard omitted |
402 | return at::native::squeeze_nested(self); |
403 | } |
404 | } // anonymous namespace |
405 | namespace { |
406 | at::Tensor wrapper_NestedTensorCPU_dim_squeeze(const at::Tensor & self, int64_t dim) { |
407 | // No device check |
408 | // DeviceGuard omitted |
409 | return at::native::squeeze_dim_nested(self, dim); |
410 | } |
411 | } // anonymous namespace |
412 | namespace { |
413 | at::Tensor wrapper_NestedTensorCPU_dims_squeeze(const at::Tensor & self, at::IntArrayRef dim) { |
414 | // No device check |
415 | // DeviceGuard omitted |
416 | return at::native::squeeze_dim_nested(self, dim); |
417 | } |
418 | } // anonymous namespace |
419 | namespace { |
420 | at::Tensor wrapper_NestedTensorCPU_dim_IntList_sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype) { |
421 | // No device check |
422 | // DeviceGuard omitted |
423 | return at::native::NestedTensor_sum_dim_CPU(self, dim, keepdim, dtype); |
424 | } |
425 | } // anonymous namespace |
426 | namespace { |
427 | at::Tensor wrapper_NestedTensorCPU___nested_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim) { |
428 | // No device check |
429 | // DeviceGuard omitted |
430 | return at::native::_nested_sum_backward_cpu(grad, self, dim, keepdim); |
431 | } |
432 | } // anonymous namespace |
433 | namespace { |
434 | at::Tensor wrapper_NestedTensorCPU__tanh(const at::Tensor & self) { |
435 | // No device check |
436 | // DeviceGuard omitted |
437 | return at::native::NestedTensor_tanh(self); |
438 | } |
439 | } // anonymous namespace |
440 | namespace { |
441 | at::Tensor & wrapper_NestedTensorCPU__tanh_(at::Tensor & self) { |
442 | // No device check |
443 | // DeviceGuard omitted |
444 | return at::native::NestedTensor_tanh_(self); |
445 | } |
446 | } // anonymous namespace |
447 | namespace { |
448 | at::Tensor wrapper_NestedTensorCPU_int_transpose(const at::Tensor & self, int64_t dim0, int64_t dim1) { |
449 | // No device check |
450 | // DeviceGuard omitted |
451 | return at::native::transpose_nested(self, dim0, dim1); |
452 | } |
453 | } // anonymous namespace |
454 | namespace { |
455 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) { |
456 | // No device check |
457 | // DeviceGuard omitted |
458 | return at::native::transform_bias_rescale_qkv_cpu(qkv, qkv_bias, num_heads); |
459 | } |
460 | } // anonymous namespace |
461 | namespace { |
462 | at::Tensor wrapper_NestedTensorCPU___nested_tensor_size(const at::Tensor & self) { |
463 | // No device check |
464 | // DeviceGuard omitted |
465 | return at::native::_nested_tensor_size(self); |
466 | } |
467 | } // anonymous namespace |
468 | namespace { |
469 | at::Tensor wrapper_NestedTensorCPU___nested_tensor_strides(const at::Tensor & self) { |
470 | // No device check |
471 | // DeviceGuard omitted |
472 | return at::native::_nested_tensor_strides(self); |
473 | } |
474 | } // anonymous namespace |
475 | namespace { |
476 | ::std::vector<int64_t> wrapper_NestedTensorCPU___nested_tensor_offsets(const at::Tensor & self) { |
477 | // No device check |
478 | // DeviceGuard omitted |
479 | return at::native::_nested_tensor_offsets(self); |
480 | } |
481 | } // anonymous namespace |
482 | namespace { |
483 | at::Tensor wrapper_NestedTensorCPU___nested_from_padded_and_nested_example(const at::Tensor & padded, const at::Tensor & nt_example) { |
484 | // No device check |
485 | // DeviceGuard omitted |
486 | return at::native::NestedTensor_from_padded_and_nested_example(padded, nt_example); |
487 | } |
488 | } // anonymous namespace |
489 | namespace { |
490 | at::Tensor wrapper_NestedTensorCPU__unsqueeze(const at::Tensor & self, int64_t dim) { |
491 | // No device check |
492 | // DeviceGuard omitted |
493 | return at::native::unsqueeze_nested(self, dim); |
494 | } |
495 | } // anonymous namespace |
496 | namespace { |
497 | at::Tensor wrapper_NestedTensorCPU__clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) { |
498 | // No device check |
499 | // DeviceGuard omitted |
500 | return at::native::clone_nested(self, memory_format); |
501 | } |
502 | } // anonymous namespace |
503 | namespace { |
504 | at::Tensor wrapper_NestedTensorCPU__values(const at::Tensor & self) { |
505 | // No device check |
506 | // DeviceGuard omitted |
507 | return at::native::values_nested(self); |
508 | } |
509 | } // anonymous namespace |
510 | namespace { |
511 | at::Tensor wrapper_NestedTensorCPU___to_copy(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) { |
512 | // No device check |
513 | // DeviceGuard omitted |
514 | return at::native::_to_copy_nested(self, dtype, layout, device, pin_memory, non_blocking, memory_format); |
515 | } |
516 | } // anonymous namespace |
517 | namespace { |
518 | at::Tensor wrapper_NestedTensorCPU__view(const at::Tensor & self, c10::SymIntArrayRef size) { |
519 | // No device check |
520 | // DeviceGuard omitted |
521 | return at::native::view_nested(self, C10_AS_INTARRAYREF_SLOW(size)); |
522 | } |
523 | } // anonymous namespace |
524 | namespace { |
525 | at::Tensor wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch(const at::Tensor & self) { |
526 | // No device check |
527 | // DeviceGuard omitted |
528 | return at::native::_test_autograd_multiple_dispatch_fullcoverage(self); |
529 | } |
530 | } // anonymous namespace |
531 | namespace { |
532 | at::Tensor wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch(const at::Tensor & self, bool b) { |
533 | // No device check |
534 | // DeviceGuard omitted |
535 | return at::native::_test_autograd_multiple_dispatch_ntonly(self, b); |
536 | } |
537 | } // anonymous namespace |
538 | namespace { |
539 | at::Tensor wrapper_NestedTensorCPU__to_padded_tensor(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size) { |
540 | // No device check |
541 | // DeviceGuard omitted |
542 | return at::native::NestedTensor_to_padded_tensor_generic(self, padding, output_size.has_value() ? c10::make_optional(C10_AS_INTARRAYREF_SLOW(*output_size)) : c10::nullopt); |
543 | } |
544 | } // anonymous namespace |
545 | namespace { |
546 | at::Tensor wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) { |
547 | // No device check |
548 | // DeviceGuard omitted |
549 | return at::native::NestedTensor_softmax_dropout(self, query); |
550 | } |
551 | } // anonymous namespace |
552 | namespace { |
553 | at::Tensor wrapper_NestedTensorCPU___transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, c10::optional<int64_t> mask_type) { |
554 | // No device check |
555 | // DeviceGuard omitted |
556 | return at::native::transformer_encoder_layer_forward(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type); |
557 | } |
558 | } // anonymous namespace |
559 | namespace { |
560 | ::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU___native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type) { |
561 | // No device check |
562 | // DeviceGuard omitted |
563 | return at::native::native_multi_head_attention_cpu(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type); |
564 | } |
565 | } // anonymous namespace |
566 | namespace { |
567 | int64_t wrapper_NestedTensorCPU___fused_sdp_choice(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal) { |
568 | // No device check |
569 | // DeviceGuard omitted |
570 | return at::native::_fused_sdp_choice_cpp(query, key, value, attn_mask, dropout_p, is_causal); |
571 | } |
572 | } // anonymous namespace |
573 | namespace { |
574 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value) { |
575 | // No device check |
576 | // DeviceGuard omitted |
577 | return at::native::transformer_decoder_only_layer_forward(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, incr_key, incr_value); |
578 | } |
579 | } // anonymous namespace |
580 | namespace { |
581 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value, bool need_weights, bool average_attn_weights) { |
582 | // No device check |
583 | // DeviceGuard omitted |
584 | return at::native::native_decoder_only_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, incr_key, incr_value, need_weights, average_attn_weights); |
585 | } |
586 | } // anonymous namespace |
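// Registration of the wrappers above under the NestedTensorCPU dispatch key.
// As a rough illustration (the variable names here are hypothetical), a call
// such as
//
//   at::Tensor a = /* a nested tensor on CPU */;
//   at::Tensor b = at::relu(a);
//
// is routed by the dispatcher to wrapper_NestedTensorCPU__relu, which in turn
// calls at::native::NestedTensor_relu.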
587 | TORCH_LIBRARY_IMPL(aten, NestedTensorCPU, m) { |
588 | m.impl("native_dropout" , |
589 | TORCH_FN(wrapper_NestedTensorCPU__native_dropout)); |
590 | m.impl("native_dropout_backward" , |
591 | TORCH_FN(wrapper_NestedTensorCPU__native_dropout_backward)); |
592 | m.impl("add.Tensor" , |
593 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_add)); |
594 | m.impl("add_.Tensor" , |
595 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_add_)); |
596 | m.impl("bmm" , |
597 | TORCH_FN(wrapper_NestedTensorCPU__bmm)); |
598 | m.impl("chunk" , |
599 | TORCH_FN(wrapper_NestedTensorCPU__chunk)); |
600 | m.impl("copy_" , |
601 | TORCH_FN(wrapper_NestedTensorCPU__copy_)); |
602 | m.impl("div.Tensor" , |
603 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_div)); |
604 | m.impl("div.Scalar" , |
605 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_div)); |
606 | m.impl("embedding" , |
607 | TORCH_FN(wrapper_NestedTensorCPU__embedding)); |
608 | m.impl("empty_like" , |
609 | TORCH_FN(wrapper_NestedTensorCPU__empty_like)); |
610 | m.impl("fill_.Scalar" , |
611 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_fill_)); |
612 | m.impl("fill_.Tensor" , |
613 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_fill_)); |
614 | m.impl("is_same_size" , |
615 | TORCH_FN(wrapper_NestedTensorCPU__is_same_size)); |
616 | m.impl("native_layer_norm" , |
617 | TORCH_FN(wrapper_NestedTensorCPU__native_layer_norm)); |
618 | m.impl("linear" , |
619 | TORCH_FN(wrapper_NestedTensorCPU__linear)); |
620 | m.impl("linear_backward" , |
621 | TORCH_FN(wrapper_NestedTensorCPU__linear_backward)); |
622 | m.impl("matmul" , |
623 | TORCH_FN(wrapper_NestedTensorCPU__matmul)); |
624 | m.impl("matmul.out" , |
625 | TORCH_FN(wrapper_NestedTensorCPU_out_matmul_out)); |
626 | m.impl("matmul_backward" , |
627 | TORCH_FN(wrapper_NestedTensorCPU__matmul_backward)); |
628 | m.impl("mul.Tensor" , |
629 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_mul)); |
630 | m.impl("mul_.Tensor" , |
631 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_mul_)); |
632 | m.impl("mul.Scalar" , |
633 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_mul)); |
634 | m.impl("mul_.Scalar" , |
635 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_mul_)); |
636 | m.impl("ones_like" , |
637 | TORCH_FN(wrapper_NestedTensorCPU__ones_like)); |
638 | m.impl("neg" , |
639 | TORCH_FN(wrapper_NestedTensorCPU__neg)); |
640 | m.impl("neg_" , |
641 | TORCH_FN(wrapper_NestedTensorCPU__neg_)); |
642 | m.impl("relu" , |
643 | TORCH_FN(wrapper_NestedTensorCPU__relu)); |
644 | m.impl("relu_" , |
645 | TORCH_FN(wrapper_NestedTensorCPU__relu_)); |
646 | m.impl("gelu" , |
647 | TORCH_FN(wrapper_NestedTensorCPU__gelu)); |
648 | m.impl("gelu_" , |
649 | TORCH_FN(wrapper_NestedTensorCPU__gelu_)); |
650 | m.impl("select.int" , |
651 | TORCH_FN(wrapper_NestedTensorCPU_int_select)); |
652 | m.impl("_nested_select_backward" , |
653 | TORCH_FN(wrapper_NestedTensorCPU___nested_select_backward)); |
654 | m.impl("detach" , |
655 | TORCH_FN(wrapper_NestedTensorCPU__detach)); |
656 | m.impl("_softmax" , |
657 | TORCH_FN(wrapper_NestedTensorCPU___softmax)); |
658 | m.impl("_softmax_backward_data" , |
659 | TORCH_FN(wrapper_NestedTensorCPU___softmax_backward_data)); |
660 | m.impl("squeeze" , |
661 | TORCH_FN(wrapper_NestedTensorCPU__squeeze)); |
662 | m.impl("squeeze.dim" , |
663 | TORCH_FN(wrapper_NestedTensorCPU_dim_squeeze)); |
664 | m.impl("squeeze.dims" , |
665 | TORCH_FN(wrapper_NestedTensorCPU_dims_squeeze)); |
666 | m.impl("sum.dim_IntList" , |
667 | TORCH_FN(wrapper_NestedTensorCPU_dim_IntList_sum)); |
668 | m.impl("_nested_sum_backward" , |
669 | TORCH_FN(wrapper_NestedTensorCPU___nested_sum_backward)); |
670 | m.impl("tanh" , |
671 | TORCH_FN(wrapper_NestedTensorCPU__tanh)); |
672 | m.impl("tanh_" , |
673 | TORCH_FN(wrapper_NestedTensorCPU__tanh_)); |
674 | m.impl("transpose.int" , |
675 | TORCH_FN(wrapper_NestedTensorCPU_int_transpose)); |
676 | m.impl("_transform_bias_rescale_qkv" , |
677 | TORCH_FN(wrapper_NestedTensorCPU___transform_bias_rescale_qkv)); |
678 | m.impl("_nested_tensor_size" , |
679 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_size)); |
680 | m.impl("_nested_tensor_strides" , |
681 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_strides)); |
682 | m.impl("_nested_tensor_offsets" , |
683 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_offsets)); |
684 | m.impl("_nested_from_padded_and_nested_example" , |
685 | TORCH_FN(wrapper_NestedTensorCPU___nested_from_padded_and_nested_example)); |
686 | m.impl("unsqueeze" , |
687 | TORCH_FN(wrapper_NestedTensorCPU__unsqueeze)); |
688 | m.impl("clone" , |
689 | TORCH_FN(wrapper_NestedTensorCPU__clone)); |
690 | m.impl("values" , |
691 | TORCH_FN(wrapper_NestedTensorCPU__values)); |
692 | m.impl("_to_copy" , |
693 | TORCH_FN(wrapper_NestedTensorCPU___to_copy)); |
694 | m.impl("view" , |
695 | TORCH_FN(wrapper_NestedTensorCPU__view)); |
696 | m.impl("_test_autograd_multiple_dispatch.fullcoverage" , |
697 | TORCH_FN(wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch)); |
698 | m.impl("_test_autograd_multiple_dispatch.ntonly" , |
699 | TORCH_FN(wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch)); |
700 | m.impl("to_padded_tensor" , |
701 | TORCH_FN(wrapper_NestedTensorCPU__to_padded_tensor)); |
702 | m.impl("_nested_tensor_softmax_with_shape" , |
703 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape)); |
704 | m.impl("_transformer_encoder_layer_fwd" , |
705 | TORCH_FN(wrapper_NestedTensorCPU___transformer_encoder_layer_fwd)); |
706 | m.impl("_native_multi_head_attention" , |
707 | TORCH_FN(wrapper_NestedTensorCPU___native_multi_head_attention)); |
708 | m.impl("_fused_sdp_choice" , |
709 | TORCH_FN(wrapper_NestedTensorCPU___fused_sdp_choice)); |
710 | m.impl("_transformer_decoder_only_layer_fwd" , |
711 | TORCH_FN(wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd)); |
712 | m.impl("_native_decoder_only_multi_head_attention" , |
713 | TORCH_FN(wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention)); |
714 | }; |
715 | } // anonymous namespace |
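// The at::nestedtensorcpu namespace exposes the same kernels as plain
// functions that call the wrappers above directly, bypassing the dispatcher.
// Illustrative usage (hypothetical variable names):
//
//   at::Tensor y = at::nestedtensorcpu::relu(x);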
716 | namespace nestedtensorcpu { |
717 | ::std::tuple<at::Tensor,at::Tensor> native_dropout(const at::Tensor & input, double p, c10::optional<bool> train) { |
718 | return wrapper_NestedTensorCPU__native_dropout(input, p, train); |
719 | } |
720 | at::Tensor native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale) { |
721 | return wrapper_NestedTensorCPU__native_dropout_backward(grad_output, mask, scale); |
722 | } |
723 | at::Tensor add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
724 | return wrapper_NestedTensorCPU_Tensor_add(self, other, alpha); |
725 | } |
726 | at::Tensor & add_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
727 | return wrapper_NestedTensorCPU_Tensor_add_(self, other, alpha); |
728 | } |
729 | at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2) { |
730 | return wrapper_NestedTensorCPU__bmm(self, mat2); |
731 | } |
732 | ::std::vector<at::Tensor> chunk(const at::Tensor & self, int64_t chunks, int64_t dim) { |
733 | return wrapper_NestedTensorCPU__chunk(self, chunks, dim); |
734 | } |
735 | at::Tensor & copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking) { |
736 | return wrapper_NestedTensorCPU__copy_(self, src, non_blocking); |
737 | } |
738 | at::Tensor div(const at::Tensor & self, const at::Tensor & other) { |
739 | return wrapper_NestedTensorCPU_Tensor_div(self, other); |
740 | } |
741 | at::Tensor div(const at::Tensor & self, const at::Scalar & other) { |
742 | return wrapper_NestedTensorCPU_Scalar_div(self, other); |
743 | } |
744 | at::Tensor embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse) { |
745 | return wrapper_NestedTensorCPU__embedding(weight, indices, padding_idx, scale_grad_by_freq, sparse); |
746 | } |
747 | at::Tensor embedding_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse) { |
748 | return wrapper_NestedTensorCPU__embedding(weight, indices, padding_idx, scale_grad_by_freq, sparse); |
749 | } |
750 | at::Tensor empty_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) { |
751 | return wrapper_NestedTensorCPU__empty_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); |
752 | } |
753 | at::Tensor empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
754 | return wrapper_NestedTensorCPU__empty_like(self, dtype, layout, device, pin_memory, memory_format); |
755 | } |
756 | at::Tensor & fill_(at::Tensor & self, const at::Scalar & value) { |
757 | return wrapper_NestedTensorCPU_Scalar_fill_(self, value); |
758 | } |
759 | at::Tensor & fill_(at::Tensor & self, const at::Tensor & value) { |
760 | return wrapper_NestedTensorCPU_Tensor_fill_(self, value); |
761 | } |
762 | bool is_same_size(const at::Tensor & self, const at::Tensor & other) { |
763 | return wrapper_NestedTensorCPU__is_same_size(self, other); |
764 | } |
765 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) { |
766 | return wrapper_NestedTensorCPU__native_layer_norm(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps); |
767 | } |
768 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> native_layer_norm_symint(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) { |
769 | return wrapper_NestedTensorCPU__native_layer_norm(input, normalized_shape, weight, bias, eps); |
770 | } |
771 | at::Tensor linear(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias) { |
772 | return wrapper_NestedTensorCPU__linear(input, weight, bias); |
773 | } |
774 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> linear_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, ::std::array<bool,3> output_mask) { |
775 | return wrapper_NestedTensorCPU__linear_backward(self, grad_output, weight, output_mask); |
776 | } |
777 | at::Tensor matmul(const at::Tensor & self, const at::Tensor & other) { |
778 | return wrapper_NestedTensorCPU__matmul(self, other); |
779 | } |
780 | at::Tensor & matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { |
781 | return wrapper_NestedTensorCPU_out_matmul_out(self, other, out); |
782 | } |
783 | at::Tensor & matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { |
784 | return wrapper_NestedTensorCPU_out_matmul_out(self, other, out); |
785 | } |
786 | ::std::tuple<at::Tensor,at::Tensor> matmul_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & other, ::std::array<bool,2> mask) { |
787 | return wrapper_NestedTensorCPU__matmul_backward(grad, self, other, mask); |
788 | } |
789 | at::Tensor mul(const at::Tensor & self, const at::Tensor & other) { |
790 | return wrapper_NestedTensorCPU_Tensor_mul(self, other); |
791 | } |
792 | at::Tensor & mul_(at::Tensor & self, const at::Tensor & other) { |
793 | return wrapper_NestedTensorCPU_Tensor_mul_(self, other); |
794 | } |
795 | at::Tensor mul(const at::Tensor & self, const at::Scalar & other) { |
796 | return wrapper_NestedTensorCPU_Scalar_mul(self, other); |
797 | } |
798 | at::Tensor & mul_(at::Tensor & self, const at::Scalar & other) { |
799 | return wrapper_NestedTensorCPU_Scalar_mul_(self, other); |
800 | } |
801 | at::Tensor ones_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) { |
802 | return wrapper_NestedTensorCPU__ones_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); |
803 | } |
804 | at::Tensor ones_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
805 | return wrapper_NestedTensorCPU__ones_like(self, dtype, layout, device, pin_memory, memory_format); |
806 | } |
807 | at::Tensor neg(const at::Tensor & self) { |
808 | return wrapper_NestedTensorCPU__neg(self); |
809 | } |
810 | at::Tensor & neg_(at::Tensor & self) { |
811 | return wrapper_NestedTensorCPU__neg_(self); |
812 | } |
813 | at::Tensor relu(const at::Tensor & self) { |
814 | return wrapper_NestedTensorCPU__relu(self); |
815 | } |
816 | at::Tensor & relu_(at::Tensor & self) { |
817 | return wrapper_NestedTensorCPU__relu_(self); |
818 | } |
819 | at::Tensor gelu(const at::Tensor & self, c10::string_view approximate) { |
820 | return wrapper_NestedTensorCPU__gelu(self, approximate); |
821 | } |
822 | at::Tensor & gelu_(at::Tensor & self, c10::string_view approximate) { |
823 | return wrapper_NestedTensorCPU__gelu_(self, approximate); |
824 | } |
825 | at::Tensor select(const at::Tensor & self, int64_t dim, int64_t index) { |
826 | return wrapper_NestedTensorCPU_int_select(self, dim, index); |
827 | } |
828 | at::Tensor select_symint(const at::Tensor & self, int64_t dim, c10::SymInt index) { |
829 | return wrapper_NestedTensorCPU_int_select(self, dim, index); |
830 | } |
831 | at::Tensor _nested_select_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, int64_t index) { |
832 | return wrapper_NestedTensorCPU___nested_select_backward(grad_output, self, dim, index); |
833 | } |
834 | at::Tensor _nested_select_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, c10::SymInt index) { |
835 | return wrapper_NestedTensorCPU___nested_select_backward(grad_output, self, dim, index); |
836 | } |
837 | at::Tensor detach(const at::Tensor & self) { |
838 | return wrapper_NestedTensorCPU__detach(self); |
839 | } |
840 | at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float) { |
841 | return wrapper_NestedTensorCPU___softmax(self, dim, half_to_float); |
842 | } |
843 | at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) { |
844 | return wrapper_NestedTensorCPU___softmax_backward_data(grad_output, output, dim, input_dtype); |
845 | } |
846 | at::Tensor squeeze(const at::Tensor & self) { |
847 | return wrapper_NestedTensorCPU__squeeze(self); |
848 | } |
849 | at::Tensor squeeze(const at::Tensor & self, int64_t dim) { |
850 | return wrapper_NestedTensorCPU_dim_squeeze(self, dim); |
851 | } |
852 | at::Tensor squeeze(const at::Tensor & self, at::IntArrayRef dim) { |
853 | return wrapper_NestedTensorCPU_dims_squeeze(self, dim); |
854 | } |
855 | at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype) { |
856 | return wrapper_NestedTensorCPU_dim_IntList_sum(self, dim, keepdim, dtype); |
857 | } |
858 | at::Tensor _nested_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim) { |
859 | return wrapper_NestedTensorCPU___nested_sum_backward(grad, self, dim, keepdim); |
860 | } |
861 | at::Tensor tanh(const at::Tensor & self) { |
862 | return wrapper_NestedTensorCPU__tanh(self); |
863 | } |
864 | at::Tensor & tanh_(at::Tensor & self) { |
865 | return wrapper_NestedTensorCPU__tanh_(self); |
866 | } |
867 | at::Tensor transpose(const at::Tensor & self, int64_t dim0, int64_t dim1) { |
868 | return wrapper_NestedTensorCPU_int_transpose(self, dim0, dim1); |
869 | } |
870 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) { |
871 | return wrapper_NestedTensorCPU___transform_bias_rescale_qkv(qkv, qkv_bias, num_heads); |
872 | } |
873 | at::Tensor _nested_tensor_size(const at::Tensor & self) { |
874 | return wrapper_NestedTensorCPU___nested_tensor_size(self); |
875 | } |
876 | at::Tensor _nested_tensor_strides(const at::Tensor & self) { |
877 | return wrapper_NestedTensorCPU___nested_tensor_strides(self); |
878 | } |
879 | ::std::vector<int64_t> _nested_tensor_offsets(const at::Tensor & self) { |
880 | return wrapper_NestedTensorCPU___nested_tensor_offsets(self); |
881 | } |
882 | at::Tensor _nested_from_padded_and_nested_example(const at::Tensor & padded, const at::Tensor & nt_example) { |
883 | return wrapper_NestedTensorCPU___nested_from_padded_and_nested_example(padded, nt_example); |
884 | } |
885 | at::Tensor unsqueeze(const at::Tensor & self, int64_t dim) { |
886 | return wrapper_NestedTensorCPU__unsqueeze(self, dim); |
887 | } |
888 | at::Tensor clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) { |
889 | return wrapper_NestedTensorCPU__clone(self, memory_format); |
890 | } |
891 | at::Tensor values(const at::Tensor & self) { |
892 | return wrapper_NestedTensorCPU__values(self); |
893 | } |
894 | at::Tensor _to_copy(const at::Tensor & self, at::TensorOptions options, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) { |
895 | return wrapper_NestedTensorCPU___to_copy(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), non_blocking, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); |
896 | } |
897 | at::Tensor _to_copy(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) { |
898 | return wrapper_NestedTensorCPU___to_copy(self, dtype, layout, device, pin_memory, non_blocking, memory_format); |
899 | } |
900 | at::Tensor view(const at::Tensor & self, at::IntArrayRef size) { |
901 | return wrapper_NestedTensorCPU__view(self, c10::fromIntArrayRefSlow(size)); |
902 | } |
903 | at::Tensor view_symint(const at::Tensor & self, c10::SymIntArrayRef size) { |
904 | return wrapper_NestedTensorCPU__view(self, size); |
905 | } |
906 | at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self) { |
907 | return wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch(self); |
908 | } |
909 | at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b) { |
910 | return wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch(self, b); |
911 | } |
912 | at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size) { |
913 | return wrapper_NestedTensorCPU__to_padded_tensor(self, padding, output_size.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*output_size)) : c10::nullopt); |
914 | } |
915 | at::Tensor to_padded_tensor_symint(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size) { |
916 | return wrapper_NestedTensorCPU__to_padded_tensor(self, padding, output_size); |
917 | } |
918 | at::Tensor _nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) { |
919 | return wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape(self, query); |
920 | } |
921 | at::Tensor _transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, c10::optional<int64_t> mask_type) { |
922 | return wrapper_NestedTensorCPU___transformer_encoder_layer_fwd(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type); |
923 | } |
924 | ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type) { |
925 | return wrapper_NestedTensorCPU___native_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type); |
926 | } |
927 | int64_t _fused_sdp_choice(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal) { |
928 | return wrapper_NestedTensorCPU___fused_sdp_choice(query, key, value, attn_mask, dropout_p, is_causal); |
929 | } |
930 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _transformer_decoder_only_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value) { |
931 | return wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, incr_key, incr_value); |
932 | } |
933 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> _native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value, bool need_weights, bool average_attn_weights) { |
934 | return wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, incr_key, incr_value, need_weights, average_attn_weights); |
935 | } |
936 | } // namespace nestedtensorcpu |
937 | } // namespace at |
938 | |