// required for old g++ to compile PRId64 macros, see
// https://github.com/pytorch/pytorch/issues/3571
// for context
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

// An external backend might generate this file within its code tree
// and check all the source files within the tree with clang-format,
// so disable it here, since the backend might have a different config.
// clang-format off

// NOTE: This condition is true for all PyTorch internal libraries; it
// just excludes external projects such as torch_xla which
// re-use some of the PyTorch codegen machinery.
#if defined(CAFFE2_BUILD_MAIN_LIB) || \
    defined(TORCH_CUDA_BUILD_MAIN_LIB) || \
    defined(TORCH_HIP_BUILD_MAIN_LIB) || \
    defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \
    defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB)
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#endif

// @generated by torchgen/gen.py from RegisterDispatchKey.cpp

#include <c10/core/TensorImpl.h>
#include <c10/core/Allocator.h>
#include <ATen/DeviceGuard.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Utils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/Dispatch.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Half.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <c10/util/Optional.h>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>

#include <cstddef>
#include <functional>
#include <memory>
#include <utility>

#include <ATen/Config.h>
#include <ATen/core/op_registration/adaption.h>
#include <torch/library.h>


#include <ATen/ops/as_strided_native.h>
#include <ATen/ops/empty.h>
#include <ATen/ops/empty_strided.h>
#include <ATen/ops/_copy_from_and_resize.h>
#include <ATen/ops/_copy_from.h>
#include <ATen/ops/_fused_sdp_choice_native.h>
#include <ATen/ops/_native_decoder_only_multi_head_attention_native.h>
#include <ATen/ops/_native_multi_head_attention_native.h>
#include <ATen/ops/_nested_from_padded_and_nested_example_native.h>
#include <ATen/ops/_nested_select_backward_native.h>
#include <ATen/ops/_nested_sum_backward_native.h>
#include <ATen/ops/_nested_tensor_offsets_native.h>
#include <ATen/ops/_nested_tensor_size_native.h>
#include <ATen/ops/_nested_tensor_softmax_with_shape_native.h>
#include <ATen/ops/_nested_tensor_strides_native.h>
#include <ATen/ops/_softmax_backward_data_native.h>
#include <ATen/ops/_softmax_native.h>
#include <ATen/ops/_test_autograd_multiple_dispatch_native.h>
#include <ATen/ops/_to_copy_native.h>
#include <ATen/ops/_transform_bias_rescale_qkv_native.h>
#include <ATen/ops/_transformer_decoder_only_layer_fwd_native.h>
#include <ATen/ops/_transformer_encoder_layer_fwd_native.h>
#include <ATen/ops/add_native.h>
#include <ATen/ops/bmm_native.h>
#include <ATen/ops/chunk_native.h>
#include <ATen/ops/clone_native.h>
#include <ATen/ops/copy_native.h>
#include <ATen/ops/detach_native.h>
#include <ATen/ops/div_native.h>
#include <ATen/ops/embedding_native.h>
#include <ATen/ops/empty_like_native.h>
#include <ATen/ops/fill_native.h>
#include <ATen/ops/gelu_native.h>
#include <ATen/ops/is_same_size_native.h>
#include <ATen/ops/linear_backward_native.h>
#include <ATen/ops/linear_native.h>
#include <ATen/ops/matmul_backward_native.h>
#include <ATen/ops/matmul_native.h>
#include <ATen/ops/mul_native.h>
#include <ATen/ops/native_dropout_backward_native.h>
#include <ATen/ops/native_dropout_native.h>
#include <ATen/ops/native_layer_norm_native.h>
#include <ATen/ops/neg_native.h>
#include <ATen/ops/ones_like_native.h>
#include <ATen/ops/relu_native.h>
#include <ATen/ops/select_native.h>
#include <ATen/ops/squeeze_native.h>
#include <ATen/ops/sum_native.h>
#include <ATen/ops/tanh_native.h>
#include <ATen/ops/to_padded_tensor_native.h>
#include <ATen/ops/transpose_native.h>
#include <ATen/ops/unsqueeze_native.h>
#include <ATen/ops/values_native.h>
#include <ATen/ops/view_native.h>

// See template file RegisterDispatchDefinitions.ini
namespace at {
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may already have been
// defined in the at namespace.
namespace {
void resize_out(const Tensor &out, IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) {
  TORCH_CHECK(options.dtype() == out.dtype(),
      "Expected out tensor to have dtype ", options.dtype(), ", but got ", out.dtype(), " instead");
  TORCH_CHECK(options.device() == out.device(),
      "Expected out tensor to have device ", options.device(), ", but got ", out.device(), " instead");
  const bool resized = at::native::resize_output(out, sizes);
  // Only restride if a resize occurred; otherwise we ignore the (advisory)
  // strides from the meta function and directly use the output tensor's
  // preexisting strides
  if (resized) {
    if (!strides.empty()) {
      TORCH_INTERNAL_ASSERT(!options.memory_format_opt().has_value());
      // TODO: avoid the redispatch here
      out.as_strided_(sizes, strides);
    } else if (options.memory_format_opt().has_value()) {
      out.unsafeGetTensorImpl()->empty_tensor_restride(*options.memory_format_opt());
    }
  }
}
void check_inplace(const Tensor &self, IntArrayRef sizes, const TensorOptions &options) {
  // These checks are needed on those operators that:
  // 1) don't use 'TensorIterator' (e.g. 'addmm' and 'baddbmm')
  // 2) have particular typing rules (e.g. 'cumsum' and 'cumprod')
  // For other operators (e.g. 'add'), 'TensorIterator' already checks
  // these things separately.
  TORCH_CHECK(options.dtype() == self.dtype(),
      "Bad in-place call: ",
      "input tensor dtype ", self.dtype(), " and output tensor dtype ", options.dtype(), " should match");
  TORCH_CHECK(options.device() == self.device(),
      "Bad in-place call: ",
      "input tensor device ", self.device(), " and output tensor device ", options.device(), " should match");
  TORCH_CHECK(sizes == self.sizes(),
      "Bad in-place call: ",
      "input tensor size ", self.sizes(), " and output tensor size ", sizes, " should match");
}
namespace {
::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU__native_dropout(const at::Tensor & input, double p, c10::optional<bool> train) {
  // No device check
  // DeviceGuard omitted
  return at::native::native_dropout_nested(input, p, train);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale) {
  // No device check
  // DeviceGuard omitted
  return at::native::native_dropout_backward(grad_output, mask, scale);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_Tensor_add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_add_Tensor(self, other, alpha);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU_Tensor_add_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_add__Tensor(self, other, alpha);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__bmm(const at::Tensor & self, const at::Tensor & mat2) {
  // No device check
  // DeviceGuard omitted
  return at::native::bmm_nested(self, mat2);
}
} // anonymous namespace
namespace {
::std::vector<at::Tensor> wrapper_NestedTensorCPU__chunk(const at::Tensor & self, int64_t chunks, int64_t dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::chunk_nested_tensor(self, chunks, dim);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU__copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking) {
  // No device check
  // DeviceGuard omitted
  return at::native::copy_nested_(self, src, non_blocking);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_Tensor_div(const at::Tensor & self, const at::Tensor & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_div_Tensor(self, other);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_Scalar_div(const at::Tensor & self, const at::Scalar & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_div_Scalar(self, other);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__embedding(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_embedding(weight, indices, padding_idx.expect_int(), scale_grad_by_freq, sparse);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  // No device check
  // DeviceGuard omitted
  return at::native::empty_like_nested(self, dtype, layout, device, pin_memory, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU_Scalar_fill_(at::Tensor & self, const at::Scalar & value) {
  // No device check
  // DeviceGuard omitted
  return at::native::fill_nested_(self, value);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU_Tensor_fill_(at::Tensor & self, const at::Tensor & value) {
  // No device check
  // DeviceGuard omitted
  return at::native::fill_nested_(self, value);
}
} // anonymous namespace
namespace {
bool wrapper_NestedTensorCPU__is_same_size(const at::Tensor & self, const at::Tensor & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::nested_is_same_size(self, other);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU__native_layer_norm(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) {
  // No device check
  // DeviceGuard omitted
  return at::native::nested_layer_norm(input, C10_AS_INTARRAYREF_SLOW(normalized_shape), weight, bias, eps);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__linear(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias) {
  // No device check
  // DeviceGuard omitted
  return at::native::nested_linear(input, weight, bias);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU__linear_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, ::std::array<bool,3> output_mask) {
  // No device check
  // DeviceGuard omitted
  return at::native::nested_linear_backward(self, grad_output, weight, output_mask);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__matmul(const at::Tensor & self, const at::Tensor & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::matmul_nested(self, other);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU_out_matmul_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
  // No device check
  // DeviceGuard omitted
  return at::native::matmul_out_nested(self, other, out);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU__matmul_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & other, ::std::array<bool,2> mask) {
  // No device check
  // DeviceGuard omitted
  return at::native::matmul_backward_nested(grad, self, other, mask);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_Tensor_mul(const at::Tensor & self, const at::Tensor & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_mul_Tensor(self, other);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU_Tensor_mul_(at::Tensor & self, const at::Tensor & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_mul__Tensor(self, other);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_Scalar_mul(const at::Tensor & self, const at::Scalar & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_mul_Scalar(self, other);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU_Scalar_mul_(at::Tensor & self, const at::Scalar & other) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_mul__Scalar(self, other);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__ones_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  // No device check
  // DeviceGuard omitted
  return at::native::ones_like(self, dtype, layout, device, pin_memory, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__neg(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_neg(self);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU__neg_(at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_neg_(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__relu(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_relu(self);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU__relu_(at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_relu_(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__gelu(const at::Tensor & self, c10::string_view approximate) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_gelu(self, approximate);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU__gelu_(at::Tensor & self, c10::string_view approximate) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_gelu_(self, approximate);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_int_select(const at::Tensor & self, int64_t dim, c10::SymInt index) {
  // No device check
  // DeviceGuard omitted
  return at::native::select_nested(self, dim, index.expect_int());
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___nested_select_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, c10::SymInt index) {
  // No device check
  // DeviceGuard omitted
  return at::native::_nested_select_backward_symint(grad_output, self, dim, index);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__detach(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::detach(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___softmax(const at::Tensor & self, int64_t dim, bool half_to_float) {
  // No device check
  // DeviceGuard omitted
  return at::native::softmax_nested(self, dim, half_to_float);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) {
  // No device check
  // DeviceGuard omitted
  return at::native::nested_softmax_backward(grad_output, output, dim, input_dtype);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__squeeze(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::squeeze_nested(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_dim_squeeze(const at::Tensor & self, int64_t dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::squeeze_dim_nested(self, dim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_dims_squeeze(const at::Tensor & self, at::IntArrayRef dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::squeeze_dim_nested(self, dim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_dim_IntList_sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_sum_dim_CPU(self, dim, keepdim, dtype);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___nested_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim) {
  // No device check
  // DeviceGuard omitted
  return at::native::_nested_sum_backward_cpu(grad, self, dim, keepdim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__tanh(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_tanh(self);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_NestedTensorCPU__tanh_(at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_tanh_(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_int_transpose(const at::Tensor & self, int64_t dim0, int64_t dim1) {
  // No device check
  // DeviceGuard omitted
  return at::native::transpose_nested(self, dim0, dim1);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) {
  // No device check
  // DeviceGuard omitted
  return at::native::transform_bias_rescale_qkv_cpu(qkv, qkv_bias, num_heads);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___nested_tensor_size(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::_nested_tensor_size(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___nested_tensor_strides(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::_nested_tensor_strides(self);
}
} // anonymous namespace
namespace {
::std::vector<int64_t> wrapper_NestedTensorCPU___nested_tensor_offsets(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::_nested_tensor_offsets(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___nested_from_padded_and_nested_example(const at::Tensor & padded, const at::Tensor & nt_example) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_from_padded_and_nested_example(padded, nt_example);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__unsqueeze(const at::Tensor & self, int64_t dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::unsqueeze_nested(self, dim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) {
  // No device check
  // DeviceGuard omitted
  return at::native::clone_nested(self, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__values(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::values_nested(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___to_copy(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) {
  // No device check
  // DeviceGuard omitted
  return at::native::_to_copy_nested(self, dtype, layout, device, pin_memory, non_blocking, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__view(const at::Tensor & self, c10::SymIntArrayRef size) {
  // No device check
  // DeviceGuard omitted
  return at::native::view_nested(self, C10_AS_INTARRAYREF_SLOW(size));
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::_test_autograd_multiple_dispatch_fullcoverage(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch(const at::Tensor & self, bool b) {
  // No device check
  // DeviceGuard omitted
  return at::native::_test_autograd_multiple_dispatch_ntonly(self, b);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU__to_padded_tensor(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_to_padded_tensor_generic(self, padding, output_size.has_value() ? c10::make_optional(C10_AS_INTARRAYREF_SLOW(*output_size)) : c10::nullopt);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) {
  // No device check
  // DeviceGuard omitted
  return at::native::NestedTensor_softmax_dropout(self, query);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_NestedTensorCPU___transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, c10::optional<int64_t> mask_type) {
  // No device check
  // DeviceGuard omitted
  return at::native::transformer_encoder_layer_forward(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU___native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type) {
  // No device check
  // DeviceGuard omitted
  return at::native::native_multi_head_attention_cpu(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type);
}
} // anonymous namespace
namespace {
int64_t wrapper_NestedTensorCPU___fused_sdp_choice(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal) {
  // No device check
  // DeviceGuard omitted
  return at::native::_fused_sdp_choice_cpp(query, key, value, attn_mask, dropout_p, is_causal);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value) {
  // No device check
  // DeviceGuard omitted
  return at::native::transformer_decoder_only_layer_forward(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, incr_key, incr_value);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value, bool need_weights, bool average_attn_weights) {
  // No device check
  // DeviceGuard omitted
  return at::native::native_decoder_only_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, incr_key, incr_value, need_weights, average_attn_weights);
}
} // anonymous namespace
TORCH_LIBRARY_IMPL(aten, NestedTensorCPU, m) {
    m.impl("native_dropout",
TORCH_FN(wrapper_NestedTensorCPU__native_dropout));
m.impl("native_dropout_backward",
TORCH_FN(wrapper_NestedTensorCPU__native_dropout_backward));
m.impl("add.Tensor",
TORCH_FN(wrapper_NestedTensorCPU_Tensor_add));
m.impl("add_.Tensor",
TORCH_FN(wrapper_NestedTensorCPU_Tensor_add_));
m.impl("bmm",
TORCH_FN(wrapper_NestedTensorCPU__bmm));
m.impl("chunk",
TORCH_FN(wrapper_NestedTensorCPU__chunk));
m.impl("copy_",
TORCH_FN(wrapper_NestedTensorCPU__copy_));
m.impl("div.Tensor",
TORCH_FN(wrapper_NestedTensorCPU_Tensor_div));
m.impl("div.Scalar",
TORCH_FN(wrapper_NestedTensorCPU_Scalar_div));
m.impl("embedding",
TORCH_FN(wrapper_NestedTensorCPU__embedding));
m.impl("empty_like",
TORCH_FN(wrapper_NestedTensorCPU__empty_like));
m.impl("fill_.Scalar",
TORCH_FN(wrapper_NestedTensorCPU_Scalar_fill_));
m.impl("fill_.Tensor",
TORCH_FN(wrapper_NestedTensorCPU_Tensor_fill_));
m.impl("is_same_size",
TORCH_FN(wrapper_NestedTensorCPU__is_same_size));
m.impl("native_layer_norm",
TORCH_FN(wrapper_NestedTensorCPU__native_layer_norm));
m.impl("linear",
TORCH_FN(wrapper_NestedTensorCPU__linear));
m.impl("linear_backward",
TORCH_FN(wrapper_NestedTensorCPU__linear_backward));
m.impl("matmul",
TORCH_FN(wrapper_NestedTensorCPU__matmul));
m.impl("matmul.out",
TORCH_FN(wrapper_NestedTensorCPU_out_matmul_out));
m.impl("matmul_backward",
TORCH_FN(wrapper_NestedTensorCPU__matmul_backward));
m.impl("mul.Tensor",
TORCH_FN(wrapper_NestedTensorCPU_Tensor_mul));
m.impl("mul_.Tensor",
TORCH_FN(wrapper_NestedTensorCPU_Tensor_mul_));
m.impl("mul.Scalar",
TORCH_FN(wrapper_NestedTensorCPU_Scalar_mul));
m.impl("mul_.Scalar",
TORCH_FN(wrapper_NestedTensorCPU_Scalar_mul_));
m.impl("ones_like",
TORCH_FN(wrapper_NestedTensorCPU__ones_like));
m.impl("neg",
TORCH_FN(wrapper_NestedTensorCPU__neg));
m.impl("neg_",
TORCH_FN(wrapper_NestedTensorCPU__neg_));
m.impl("relu",
TORCH_FN(wrapper_NestedTensorCPU__relu));
m.impl("relu_",
TORCH_FN(wrapper_NestedTensorCPU__relu_));
m.impl("gelu",
TORCH_FN(wrapper_NestedTensorCPU__gelu));
m.impl("gelu_",
TORCH_FN(wrapper_NestedTensorCPU__gelu_));
m.impl("select.int",
TORCH_FN(wrapper_NestedTensorCPU_int_select));
m.impl("_nested_select_backward",
TORCH_FN(wrapper_NestedTensorCPU___nested_select_backward));
m.impl("detach",
TORCH_FN(wrapper_NestedTensorCPU__detach));
m.impl("_softmax",
TORCH_FN(wrapper_NestedTensorCPU___softmax));
m.impl("_softmax_backward_data",
TORCH_FN(wrapper_NestedTensorCPU___softmax_backward_data));
m.impl("squeeze",
TORCH_FN(wrapper_NestedTensorCPU__squeeze));
m.impl("squeeze.dim",
TORCH_FN(wrapper_NestedTensorCPU_dim_squeeze));
m.impl("squeeze.dims",
TORCH_FN(wrapper_NestedTensorCPU_dims_squeeze));
m.impl("sum.dim_IntList",
TORCH_FN(wrapper_NestedTensorCPU_dim_IntList_sum));
m.impl("_nested_sum_backward",
TORCH_FN(wrapper_NestedTensorCPU___nested_sum_backward));
m.impl("tanh",
TORCH_FN(wrapper_NestedTensorCPU__tanh));
m.impl("tanh_",
TORCH_FN(wrapper_NestedTensorCPU__tanh_));
m.impl("transpose.int",
TORCH_FN(wrapper_NestedTensorCPU_int_transpose));
m.impl("_transform_bias_rescale_qkv",
TORCH_FN(wrapper_NestedTensorCPU___transform_bias_rescale_qkv));
m.impl("_nested_tensor_size",
TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_size));
m.impl("_nested_tensor_strides",
TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_strides));
m.impl("_nested_tensor_offsets",
TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_offsets));
m.impl("_nested_from_padded_and_nested_example",
TORCH_FN(wrapper_NestedTensorCPU___nested_from_padded_and_nested_example));
m.impl("unsqueeze",
TORCH_FN(wrapper_NestedTensorCPU__unsqueeze));
m.impl("clone",
TORCH_FN(wrapper_NestedTensorCPU__clone));
m.impl("values",
TORCH_FN(wrapper_NestedTensorCPU__values));
m.impl("_to_copy",
TORCH_FN(wrapper_NestedTensorCPU___to_copy));
m.impl("view",
TORCH_FN(wrapper_NestedTensorCPU__view));
m.impl("_test_autograd_multiple_dispatch.fullcoverage",
TORCH_FN(wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch));
m.impl("_test_autograd_multiple_dispatch.ntonly",
TORCH_FN(wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch));
m.impl("to_padded_tensor",
TORCH_FN(wrapper_NestedTensorCPU__to_padded_tensor));
m.impl("_nested_tensor_softmax_with_shape",
TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape));
m.impl("_transformer_encoder_layer_fwd",
TORCH_FN(wrapper_NestedTensorCPU___transformer_encoder_layer_fwd));
m.impl("_native_multi_head_attention",
TORCH_FN(wrapper_NestedTensorCPU___native_multi_head_attention));
m.impl("_fused_sdp_choice",
TORCH_FN(wrapper_NestedTensorCPU___fused_sdp_choice));
m.impl("_transformer_decoder_only_layer_fwd",
TORCH_FN(wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd));
m.impl("_native_decoder_only_multi_head_attention",
TORCH_FN(wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention));
};
} // anonymous namespace
namespace nestedtensorcpu {
::std::tuple<at::Tensor,at::Tensor> native_dropout(const at::Tensor & input, double p, c10::optional<bool> train) {
return wrapper_NestedTensorCPU__native_dropout(input, p, train);
}
at::Tensor native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale) {
return wrapper_NestedTensorCPU__native_dropout_backward(grad_output, mask, scale);
}
at::Tensor add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) {
return wrapper_NestedTensorCPU_Tensor_add(self, other, alpha);
}
at::Tensor & add_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) {
return wrapper_NestedTensorCPU_Tensor_add_(self, other, alpha);
}
at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2) {
return wrapper_NestedTensorCPU__bmm(self, mat2);
}
::std::vector<at::Tensor> chunk(const at::Tensor & self, int64_t chunks, int64_t dim) {
return wrapper_NestedTensorCPU__chunk(self, chunks, dim);
}
at::Tensor & copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking) {
return wrapper_NestedTensorCPU__copy_(self, src, non_blocking);
}
at::Tensor div(const at::Tensor & self, const at::Tensor & other) {
return wrapper_NestedTensorCPU_Tensor_div(self, other);
}
at::Tensor div(const at::Tensor & self, const at::Scalar & other) {
return wrapper_NestedTensorCPU_Scalar_div(self, other);
}
at::Tensor embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse) {
return wrapper_NestedTensorCPU__embedding(weight, indices, padding_idx, scale_grad_by_freq, sparse);
}
at::Tensor embedding_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse) {
return wrapper_NestedTensorCPU__embedding(weight, indices, padding_idx, scale_grad_by_freq, sparse);
}
at::Tensor empty_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU__empty_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU__empty_like(self, dtype, layout, device, pin_memory, memory_format);
}
at::Tensor & fill_(at::Tensor & self, const at::Scalar & value) {
return wrapper_NestedTensorCPU_Scalar_fill_(self, value);
}
at::Tensor & fill_(at::Tensor & self, const at::Tensor & value) {
return wrapper_NestedTensorCPU_Tensor_fill_(self, value);
}
bool is_same_size(const at::Tensor & self, const at::Tensor & other) {
return wrapper_NestedTensorCPU__is_same_size(self, other);
}
::std::tuple<at::Tensor,at::Tensor,at::Tensor> native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) {
return wrapper_NestedTensorCPU__native_layer_norm(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps);
}
::std::tuple<at::Tensor,at::Tensor,at::Tensor> native_layer_norm_symint(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) {
return wrapper_NestedTensorCPU__native_layer_norm(input, normalized_shape, weight, bias, eps);
}
at::Tensor linear(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias) {
return wrapper_NestedTensorCPU__linear(input, weight, bias);
}
::std::tuple<at::Tensor,at::Tensor,at::Tensor> linear_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, ::std::array<bool,3> output_mask) {
return wrapper_NestedTensorCPU__linear_backward(self, grad_output, weight, output_mask);
}
at::Tensor matmul(const at::Tensor & self, const at::Tensor & other) {
return wrapper_NestedTensorCPU__matmul(self, other);
}
at::Tensor & matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
return wrapper_NestedTensorCPU_out_matmul_out(self, other, out);
}
at::Tensor & matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
return wrapper_NestedTensorCPU_out_matmul_out(self, other, out);
}
::std::tuple<at::Tensor,at::Tensor> matmul_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & other, ::std::array<bool,2> mask) {
return wrapper_NestedTensorCPU__matmul_backward(grad, self, other, mask);
}
at::Tensor mul(const at::Tensor & self, const at::Tensor & other) {
return wrapper_NestedTensorCPU_Tensor_mul(self, other);
}
at::Tensor & mul_(at::Tensor & self, const at::Tensor & other) {
return wrapper_NestedTensorCPU_Tensor_mul_(self, other);
}
at::Tensor mul(const at::Tensor & self, const at::Scalar & other) {
return wrapper_NestedTensorCPU_Scalar_mul(self, other);
}
at::Tensor & mul_(at::Tensor & self, const at::Scalar & other) {
return wrapper_NestedTensorCPU_Scalar_mul_(self, other);
}
at::Tensor ones_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU__ones_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor ones_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU__ones_like(self, dtype, layout, device, pin_memory, memory_format);
}
at::Tensor neg(const at::Tensor & self) {
return wrapper_NestedTensorCPU__neg(self);
}
at::Tensor & neg_(at::Tensor & self) {
return wrapper_NestedTensorCPU__neg_(self);
}
at::Tensor relu(const at::Tensor & self) {
return wrapper_NestedTensorCPU__relu(self);
}
at::Tensor & relu_(at::Tensor & self) {
return wrapper_NestedTensorCPU__relu_(self);
}
at::Tensor gelu(const at::Tensor & self, c10::string_view approximate) {
return wrapper_NestedTensorCPU__gelu(self, approximate);
}
at::Tensor & gelu_(at::Tensor & self, c10::string_view approximate) {
return wrapper_NestedTensorCPU__gelu_(self, approximate);
}
at::Tensor select(const at::Tensor & self, int64_t dim, int64_t index) {
return wrapper_NestedTensorCPU_int_select(self, dim, index);
}
at::Tensor select_symint(const at::Tensor & self, int64_t dim, c10::SymInt index) {
return wrapper_NestedTensorCPU_int_select(self, dim, index);
}
at::Tensor _nested_select_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, int64_t index) {
return wrapper_NestedTensorCPU___nested_select_backward(grad_output, self, dim, index);
}
at::Tensor _nested_select_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, c10::SymInt index) {
return wrapper_NestedTensorCPU___nested_select_backward(grad_output, self, dim, index);
}
at::Tensor detach(const at::Tensor & self) {
return wrapper_NestedTensorCPU__detach(self);
}
at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float) {
return wrapper_NestedTensorCPU___softmax(self, dim, half_to_float);
}
at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) {
return wrapper_NestedTensorCPU___softmax_backward_data(grad_output, output, dim, input_dtype);
}
at::Tensor squeeze(const at::Tensor & self) {
return wrapper_NestedTensorCPU__squeeze(self);
}
at::Tensor squeeze(const at::Tensor & self, int64_t dim) {
return wrapper_NestedTensorCPU_dim_squeeze(self, dim);
}
at::Tensor squeeze(const at::Tensor & self, at::IntArrayRef dim) {
return wrapper_NestedTensorCPU_dims_squeeze(self, dim);
}
at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype) {
return wrapper_NestedTensorCPU_dim_IntList_sum(self, dim, keepdim, dtype);
}
at::Tensor _nested_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim) {
return wrapper_NestedTensorCPU___nested_sum_backward(grad, self, dim, keepdim);
}
at::Tensor tanh(const at::Tensor & self) {
return wrapper_NestedTensorCPU__tanh(self);
}
at::Tensor & tanh_(at::Tensor & self) {
return wrapper_NestedTensorCPU__tanh_(self);
}
at::Tensor transpose(const at::Tensor & self, int64_t dim0, int64_t dim1) {
return wrapper_NestedTensorCPU_int_transpose(self, dim0, dim1);
}
::std::tuple<at::Tensor,at::Tensor,at::Tensor> _transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) {
return wrapper_NestedTensorCPU___transform_bias_rescale_qkv(qkv, qkv_bias, num_heads);
}
at::Tensor _nested_tensor_size(const at::Tensor & self) {
return wrapper_NestedTensorCPU___nested_tensor_size(self);
}
at::Tensor _nested_tensor_strides(const at::Tensor & self) {
return wrapper_NestedTensorCPU___nested_tensor_strides(self);
}
::std::vector<int64_t> _nested_tensor_offsets(const at::Tensor & self) {
return wrapper_NestedTensorCPU___nested_tensor_offsets(self);
}
at::Tensor _nested_from_padded_and_nested_example(const at::Tensor & padded, const at::Tensor & nt_example) {
return wrapper_NestedTensorCPU___nested_from_padded_and_nested_example(padded, nt_example);
}
at::Tensor unsqueeze(const at::Tensor & self, int64_t dim) {
return wrapper_NestedTensorCPU__unsqueeze(self, dim);
}
at::Tensor clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU__clone(self, memory_format);
}
at::Tensor values(const at::Tensor & self) {
return wrapper_NestedTensorCPU__values(self);
}
at::Tensor _to_copy(const at::Tensor & self, at::TensorOptions options, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU___to_copy(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), non_blocking, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor _to_copy(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) {
return wrapper_NestedTensorCPU___to_copy(self, dtype, layout, device, pin_memory, non_blocking, memory_format);
}
at::Tensor view(const at::Tensor & self, at::IntArrayRef size) {
return wrapper_NestedTensorCPU__view(self, c10::fromIntArrayRefSlow(size));
}
at::Tensor view_symint(const at::Tensor & self, c10::SymIntArrayRef size) {
return wrapper_NestedTensorCPU__view(self, size);
}
at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self) {
return wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch(self);
}
at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b) {
return wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch(self, b);
}
at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size) {
return wrapper_NestedTensorCPU__to_padded_tensor(self, padding, output_size.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*output_size)) : c10::nullopt);
}
at::Tensor to_padded_tensor_symint(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size) {
return wrapper_NestedTensorCPU__to_padded_tensor(self, padding, output_size);
}
at::Tensor _nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) {
return wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape(self, query);
}
at::Tensor _transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, c10::optional<int64_t> mask_type) {
return wrapper_NestedTensorCPU___transformer_encoder_layer_fwd(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type);
}
::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type) {
return wrapper_NestedTensorCPU___native_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type);
}
int64_t _fused_sdp_choice(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal) {
return wrapper_NestedTensorCPU___fused_sdp_choice(query, key, value, attn_mask, dropout_p, is_causal);
}
::std::tuple<at::Tensor,at::Tensor,at::Tensor> _transformer_decoder_only_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value) {
return wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, incr_key, incr_value);
}
::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> _native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value, bool need_weights, bool average_attn_weights) {
return wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, incr_key, incr_value, need_weights, average_attn_weights);
}
} // namespace nestedtensorcpu
} // namespace at