1 | // required for old g++ to compile PRId64 macros, see |
2 | // https://github.com/pytorch/pytorch/issues/3571 |
3 | // for context |
4 | #ifndef __STDC_FORMAT_MACROS |
5 | #define __STDC_FORMAT_MACROS |
6 | #endif |
7 | |
// An external backend might generate this file within its own code tree
// and check all the source files in that tree with clang-format,
// so disable clang-format here since the backend might have a different config.
11 | // clang-format off |
12 | |
// NOTE: This condition is true for all PyTorch internal libraries; it
// just excludes external projects such as torch_xla which
// reuse some of the PyTorch codegen machinery.
16 | #if defined(CAFFE2_BUILD_MAIN_LIB) || \ |
17 | defined(TORCH_CUDA_BUILD_MAIN_LIB) || \ |
18 | defined(TORCH_HIP_BUILD_MAIN_LIB) || \ |
19 | defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \ |
20 | defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB) |
21 | #define TORCH_ASSERT_ONLY_METHOD_OPERATORS |
22 | #endif |
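// Roughly speaking, when TORCH_ASSERT_ONLY_METHOD_OPERATORS is defined this
// translation unit is expected to include only the fine-grained per-operator
// headers from ATen/ops/ (as done below) rather than monolithic headers such
// as ATen/Functions.h, which keeps header dependencies and rebuild times down.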
23 | |
24 | // @generated by torchgen/gen.py from RegisterDispatchKey.cpp |
25 | |
26 | #include <c10/core/TensorImpl.h> |
27 | #include <c10/core/Allocator.h> |
28 | #include <ATen/DeviceGuard.h> |
29 | #include <ATen/NamedTensorUtils.h> |
30 | #include <ATen/Utils.h> |
31 | #include <ATen/WrapDimUtils.h> |
32 | #include <ATen/Dispatch.h> |
33 | #include <c10/util/ExclusivelyOwned.h> |
34 | #include <c10/util/Half.h> |
35 | #include <c10/core/UndefinedTensorImpl.h> |
36 | #include <c10/util/Optional.h> |
37 | #include <ATen/Tensor.h> |
38 | #include <ATen/native/Resize.h> |
39 | |
40 | #include <cstddef> |
41 | #include <functional> |
42 | #include <memory> |
43 | #include <utility> |
44 | |
45 | #include <ATen/Config.h> |
46 | #include <ATen/core/op_registration/adaption.h> |
47 | #include <torch/library.h> |
48 | |
49 | |
50 | #include <ATen/ops/as_strided_native.h> |
51 | #include <ATen/ops/empty.h> |
52 | #include <ATen/ops/empty_strided.h> |
53 | #include <ATen/ops/_copy_from_and_resize.h> |
54 | #include <ATen/ops/_copy_from.h> |
55 | #include <ATen/ops/_fused_sdp_choice_native.h> |
56 | #include <ATen/ops/_native_decoder_only_multi_head_attention_native.h> |
57 | #include <ATen/ops/_native_multi_head_attention_native.h> |
58 | #include <ATen/ops/_nested_from_padded_and_nested_example_native.h> |
59 | #include <ATen/ops/_nested_select_backward_native.h> |
60 | #include <ATen/ops/_nested_sum_backward_native.h> |
61 | #include <ATen/ops/_nested_tensor_offsets_native.h> |
62 | #include <ATen/ops/_nested_tensor_size_native.h> |
63 | #include <ATen/ops/_nested_tensor_softmax_with_shape_native.h> |
64 | #include <ATen/ops/_nested_tensor_strides_native.h> |
65 | #include <ATen/ops/_softmax_backward_data_native.h> |
66 | #include <ATen/ops/_softmax_native.h> |
67 | #include <ATen/ops/_test_autograd_multiple_dispatch_native.h> |
68 | #include <ATen/ops/_to_copy_native.h> |
69 | #include <ATen/ops/_transform_bias_rescale_qkv_native.h> |
70 | #include <ATen/ops/_transformer_decoder_only_layer_fwd_native.h> |
71 | #include <ATen/ops/_transformer_encoder_layer_fwd_native.h> |
72 | #include <ATen/ops/add_native.h> |
73 | #include <ATen/ops/bmm_native.h> |
74 | #include <ATen/ops/chunk_native.h> |
75 | #include <ATen/ops/clone_native.h> |
76 | #include <ATen/ops/copy_native.h> |
77 | #include <ATen/ops/detach_native.h> |
78 | #include <ATen/ops/div_native.h> |
79 | #include <ATen/ops/embedding_native.h> |
80 | #include <ATen/ops/empty_like_native.h> |
81 | #include <ATen/ops/fill_native.h> |
82 | #include <ATen/ops/gelu_native.h> |
83 | #include <ATen/ops/is_same_size_native.h> |
84 | #include <ATen/ops/linear_backward_native.h> |
85 | #include <ATen/ops/linear_native.h> |
86 | #include <ATen/ops/matmul_backward_native.h> |
87 | #include <ATen/ops/matmul_native.h> |
88 | #include <ATen/ops/mul_native.h> |
89 | #include <ATen/ops/native_dropout_backward_native.h> |
90 | #include <ATen/ops/native_dropout_native.h> |
91 | #include <ATen/ops/native_layer_norm_native.h> |
92 | #include <ATen/ops/neg_native.h> |
93 | #include <ATen/ops/ones_like_native.h> |
94 | #include <ATen/ops/relu_native.h> |
95 | #include <ATen/ops/select_native.h> |
96 | #include <ATen/ops/squeeze_native.h> |
97 | #include <ATen/ops/sum_native.h> |
98 | #include <ATen/ops/tanh_native.h> |
99 | #include <ATen/ops/to_padded_tensor_native.h> |
100 | #include <ATen/ops/transpose_native.h> |
101 | #include <ATen/ops/unsqueeze_native.h> |
102 | #include <ATen/ops/values_native.h> |
103 | #include <ATen/ops/view_native.h> |
104 | |
105 | // See template file RegisterDispatchDefinitions.ini |
106 | namespace at { |
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may already have been
// defined in the at namespace.
110 | namespace { |
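// Shared helper for out= overloads: checks that the provided out tensor has
// the expected dtype and device, resizes it to the computed sizes, and, only
// if a resize actually occurred, applies the suggested strides or memory
// format.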
111 | void resize_out(const Tensor &out, IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) { |
112 | TORCH_CHECK(options.dtype() == out.dtype(), |
113 | "Expected out tensor to have dtype " , options.dtype(), ", but got " , out.dtype(), " instead" ); |
114 | TORCH_CHECK(options.device() == out.device(), |
115 | "Expected out tensor to have device " , options.device(), ", but got " , out.device(), " instead" ); |
116 | const bool resized = at::native::resize_output(out, sizes); |
117 | // Only restride if a resize occurred; otherwise we ignore the (advisory) |
118 | // strides from the meta function and directly use the output tensor's |
119 | // preexisting strides |
120 | if (resized) { |
121 | if (!strides.empty()) { |
122 | TORCH_INTERNAL_ASSERT(!options.memory_format_opt().has_value()); |
123 | // TODO: avoid the redispatch here |
124 | out.as_strided_(sizes, strides); |
125 | } else if (options.memory_format_opt().has_value()) { |
126 | out.unsafeGetTensorImpl()->empty_tensor_restride(*options.memory_format_opt()); |
127 | } |
128 | } |
129 | } |
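// Shared helper for in-place overloads: checks that self already has the
// dtype, device and sizes that the operation would produce.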
130 | void check_inplace(const Tensor &self, IntArrayRef sizes, const TensorOptions &options) { |
131 | // These checks are needed on those operators that: |
132 | // 1) don't use 'TensorIterator' (e.g. 'addmm' and 'baddbmm') |
133 | // 2) have particular typing rules (e.g. 'cumsum' and 'cumprod') |
134 | // For other operators (e.g. 'add'), 'TensorIterator' already checks |
135 | // these things separately. |
136 | TORCH_CHECK(options.dtype() == self.dtype(), |
137 | "Bad in-place call: " , |
138 | "input tensor dtype " , self.dtype(), " and output tensor dtype " , options.dtype(), " should match" ); |
139 | TORCH_CHECK(options.device() == self.device(), |
140 | "Bad in-place call: " , |
141 | "input tensor device " , self.device(), " and output tensor device " , options.device(), " should match" ); |
142 | TORCH_CHECK(sizes == self.sizes(), |
143 | "Bad in-place call: " , |
144 | "input tensor size " , self.sizes(), " and output tensor size " , sizes, " should match" ); |
145 | } |
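// Each anonymous-namespace wrapper below corresponds to one operator overload
// registered for the NestedTensorCPU dispatch key and simply forwards to the
// matching kernel in at::native. As the generated comments note, device
// checks and DeviceGuards are elided for these nested-tensor ops.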
146 | namespace { |
147 | ::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU__native_dropout(const at::Tensor & input, double p, c10::optional<bool> train) { |
148 | // No device check |
149 | // DeviceGuard omitted |
150 | return at::native::native_dropout_nested(input, p, train); |
151 | } |
152 | } // anonymous namespace |
153 | namespace { |
154 | at::Tensor wrapper_NestedTensorCPU__native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale) { |
155 | // No device check |
156 | // DeviceGuard omitted |
157 | return at::native::native_dropout_backward(grad_output, mask, scale); |
158 | } |
159 | } // anonymous namespace |
160 | namespace { |
161 | at::Tensor wrapper_NestedTensorCPU_Tensor_add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
162 | // No device check |
163 | // DeviceGuard omitted |
164 | return at::native::NestedTensor_add_Tensor(self, other, alpha); |
165 | } |
166 | } // anonymous namespace |
167 | namespace { |
168 | at::Tensor & wrapper_NestedTensorCPU_Tensor_add_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
169 | // No device check |
170 | // DeviceGuard omitted |
171 | return at::native::NestedTensor_add__Tensor(self, other, alpha); |
172 | } |
173 | } // anonymous namespace |
174 | namespace { |
175 | at::Tensor wrapper_NestedTensorCPU__bmm(const at::Tensor & self, const at::Tensor & mat2) { |
176 | // No device check |
177 | // DeviceGuard omitted |
178 | return at::native::bmm_nested(self, mat2); |
179 | } |
180 | } // anonymous namespace |
181 | namespace { |
182 | ::std::vector<at::Tensor> wrapper_NestedTensorCPU__chunk(const at::Tensor & self, int64_t chunks, int64_t dim) { |
183 | // No device check |
184 | // DeviceGuard omitted |
185 | return at::native::chunk_nested_tensor(self, chunks, dim); |
186 | } |
187 | } // anonymous namespace |
188 | namespace { |
189 | at::Tensor & wrapper_NestedTensorCPU__copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking) { |
190 | // No device check |
191 | // DeviceGuard omitted |
192 | return at::native::copy_nested_(self, src, non_blocking); |
193 | } |
194 | } // anonymous namespace |
195 | namespace { |
196 | at::Tensor wrapper_NestedTensorCPU_Tensor_div(const at::Tensor & self, const at::Tensor & other) { |
197 | // No device check |
198 | // DeviceGuard omitted |
199 | return at::native::NestedTensor_div_Tensor(self, other); |
200 | } |
201 | } // anonymous namespace |
202 | namespace { |
203 | at::Tensor wrapper_NestedTensorCPU_Scalar_div(const at::Tensor & self, const at::Scalar & other) { |
204 | // No device check |
205 | // DeviceGuard omitted |
206 | return at::native::NestedTensor_div_Scalar(self, other); |
207 | } |
208 | } // anonymous namespace |
209 | namespace { |
210 | at::Tensor wrapper_NestedTensorCPU__embedding(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse) { |
211 | // No device check |
212 | // DeviceGuard omitted |
213 | return at::native::NestedTensor_embedding(weight, indices, padding_idx.expect_int(), scale_grad_by_freq, sparse); |
214 | } |
215 | } // anonymous namespace |
216 | namespace { |
217 | at::Tensor wrapper_NestedTensorCPU__empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
218 | // No device check |
219 | // DeviceGuard omitted |
220 | return at::native::empty_like_nested(self, dtype, layout, device, pin_memory, memory_format); |
221 | } |
222 | } // anonymous namespace |
223 | namespace { |
224 | at::Tensor & wrapper_NestedTensorCPU_Scalar_fill_(at::Tensor & self, const at::Scalar & value) { |
225 | // No device check |
226 | // DeviceGuard omitted |
227 | return at::native::fill_nested_(self, value); |
228 | } |
229 | } // anonymous namespace |
230 | namespace { |
231 | at::Tensor & wrapper_NestedTensorCPU_Tensor_fill_(at::Tensor & self, const at::Tensor & value) { |
232 | // No device check |
233 | // DeviceGuard omitted |
234 | return at::native::fill_nested_(self, value); |
235 | } |
236 | } // anonymous namespace |
237 | namespace { |
238 | bool wrapper_NestedTensorCPU__is_same_size(const at::Tensor & self, const at::Tensor & other) { |
239 | // No device check |
240 | // DeviceGuard omitted |
241 | return at::native::nested_is_same_size(self, other); |
242 | } |
243 | } // anonymous namespace |
244 | namespace { |
245 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU__native_layer_norm(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) { |
246 | // No device check |
247 | // DeviceGuard omitted |
248 | return at::native::nested_layer_norm(input, C10_AS_INTARRAYREF_SLOW(normalized_shape), weight, bias, eps); |
249 | } |
250 | } // anonymous namespace |
251 | namespace { |
252 | at::Tensor wrapper_NestedTensorCPU__linear(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias) { |
253 | // No device check |
254 | // DeviceGuard omitted |
255 | return at::native::nested_linear(input, weight, bias); |
256 | } |
257 | } // anonymous namespace |
258 | namespace { |
259 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU__linear_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, ::std::array<bool,3> output_mask) { |
260 | // No device check |
261 | // DeviceGuard omitted |
262 | return at::native::nested_linear_backward(self, grad_output, weight, output_mask); |
263 | } |
264 | } // anonymous namespace |
265 | namespace { |
266 | at::Tensor wrapper_NestedTensorCPU__matmul(const at::Tensor & self, const at::Tensor & other) { |
267 | // No device check |
268 | // DeviceGuard omitted |
269 | return at::native::matmul_nested(self, other); |
270 | } |
271 | } // anonymous namespace |
272 | namespace { |
273 | at::Tensor & wrapper_NestedTensorCPU_out_matmul_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { |
274 | // No device check |
275 | // DeviceGuard omitted |
276 | return at::native::matmul_out_nested(self, other, out); |
277 | } |
278 | } // anonymous namespace |
279 | namespace { |
280 | ::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU__matmul_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & other, ::std::array<bool,2> mask) { |
281 | // No device check |
282 | // DeviceGuard omitted |
283 | return at::native::matmul_backward_nested(grad, self, other, mask); |
284 | } |
285 | } // anonymous namespace |
286 | namespace { |
287 | at::Tensor wrapper_NestedTensorCPU_Tensor_mul(const at::Tensor & self, const at::Tensor & other) { |
288 | // No device check |
289 | // DeviceGuard omitted |
290 | return at::native::NestedTensor_mul_Tensor(self, other); |
291 | } |
292 | } // anonymous namespace |
293 | namespace { |
294 | at::Tensor & wrapper_NestedTensorCPU_Tensor_mul_(at::Tensor & self, const at::Tensor & other) { |
295 | // No device check |
296 | // DeviceGuard omitted |
297 | return at::native::NestedTensor_mul__Tensor(self, other); |
298 | } |
299 | } // anonymous namespace |
300 | namespace { |
301 | at::Tensor wrapper_NestedTensorCPU_Scalar_mul(const at::Tensor & self, const at::Scalar & other) { |
302 | // No device check |
303 | // DeviceGuard omitted |
304 | return at::native::NestedTensor_mul_Scalar(self, other); |
305 | } |
306 | } // anonymous namespace |
307 | namespace { |
308 | at::Tensor & wrapper_NestedTensorCPU_Scalar_mul_(at::Tensor & self, const at::Scalar & other) { |
309 | // No device check |
310 | // DeviceGuard omitted |
311 | return at::native::NestedTensor_mul__Scalar(self, other); |
312 | } |
313 | } // anonymous namespace |
314 | namespace { |
315 | at::Tensor wrapper_NestedTensorCPU__ones_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
316 | // No device check |
317 | // DeviceGuard omitted |
318 | return at::native::ones_like(self, dtype, layout, device, pin_memory, memory_format); |
319 | } |
320 | } // anonymous namespace |
321 | namespace { |
322 | at::Tensor wrapper_NestedTensorCPU__neg(const at::Tensor & self) { |
323 | // No device check |
324 | // DeviceGuard omitted |
325 | return at::native::NestedTensor_neg(self); |
326 | } |
327 | } // anonymous namespace |
328 | namespace { |
329 | at::Tensor & wrapper_NestedTensorCPU__neg_(at::Tensor & self) { |
330 | // No device check |
331 | // DeviceGuard omitted |
332 | return at::native::NestedTensor_neg_(self); |
333 | } |
334 | } // anonymous namespace |
335 | namespace { |
336 | at::Tensor wrapper_NestedTensorCPU__relu(const at::Tensor & self) { |
337 | // No device check |
338 | // DeviceGuard omitted |
339 | return at::native::NestedTensor_relu(self); |
340 | } |
341 | } // anonymous namespace |
342 | namespace { |
343 | at::Tensor & wrapper_NestedTensorCPU__relu_(at::Tensor & self) { |
344 | // No device check |
345 | // DeviceGuard omitted |
346 | return at::native::NestedTensor_relu_(self); |
347 | } |
348 | } // anonymous namespace |
349 | namespace { |
350 | at::Tensor wrapper_NestedTensorCPU__gelu(const at::Tensor & self, c10::string_view approximate) { |
351 | // No device check |
352 | // DeviceGuard omitted |
353 | return at::native::NestedTensor_gelu(self, approximate); |
354 | } |
355 | } // anonymous namespace |
356 | namespace { |
357 | at::Tensor & wrapper_NestedTensorCPU__gelu_(at::Tensor & self, c10::string_view approximate) { |
358 | // No device check |
359 | // DeviceGuard omitted |
360 | return at::native::NestedTensor_gelu_(self, approximate); |
361 | } |
362 | } // anonymous namespace |
363 | namespace { |
364 | at::Tensor wrapper_NestedTensorCPU_int_select(const at::Tensor & self, int64_t dim, c10::SymInt index) { |
365 | // No device check |
366 | // DeviceGuard omitted |
367 | return at::native::select_nested(self, dim, index.expect_int()); |
368 | } |
369 | } // anonymous namespace |
370 | namespace { |
371 | at::Tensor wrapper_NestedTensorCPU___nested_select_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, c10::SymInt index) { |
372 | // No device check |
373 | // DeviceGuard omitted |
374 | return at::native::_nested_select_backward_symint(grad_output, self, dim, index); |
375 | } |
376 | } // anonymous namespace |
377 | namespace { |
378 | at::Tensor wrapper_NestedTensorCPU__detach(const at::Tensor & self) { |
379 | // No device check |
380 | // DeviceGuard omitted |
381 | return at::native::detach(self); |
382 | } |
383 | } // anonymous namespace |
384 | namespace { |
385 | at::Tensor wrapper_NestedTensorCPU___softmax(const at::Tensor & self, int64_t dim, bool half_to_float) { |
386 | // No device check |
387 | // DeviceGuard omitted |
388 | return at::native::softmax_nested(self, dim, half_to_float); |
389 | } |
390 | } // anonymous namespace |
391 | namespace { |
392 | at::Tensor wrapper_NestedTensorCPU___softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) { |
393 | // No device check |
394 | // DeviceGuard omitted |
395 | return at::native::nested_softmax_backward(grad_output, output, dim, input_dtype); |
396 | } |
397 | } // anonymous namespace |
398 | namespace { |
399 | at::Tensor wrapper_NestedTensorCPU__squeeze(const at::Tensor & self) { |
400 | // No device check |
401 | // DeviceGuard omitted |
402 | return at::native::squeeze_nested(self); |
403 | } |
404 | } // anonymous namespace |
405 | namespace { |
406 | at::Tensor wrapper_NestedTensorCPU_dim_squeeze(const at::Tensor & self, int64_t dim) { |
407 | // No device check |
408 | // DeviceGuard omitted |
409 | return at::native::squeeze_dim_nested(self, dim); |
410 | } |
411 | } // anonymous namespace |
412 | namespace { |
413 | at::Tensor wrapper_NestedTensorCPU_dims_squeeze(const at::Tensor & self, at::IntArrayRef dim) { |
414 | // No device check |
415 | // DeviceGuard omitted |
416 | return at::native::squeeze_dim_nested(self, dim); |
417 | } |
418 | } // anonymous namespace |
419 | namespace { |
420 | at::Tensor wrapper_NestedTensorCPU_dim_IntList_sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype) { |
421 | // No device check |
422 | // DeviceGuard omitted |
423 | return at::native::NestedTensor_sum_dim_CPU(self, dim, keepdim, dtype); |
424 | } |
425 | } // anonymous namespace |
426 | namespace { |
427 | at::Tensor wrapper_NestedTensorCPU___nested_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim) { |
428 | // No device check |
429 | // DeviceGuard omitted |
430 | return at::native::_nested_sum_backward_cpu(grad, self, dim, keepdim); |
431 | } |
432 | } // anonymous namespace |
433 | namespace { |
434 | at::Tensor wrapper_NestedTensorCPU__tanh(const at::Tensor & self) { |
435 | // No device check |
436 | // DeviceGuard omitted |
437 | return at::native::NestedTensor_tanh(self); |
438 | } |
439 | } // anonymous namespace |
440 | namespace { |
441 | at::Tensor & wrapper_NestedTensorCPU__tanh_(at::Tensor & self) { |
442 | // No device check |
443 | // DeviceGuard omitted |
444 | return at::native::NestedTensor_tanh_(self); |
445 | } |
446 | } // anonymous namespace |
447 | namespace { |
448 | at::Tensor wrapper_NestedTensorCPU_int_transpose(const at::Tensor & self, int64_t dim0, int64_t dim1) { |
449 | // No device check |
450 | // DeviceGuard omitted |
451 | return at::native::transpose_nested(self, dim0, dim1); |
452 | } |
453 | } // anonymous namespace |
454 | namespace { |
455 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) { |
456 | // No device check |
457 | // DeviceGuard omitted |
458 | return at::native::transform_bias_rescale_qkv_cpu(qkv, qkv_bias, num_heads); |
459 | } |
460 | } // anonymous namespace |
461 | namespace { |
462 | at::Tensor wrapper_NestedTensorCPU___nested_tensor_size(const at::Tensor & self) { |
463 | // No device check |
464 | // DeviceGuard omitted |
465 | return at::native::_nested_tensor_size(self); |
466 | } |
467 | } // anonymous namespace |
468 | namespace { |
469 | at::Tensor wrapper_NestedTensorCPU___nested_tensor_strides(const at::Tensor & self) { |
470 | // No device check |
471 | // DeviceGuard omitted |
472 | return at::native::_nested_tensor_strides(self); |
473 | } |
474 | } // anonymous namespace |
475 | namespace { |
476 | ::std::vector<int64_t> wrapper_NestedTensorCPU___nested_tensor_offsets(const at::Tensor & self) { |
477 | // No device check |
478 | // DeviceGuard omitted |
479 | return at::native::_nested_tensor_offsets(self); |
480 | } |
481 | } // anonymous namespace |
482 | namespace { |
483 | at::Tensor wrapper_NestedTensorCPU___nested_from_padded_and_nested_example(const at::Tensor & padded, const at::Tensor & nt_example) { |
484 | // No device check |
485 | // DeviceGuard omitted |
486 | return at::native::NestedTensor_from_padded_and_nested_example(padded, nt_example); |
487 | } |
488 | } // anonymous namespace |
489 | namespace { |
490 | at::Tensor wrapper_NestedTensorCPU__unsqueeze(const at::Tensor & self, int64_t dim) { |
491 | // No device check |
492 | // DeviceGuard omitted |
493 | return at::native::unsqueeze_nested(self, dim); |
494 | } |
495 | } // anonymous namespace |
496 | namespace { |
497 | at::Tensor wrapper_NestedTensorCPU__clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) { |
498 | // No device check |
499 | // DeviceGuard omitted |
500 | return at::native::clone_nested(self, memory_format); |
501 | } |
502 | } // anonymous namespace |
503 | namespace { |
504 | at::Tensor wrapper_NestedTensorCPU__values(const at::Tensor & self) { |
505 | // No device check |
506 | // DeviceGuard omitted |
507 | return at::native::values_nested(self); |
508 | } |
509 | } // anonymous namespace |
510 | namespace { |
511 | at::Tensor wrapper_NestedTensorCPU___to_copy(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) { |
512 | // No device check |
513 | // DeviceGuard omitted |
514 | return at::native::_to_copy_nested(self, dtype, layout, device, pin_memory, non_blocking, memory_format); |
515 | } |
516 | } // anonymous namespace |
517 | namespace { |
518 | at::Tensor wrapper_NestedTensorCPU__view(const at::Tensor & self, c10::SymIntArrayRef size) { |
519 | // No device check |
520 | // DeviceGuard omitted |
521 | return at::native::view_nested(self, C10_AS_INTARRAYREF_SLOW(size)); |
522 | } |
523 | } // anonymous namespace |
524 | namespace { |
525 | at::Tensor wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch(const at::Tensor & self) { |
526 | // No device check |
527 | // DeviceGuard omitted |
528 | return at::native::_test_autograd_multiple_dispatch_fullcoverage(self); |
529 | } |
530 | } // anonymous namespace |
531 | namespace { |
532 | at::Tensor wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch(const at::Tensor & self, bool b) { |
533 | // No device check |
534 | // DeviceGuard omitted |
535 | return at::native::_test_autograd_multiple_dispatch_ntonly(self, b); |
536 | } |
537 | } // anonymous namespace |
538 | namespace { |
539 | at::Tensor wrapper_NestedTensorCPU__to_padded_tensor(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size) { |
540 | // No device check |
541 | // DeviceGuard omitted |
542 | return at::native::NestedTensor_to_padded_tensor_generic(self, padding, output_size.has_value() ? c10::make_optional(C10_AS_INTARRAYREF_SLOW(*output_size)) : c10::nullopt); |
543 | } |
544 | } // anonymous namespace |
545 | namespace { |
546 | at::Tensor wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) { |
547 | // No device check |
548 | // DeviceGuard omitted |
549 | return at::native::NestedTensor_softmax_dropout(self, query); |
550 | } |
551 | } // anonymous namespace |
552 | namespace { |
553 | at::Tensor wrapper_NestedTensorCPU___transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, c10::optional<int64_t> mask_type) { |
554 | // No device check |
555 | // DeviceGuard omitted |
556 | return at::native::transformer_encoder_layer_forward(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type); |
557 | } |
558 | } // anonymous namespace |
559 | namespace { |
560 | ::std::tuple<at::Tensor,at::Tensor> wrapper_NestedTensorCPU___native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type) { |
561 | // No device check |
562 | // DeviceGuard omitted |
563 | return at::native::native_multi_head_attention_cpu(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type); |
564 | } |
565 | } // anonymous namespace |
566 | namespace { |
567 | int64_t wrapper_NestedTensorCPU___fused_sdp_choice(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal) { |
568 | // No device check |
569 | // DeviceGuard omitted |
570 | return at::native::_fused_sdp_choice_cpp(query, key, value, attn_mask, dropout_p, is_causal); |
571 | } |
572 | } // anonymous namespace |
573 | namespace { |
574 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value) { |
575 | // No device check |
576 | // DeviceGuard omitted |
577 | return at::native::transformer_decoder_only_layer_forward(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, incr_key, incr_value); |
578 | } |
579 | } // anonymous namespace |
580 | namespace { |
581 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value, bool need_weights, bool average_attn_weights) { |
582 | // No device check |
583 | // DeviceGuard omitted |
584 | return at::native::native_decoder_only_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, incr_key, incr_value, need_weights, average_attn_weights); |
585 | } |
586 | } // anonymous namespace |
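// Registration of the wrappers above under the NestedTensorCPU dispatch key.
// As a rough illustration (the variable names here are hypothetical), a call
// such as
//
//   at::Tensor a = /* a nested tensor on CPU */;
//   at::Tensor b = at::relu(a);
//
// is routed by the dispatcher to wrapper_NestedTensorCPU__relu, which in turn
// calls at::native::NestedTensor_relu.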
587 | TORCH_LIBRARY_IMPL(aten, NestedTensorCPU, m) { |
588 | m.impl("native_dropout" , |
589 | TORCH_FN(wrapper_NestedTensorCPU__native_dropout)); |
590 | m.impl("native_dropout_backward" , |
591 | TORCH_FN(wrapper_NestedTensorCPU__native_dropout_backward)); |
592 | m.impl("add.Tensor" , |
593 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_add)); |
594 | m.impl("add_.Tensor" , |
595 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_add_)); |
596 | m.impl("bmm" , |
597 | TORCH_FN(wrapper_NestedTensorCPU__bmm)); |
598 | m.impl("chunk" , |
599 | TORCH_FN(wrapper_NestedTensorCPU__chunk)); |
600 | m.impl("copy_" , |
601 | TORCH_FN(wrapper_NestedTensorCPU__copy_)); |
602 | m.impl("div.Tensor" , |
603 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_div)); |
604 | m.impl("div.Scalar" , |
605 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_div)); |
606 | m.impl("embedding" , |
607 | TORCH_FN(wrapper_NestedTensorCPU__embedding)); |
608 | m.impl("empty_like" , |
609 | TORCH_FN(wrapper_NestedTensorCPU__empty_like)); |
610 | m.impl("fill_.Scalar" , |
611 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_fill_)); |
612 | m.impl("fill_.Tensor" , |
613 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_fill_)); |
614 | m.impl("is_same_size" , |
615 | TORCH_FN(wrapper_NestedTensorCPU__is_same_size)); |
616 | m.impl("native_layer_norm" , |
617 | TORCH_FN(wrapper_NestedTensorCPU__native_layer_norm)); |
618 | m.impl("linear" , |
619 | TORCH_FN(wrapper_NestedTensorCPU__linear)); |
620 | m.impl("linear_backward" , |
621 | TORCH_FN(wrapper_NestedTensorCPU__linear_backward)); |
622 | m.impl("matmul" , |
623 | TORCH_FN(wrapper_NestedTensorCPU__matmul)); |
624 | m.impl("matmul.out" , |
625 | TORCH_FN(wrapper_NestedTensorCPU_out_matmul_out)); |
626 | m.impl("matmul_backward" , |
627 | TORCH_FN(wrapper_NestedTensorCPU__matmul_backward)); |
628 | m.impl("mul.Tensor" , |
629 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_mul)); |
630 | m.impl("mul_.Tensor" , |
631 | TORCH_FN(wrapper_NestedTensorCPU_Tensor_mul_)); |
632 | m.impl("mul.Scalar" , |
633 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_mul)); |
634 | m.impl("mul_.Scalar" , |
635 | TORCH_FN(wrapper_NestedTensorCPU_Scalar_mul_)); |
636 | m.impl("ones_like" , |
637 | TORCH_FN(wrapper_NestedTensorCPU__ones_like)); |
638 | m.impl("neg" , |
639 | TORCH_FN(wrapper_NestedTensorCPU__neg)); |
640 | m.impl("neg_" , |
641 | TORCH_FN(wrapper_NestedTensorCPU__neg_)); |
642 | m.impl("relu" , |
643 | TORCH_FN(wrapper_NestedTensorCPU__relu)); |
644 | m.impl("relu_" , |
645 | TORCH_FN(wrapper_NestedTensorCPU__relu_)); |
646 | m.impl("gelu" , |
647 | TORCH_FN(wrapper_NestedTensorCPU__gelu)); |
648 | m.impl("gelu_" , |
649 | TORCH_FN(wrapper_NestedTensorCPU__gelu_)); |
650 | m.impl("select.int" , |
651 | TORCH_FN(wrapper_NestedTensorCPU_int_select)); |
652 | m.impl("_nested_select_backward" , |
653 | TORCH_FN(wrapper_NestedTensorCPU___nested_select_backward)); |
654 | m.impl("detach" , |
655 | TORCH_FN(wrapper_NestedTensorCPU__detach)); |
656 | m.impl("_softmax" , |
657 | TORCH_FN(wrapper_NestedTensorCPU___softmax)); |
658 | m.impl("_softmax_backward_data" , |
659 | TORCH_FN(wrapper_NestedTensorCPU___softmax_backward_data)); |
660 | m.impl("squeeze" , |
661 | TORCH_FN(wrapper_NestedTensorCPU__squeeze)); |
662 | m.impl("squeeze.dim" , |
663 | TORCH_FN(wrapper_NestedTensorCPU_dim_squeeze)); |
664 | m.impl("squeeze.dims" , |
665 | TORCH_FN(wrapper_NestedTensorCPU_dims_squeeze)); |
666 | m.impl("sum.dim_IntList" , |
667 | TORCH_FN(wrapper_NestedTensorCPU_dim_IntList_sum)); |
668 | m.impl("_nested_sum_backward" , |
669 | TORCH_FN(wrapper_NestedTensorCPU___nested_sum_backward)); |
670 | m.impl("tanh" , |
671 | TORCH_FN(wrapper_NestedTensorCPU__tanh)); |
672 | m.impl("tanh_" , |
673 | TORCH_FN(wrapper_NestedTensorCPU__tanh_)); |
674 | m.impl("transpose.int" , |
675 | TORCH_FN(wrapper_NestedTensorCPU_int_transpose)); |
676 | m.impl("_transform_bias_rescale_qkv" , |
677 | TORCH_FN(wrapper_NestedTensorCPU___transform_bias_rescale_qkv)); |
678 | m.impl("_nested_tensor_size" , |
679 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_size)); |
680 | m.impl("_nested_tensor_strides" , |
681 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_strides)); |
682 | m.impl("_nested_tensor_offsets" , |
683 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_offsets)); |
684 | m.impl("_nested_from_padded_and_nested_example" , |
685 | TORCH_FN(wrapper_NestedTensorCPU___nested_from_padded_and_nested_example)); |
686 | m.impl("unsqueeze" , |
687 | TORCH_FN(wrapper_NestedTensorCPU__unsqueeze)); |
688 | m.impl("clone" , |
689 | TORCH_FN(wrapper_NestedTensorCPU__clone)); |
690 | m.impl("values" , |
691 | TORCH_FN(wrapper_NestedTensorCPU__values)); |
692 | m.impl("_to_copy" , |
693 | TORCH_FN(wrapper_NestedTensorCPU___to_copy)); |
694 | m.impl("view" , |
695 | TORCH_FN(wrapper_NestedTensorCPU__view)); |
696 | m.impl("_test_autograd_multiple_dispatch.fullcoverage" , |
697 | TORCH_FN(wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch)); |
698 | m.impl("_test_autograd_multiple_dispatch.ntonly" , |
699 | TORCH_FN(wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch)); |
700 | m.impl("to_padded_tensor" , |
701 | TORCH_FN(wrapper_NestedTensorCPU__to_padded_tensor)); |
702 | m.impl("_nested_tensor_softmax_with_shape" , |
703 | TORCH_FN(wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape)); |
704 | m.impl("_transformer_encoder_layer_fwd" , |
705 | TORCH_FN(wrapper_NestedTensorCPU___transformer_encoder_layer_fwd)); |
706 | m.impl("_native_multi_head_attention" , |
707 | TORCH_FN(wrapper_NestedTensorCPU___native_multi_head_attention)); |
708 | m.impl("_fused_sdp_choice" , |
709 | TORCH_FN(wrapper_NestedTensorCPU___fused_sdp_choice)); |
710 | m.impl("_transformer_decoder_only_layer_fwd" , |
711 | TORCH_FN(wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd)); |
712 | m.impl("_native_decoder_only_multi_head_attention" , |
713 | TORCH_FN(wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention)); |
714 | }; |
715 | } // anonymous namespace |
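// The at::nestedtensorcpu namespace exposes the same kernels as plain
// functions that call the wrappers above directly, bypassing the dispatcher.
// Illustrative usage (hypothetical variable names):
//
//   at::Tensor y = at::nestedtensorcpu::relu(x);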
716 | namespace nestedtensorcpu { |
717 | ::std::tuple<at::Tensor,at::Tensor> native_dropout(const at::Tensor & input, double p, c10::optional<bool> train) { |
718 | return wrapper_NestedTensorCPU__native_dropout(input, p, train); |
719 | } |
720 | at::Tensor native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale) { |
721 | return wrapper_NestedTensorCPU__native_dropout_backward(grad_output, mask, scale); |
722 | } |
723 | at::Tensor add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
724 | return wrapper_NestedTensorCPU_Tensor_add(self, other, alpha); |
725 | } |
726 | at::Tensor & add_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha) { |
727 | return wrapper_NestedTensorCPU_Tensor_add_(self, other, alpha); |
728 | } |
729 | at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2) { |
730 | return wrapper_NestedTensorCPU__bmm(self, mat2); |
731 | } |
732 | ::std::vector<at::Tensor> chunk(const at::Tensor & self, int64_t chunks, int64_t dim) { |
733 | return wrapper_NestedTensorCPU__chunk(self, chunks, dim); |
734 | } |
735 | at::Tensor & copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking) { |
736 | return wrapper_NestedTensorCPU__copy_(self, src, non_blocking); |
737 | } |
738 | at::Tensor div(const at::Tensor & self, const at::Tensor & other) { |
739 | return wrapper_NestedTensorCPU_Tensor_div(self, other); |
740 | } |
741 | at::Tensor div(const at::Tensor & self, const at::Scalar & other) { |
742 | return wrapper_NestedTensorCPU_Scalar_div(self, other); |
743 | } |
744 | at::Tensor embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse) { |
745 | return wrapper_NestedTensorCPU__embedding(weight, indices, padding_idx, scale_grad_by_freq, sparse); |
746 | } |
747 | at::Tensor embedding_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse) { |
748 | return wrapper_NestedTensorCPU__embedding(weight, indices, padding_idx, scale_grad_by_freq, sparse); |
749 | } |
750 | at::Tensor empty_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) { |
751 | return wrapper_NestedTensorCPU__empty_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); |
752 | } |
753 | at::Tensor empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
754 | return wrapper_NestedTensorCPU__empty_like(self, dtype, layout, device, pin_memory, memory_format); |
755 | } |
756 | at::Tensor & fill_(at::Tensor & self, const at::Scalar & value) { |
757 | return wrapper_NestedTensorCPU_Scalar_fill_(self, value); |
758 | } |
759 | at::Tensor & fill_(at::Tensor & self, const at::Tensor & value) { |
760 | return wrapper_NestedTensorCPU_Tensor_fill_(self, value); |
761 | } |
762 | bool is_same_size(const at::Tensor & self, const at::Tensor & other) { |
763 | return wrapper_NestedTensorCPU__is_same_size(self, other); |
764 | } |
765 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) { |
766 | return wrapper_NestedTensorCPU__native_layer_norm(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps); |
767 | } |
768 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> native_layer_norm_symint(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, double eps) { |
769 | return wrapper_NestedTensorCPU__native_layer_norm(input, normalized_shape, weight, bias, eps); |
770 | } |
771 | at::Tensor linear(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias) { |
772 | return wrapper_NestedTensorCPU__linear(input, weight, bias); |
773 | } |
774 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> linear_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, ::std::array<bool,3> output_mask) { |
775 | return wrapper_NestedTensorCPU__linear_backward(self, grad_output, weight, output_mask); |
776 | } |
777 | at::Tensor matmul(const at::Tensor & self, const at::Tensor & other) { |
778 | return wrapper_NestedTensorCPU__matmul(self, other); |
779 | } |
780 | at::Tensor & matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { |
781 | return wrapper_NestedTensorCPU_out_matmul_out(self, other, out); |
782 | } |
783 | at::Tensor & matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { |
784 | return wrapper_NestedTensorCPU_out_matmul_out(self, other, out); |
785 | } |
786 | ::std::tuple<at::Tensor,at::Tensor> matmul_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & other, ::std::array<bool,2> mask) { |
787 | return wrapper_NestedTensorCPU__matmul_backward(grad, self, other, mask); |
788 | } |
789 | at::Tensor mul(const at::Tensor & self, const at::Tensor & other) { |
790 | return wrapper_NestedTensorCPU_Tensor_mul(self, other); |
791 | } |
792 | at::Tensor & mul_(at::Tensor & self, const at::Tensor & other) { |
793 | return wrapper_NestedTensorCPU_Tensor_mul_(self, other); |
794 | } |
795 | at::Tensor mul(const at::Tensor & self, const at::Scalar & other) { |
796 | return wrapper_NestedTensorCPU_Scalar_mul(self, other); |
797 | } |
798 | at::Tensor & mul_(at::Tensor & self, const at::Scalar & other) { |
799 | return wrapper_NestedTensorCPU_Scalar_mul_(self, other); |
800 | } |
801 | at::Tensor ones_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) { |
802 | return wrapper_NestedTensorCPU__ones_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); |
803 | } |
804 | at::Tensor ones_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) { |
805 | return wrapper_NestedTensorCPU__ones_like(self, dtype, layout, device, pin_memory, memory_format); |
806 | } |
807 | at::Tensor neg(const at::Tensor & self) { |
808 | return wrapper_NestedTensorCPU__neg(self); |
809 | } |
810 | at::Tensor & neg_(at::Tensor & self) { |
811 | return wrapper_NestedTensorCPU__neg_(self); |
812 | } |
813 | at::Tensor relu(const at::Tensor & self) { |
814 | return wrapper_NestedTensorCPU__relu(self); |
815 | } |
816 | at::Tensor & relu_(at::Tensor & self) { |
817 | return wrapper_NestedTensorCPU__relu_(self); |
818 | } |
819 | at::Tensor gelu(const at::Tensor & self, c10::string_view approximate) { |
820 | return wrapper_NestedTensorCPU__gelu(self, approximate); |
821 | } |
822 | at::Tensor & gelu_(at::Tensor & self, c10::string_view approximate) { |
823 | return wrapper_NestedTensorCPU__gelu_(self, approximate); |
824 | } |
825 | at::Tensor select(const at::Tensor & self, int64_t dim, int64_t index) { |
826 | return wrapper_NestedTensorCPU_int_select(self, dim, index); |
827 | } |
828 | at::Tensor select_symint(const at::Tensor & self, int64_t dim, c10::SymInt index) { |
829 | return wrapper_NestedTensorCPU_int_select(self, dim, index); |
830 | } |
831 | at::Tensor _nested_select_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, int64_t index) { |
832 | return wrapper_NestedTensorCPU___nested_select_backward(grad_output, self, dim, index); |
833 | } |
834 | at::Tensor _nested_select_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, c10::SymInt index) { |
835 | return wrapper_NestedTensorCPU___nested_select_backward(grad_output, self, dim, index); |
836 | } |
837 | at::Tensor detach(const at::Tensor & self) { |
838 | return wrapper_NestedTensorCPU__detach(self); |
839 | } |
840 | at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float) { |
841 | return wrapper_NestedTensorCPU___softmax(self, dim, half_to_float); |
842 | } |
843 | at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) { |
844 | return wrapper_NestedTensorCPU___softmax_backward_data(grad_output, output, dim, input_dtype); |
845 | } |
846 | at::Tensor squeeze(const at::Tensor & self) { |
847 | return wrapper_NestedTensorCPU__squeeze(self); |
848 | } |
849 | at::Tensor squeeze(const at::Tensor & self, int64_t dim) { |
850 | return wrapper_NestedTensorCPU_dim_squeeze(self, dim); |
851 | } |
852 | at::Tensor squeeze(const at::Tensor & self, at::IntArrayRef dim) { |
853 | return wrapper_NestedTensorCPU_dims_squeeze(self, dim); |
854 | } |
855 | at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype) { |
856 | return wrapper_NestedTensorCPU_dim_IntList_sum(self, dim, keepdim, dtype); |
857 | } |
858 | at::Tensor _nested_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim) { |
859 | return wrapper_NestedTensorCPU___nested_sum_backward(grad, self, dim, keepdim); |
860 | } |
861 | at::Tensor tanh(const at::Tensor & self) { |
862 | return wrapper_NestedTensorCPU__tanh(self); |
863 | } |
864 | at::Tensor & tanh_(at::Tensor & self) { |
865 | return wrapper_NestedTensorCPU__tanh_(self); |
866 | } |
867 | at::Tensor transpose(const at::Tensor & self, int64_t dim0, int64_t dim1) { |
868 | return wrapper_NestedTensorCPU_int_transpose(self, dim0, dim1); |
869 | } |
870 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) { |
871 | return wrapper_NestedTensorCPU___transform_bias_rescale_qkv(qkv, qkv_bias, num_heads); |
872 | } |
873 | at::Tensor _nested_tensor_size(const at::Tensor & self) { |
874 | return wrapper_NestedTensorCPU___nested_tensor_size(self); |
875 | } |
876 | at::Tensor _nested_tensor_strides(const at::Tensor & self) { |
877 | return wrapper_NestedTensorCPU___nested_tensor_strides(self); |
878 | } |
879 | ::std::vector<int64_t> _nested_tensor_offsets(const at::Tensor & self) { |
880 | return wrapper_NestedTensorCPU___nested_tensor_offsets(self); |
881 | } |
882 | at::Tensor _nested_from_padded_and_nested_example(const at::Tensor & padded, const at::Tensor & nt_example) { |
883 | return wrapper_NestedTensorCPU___nested_from_padded_and_nested_example(padded, nt_example); |
884 | } |
885 | at::Tensor unsqueeze(const at::Tensor & self, int64_t dim) { |
886 | return wrapper_NestedTensorCPU__unsqueeze(self, dim); |
887 | } |
888 | at::Tensor clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) { |
889 | return wrapper_NestedTensorCPU__clone(self, memory_format); |
890 | } |
891 | at::Tensor values(const at::Tensor & self) { |
892 | return wrapper_NestedTensorCPU__values(self); |
893 | } |
894 | at::Tensor _to_copy(const at::Tensor & self, at::TensorOptions options, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) { |
895 | return wrapper_NestedTensorCPU___to_copy(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), non_blocking, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); |
896 | } |
897 | at::Tensor _to_copy(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, bool non_blocking, c10::optional<at::MemoryFormat> memory_format) { |
898 | return wrapper_NestedTensorCPU___to_copy(self, dtype, layout, device, pin_memory, non_blocking, memory_format); |
899 | } |
900 | at::Tensor view(const at::Tensor & self, at::IntArrayRef size) { |
901 | return wrapper_NestedTensorCPU__view(self, c10::fromIntArrayRefSlow(size)); |
902 | } |
903 | at::Tensor view_symint(const at::Tensor & self, c10::SymIntArrayRef size) { |
904 | return wrapper_NestedTensorCPU__view(self, size); |
905 | } |
906 | at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self) { |
907 | return wrapper_NestedTensorCPU_fullcoverage__test_autograd_multiple_dispatch(self); |
908 | } |
909 | at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b) { |
910 | return wrapper_NestedTensorCPU_ntonly__test_autograd_multiple_dispatch(self, b); |
911 | } |
912 | at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size) { |
913 | return wrapper_NestedTensorCPU__to_padded_tensor(self, padding, output_size.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*output_size)) : c10::nullopt); |
914 | } |
915 | at::Tensor to_padded_tensor_symint(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size) { |
916 | return wrapper_NestedTensorCPU__to_padded_tensor(self, padding, output_size); |
917 | } |
918 | at::Tensor _nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) { |
919 | return wrapper_NestedTensorCPU___nested_tensor_softmax_with_shape(self, query); |
920 | } |
921 | at::Tensor _transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, c10::optional<int64_t> mask_type) { |
922 | return wrapper_NestedTensorCPU___transformer_encoder_layer_fwd(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type); |
923 | } |
924 | ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type) { |
925 | return wrapper_NestedTensorCPU___native_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type); |
926 | } |
927 | int64_t _fused_sdp_choice(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal) { |
928 | return wrapper_NestedTensorCPU___fused_sdp_choice(query, key, value, attn_mask, dropout_p, is_causal); |
929 | } |
930 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _transformer_decoder_only_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value) { |
931 | return wrapper_NestedTensorCPU___transformer_decoder_only_layer_fwd(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, incr_key, incr_value); |
932 | } |
933 | ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> _native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, const c10::optional<at::Tensor> & incr_key, const c10::optional<at::Tensor> & incr_value, bool need_weights, bool average_attn_weights) { |
934 | return wrapper_NestedTensorCPU___native_decoder_only_multi_head_attention(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, incr_key, incr_value, need_weights, average_attn_weights); |
935 | } |
936 | } // namespace nestedtensorcpu |
937 | } // namespace at |
938 | |