// required for old g++ to compile PRId64 macros, see
// https://github.com/pytorch/pytorch/issues/3571
// for context
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

// An external backend might generate this file within its own code tree
// and check all the source files in that tree with clang-format, so
// disable clang-format here, since the backend might use a different config.
// clang-format off

// NOTE: This condition is true for all PyTorch internal libraries; it
// just excludes external projects such as torch_xla, which
// re-use some of the PyTorch codegen machinery.
#if defined(CAFFE2_BUILD_MAIN_LIB) || \
    defined(TORCH_CUDA_BUILD_MAIN_LIB) || \
    defined(TORCH_HIP_BUILD_MAIN_LIB) || \
    defined(TORCH_CUDA_CU_BUILD_MAIN_LIB) || \
    defined(TORCH_CUDA_CPP_BUILD_MAIN_LIB)
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#endif
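// Defining TORCH_ASSERT_ONLY_METHOD_OPERATORS makes the aggregated ATen
// operator headers (e.g. ATen/Functions.h) refuse to compile, which is why
// this file includes per-operator headers from <ATen/ops/...> below.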

// @generated by torchgen/gen.py from RegisterDispatchKey.cpp

#include <c10/core/TensorImpl.h>
#include <c10/core/Allocator.h>
#include <ATen/DeviceGuard.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Utils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/Dispatch.h>
#include <c10/util/ExclusivelyOwned.h>
#include <c10/util/Half.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <c10/util/Optional.h>
#include <ATen/Tensor.h>
#include <ATen/native/Resize.h>

#include <cstddef>
#include <functional>
#include <memory>
#include <utility>

#include <ATen/Config.h>
#include <ATen/core/op_registration/adaption.h>
#include <torch/library.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/ATenCUDAGeneral.h>
#include <ATen/cuda/CUDADevice.h>
#include <ATen/cuda/CUDAContext.h>

#include <ATen/ops/as_strided_native.h>
#include <ATen/ops/empty.h>
#include <ATen/ops/empty_strided.h>
#include <ATen/ops/_copy_from_and_resize.h>
#include <ATen/ops/_copy_from.h>
#include <ATen/ops/_adaptive_avg_pool2d_native.h>
#include <ATen/ops/_empty_affine_quantized_native.h>
#include <ATen/ops/_empty_per_channel_affine_quantized_native.h>
#include <ATen/ops/_index_put_impl_native.h>
#include <ATen/ops/_reshape_alias_native.h>
#include <ATen/ops/clone_native.h>
#include <ATen/ops/dequantize_native.h>
#include <ATen/ops/empty_like_native.h>
#include <ATen/ops/empty_native.h>
#include <ATen/ops/empty_quantized_native.h>
#include <ATen/ops/empty_strided_native.h>
#include <ATen/ops/fill_native.h>
#include <ATen/ops/flip_native.h>
#include <ATen/ops/gelu_native.h>
#include <ATen/ops/index_select_native.h>
#include <ATen/ops/int_repr_native.h>
#include <ATen/ops/masked_fill_native.h>
#include <ATen/ops/max_native.h>
#include <ATen/ops/min_native.h>
#include <ATen/ops/q_per_channel_axis_native.h>
#include <ATen/ops/q_per_channel_scales_native.h>
#include <ATen/ops/q_per_channel_zero_points_native.h>
#include <ATen/ops/q_scale_native.h>
#include <ATen/ops/q_zero_point_native.h>
#include <ATen/ops/qscheme_native.h>
#include <ATen/ops/quantized_max_pool2d_native.h>
#include <ATen/ops/relu_native.h>
#include <ATen/ops/set_native.h>
#include <ATen/ops/squeeze_native.h>
#include <ATen/ops/unfold_native.h>
#include <ATen/ops/unsqueeze_native.h>
#include <ATen/ops/view_native.h>

// See template file RegisterDispatchDefinitions.ini
namespace at {
// NB: TORCH_LIBRARY_IMPL must be in an anonymous namespace to avoid
// ambiguity with conflicting identifiers that may have already been
// defined in the at namespace.
namespace {
// Allocate a fresh output tensor, honoring the requested strides when given.
Tensor create_out(IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) {
  if (strides.empty()) {
    return at::empty(sizes, options);
  } else {
    return at::empty_strided(sizes, strides, options);
  }
}
void resize_out(const Tensor &out, IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) {
  TORCH_CHECK(options.dtype() == out.dtype(),
      "Expected out tensor to have dtype ", options.dtype(), ", but got ", out.dtype(), " instead");
  TORCH_CHECK(options.device() == out.device(),
      "Expected out tensor to have device ", options.device(), ", but got ", out.device(), " instead");
  const bool resized = at::native::resize_output(out, sizes);
  // Only restride if a resize occurred; otherwise we ignore the (advisory)
  // strides from the meta function and directly use the output tensor's
  // preexisting strides
  if (resized) {
    if (!strides.empty()) {
      TORCH_INTERNAL_ASSERT(!options.memory_format_opt().has_value());
      // TODO: avoid the redispatch here
      out.as_strided_(sizes, strides);
    } else if (options.memory_format_opt().has_value()) {
      out.unsafeGetTensorImpl()->empty_tensor_restride(*options.memory_format_opt());
    }
  }
}
void check_inplace(const Tensor &self, IntArrayRef sizes, const TensorOptions &options) {
  // These checks are needed on those operators that:
  // 1) don't use 'TensorIterator' (e.g. 'addmm' and 'baddbmm')
  // 2) have particular typing rules (e.g. 'cumsum' and 'cumprod')
  // For other operators (e.g. 'add'), 'TensorIterator' already checks
  // these things separately.
  TORCH_CHECK(options.dtype() == self.dtype(),
      "Bad in-place call: ",
      "input tensor dtype ", self.dtype(), " and output tensor dtype ", options.dtype(), " should match");
  TORCH_CHECK(options.device() == self.device(),
      "Bad in-place call: ",
      "input tensor device ", self.device(), " and output tensor device ", options.device(), " should match");
  TORCH_CHECK(sizes == self.sizes(),
      "Bad in-place call: ",
      "input tensor size ", self.sizes(), " and output tensor size ", sizes, " should match");
}
// If `out` does not already have the requested strides, allocate a proxy
// tensor with those strides; the caller is expected to copy it back into `out`.
c10::optional<Tensor> maybe_create_proxy(const Tensor &out, IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) {
  if (out.strides() != strides) {
    return at::empty_strided(sizes, strides, options);
  }
  return c10::nullopt;
}
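// Illustrative sketch only (not generated code): out= wrappers produced from
// this template typically use the helpers above roughly as follows, where
// `sizes`, `strides`, and `options` come from the op's meta function:
//
//   Tensor out = create_out(sizes, strides, options);  // fresh output
//   // ...or, when the caller passed an out= tensor:
//   resize_out(out, sizes, strides, options);          // validate dtype/device, then resize
//   if (auto proxy = maybe_create_proxy(out, sizes, strides, options)) {
//     // compute into *proxy, then copy the result back into `out`
//   }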
namespace {
at::Tensor wrapper_QuantizedCUDA__as_strided(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) {
  // No device check
  // DeviceGuard omitted
  return at::native::as_strided_qtensorimpl(self, C10_AS_INTARRAYREF_SLOW(size), C10_AS_INTARRAYREF_SLOW(stride), storage_offset.has_value() ? c10::make_optional(storage_offset->expect_int()) : c10::nullopt);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA_memory_format_empty(c10::SymIntArrayRef size, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  globalContext().lazyInitCUDA();
  const DeviceGuard device_guard(device_or_default(device));
  return at::native::empty_unknown_quantized(C10_AS_INTARRAYREF_SLOW(size), dtype, layout, device, pin_memory, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA___empty_affine_quantized(at::IntArrayRef size, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, double scale, int64_t zero_point, c10::optional<at::MemoryFormat> memory_format) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  globalContext().lazyInitCUDA();
  const DeviceGuard device_guard(device_or_default(device));
  return at::native::empty_affine_quantized(size, dtype, layout, device, pin_memory, scale, zero_point, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA___empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  globalContext().lazyInitCUDA();
  const DeviceGuard device_guard(device_or_default(device));
  return at::native::empty_per_channel_affine_quantized(size, scales, zero_points, axis, dtype, layout, device, pin_memory, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__empty_quantized(at::IntArrayRef size, const at::Tensor & qtensor, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, qtensor, "wrapper_QuantizedCUDA__empty_quantized", "qtensor");
  globalContext().lazyInitCUDA();
  const DeviceGuard device_guard(device_or_default(device));
  return at::native::empty_quantized(size, qtensor, dtype, layout, device, pin_memory, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  // No device check
  // DeviceGuard omitted
  return at::native::empty_like_quantized(self, dtype, layout, device, pin_memory, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  globalContext().lazyInitCUDA();
  const DeviceGuard device_guard(device_or_default(device));
  return at::native::empty_strided_unknown_quantized(C10_AS_INTARRAYREF_SLOW(size), C10_AS_INTARRAYREF_SLOW(stride), dtype, layout, device, pin_memory);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA_Scalar_fill_(at::Tensor & self, const at::Scalar & value) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::fill_quantized_(self, value);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA_Tensor_fill_(at::Tensor & self, const at::Tensor & value) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::fill_quantized_(self, value);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA___index_put_impl_(at::Tensor & self, const c10::List<c10::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate, bool unsafe) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::_index_put_impl_quantized_cuda_(self, indices, values, accumulate, unsafe);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor> wrapper_QuantizedCUDA_dim_max(const at::Tensor & self, int64_t dim, bool keepdim) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::qmax(self, dim, keepdim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__quantized_max_pool2d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__quantized_max_pool2d", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::quantized_max_pool2d_cudnn(self, kernel_size, stride, padding, dilation, ceil_mode);
}
} // anonymous namespace
namespace {
::std::tuple<at::Tensor,at::Tensor> wrapper_QuantizedCUDA_dim_min(const at::Tensor & self, int64_t dim, bool keepdim) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::qmin(self, dim, keepdim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA___reshape_alias(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) {
  // No device check
  // DeviceGuard omitted
  return at::native::_reshape_alias(self, C10_AS_INTARRAYREF_SLOW(size), C10_AS_INTARRAYREF_SLOW(stride));
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__relu(const at::Tensor & self) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::relu_quantized_cuda(self);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA__relu_(at::Tensor & self) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::relu_quantized_cuda_(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__gelu(const at::Tensor & self, c10::string_view approximate) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::gelu_quantized_cuda(self, approximate);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__squeeze(const at::Tensor & self) {
  // No device check
  // DeviceGuard omitted
  return at::native::squeeze_quantized(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA_dim_squeeze(const at::Tensor & self, int64_t dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::squeeze_quantized(self, dim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA_dims_squeeze(const at::Tensor & self, at::IntArrayRef dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::squeeze_quantized(self, dim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__flip(const at::Tensor & self, at::IntArrayRef dims) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__flip", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::flip(self, dims);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__unsqueeze(const at::Tensor & self, int64_t dim) {
  // No device check
  // DeviceGuard omitted
  return at::native::unsqueeze_quantized(self, dim);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__clone", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::quantized_clone(self, memory_format);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA_self_dequantize(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA_self_dequantize", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::dequantize_quantized(self);
}
} // anonymous namespace
namespace {
double wrapper_QuantizedCUDA__q_scale(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__q_scale", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::q_scale_quant(self);
}
} // anonymous namespace
namespace {
int64_t wrapper_QuantizedCUDA__q_zero_point(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__q_zero_point", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::q_zero_point_quant(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__q_per_channel_scales(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__q_per_channel_scales", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::q_per_channel_scales(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__q_per_channel_zero_points(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__q_per_channel_zero_points", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::q_per_channel_zero_points(self);
}
} // anonymous namespace
namespace {
int64_t wrapper_QuantizedCUDA__q_per_channel_axis(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__q_per_channel_axis", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::q_per_channel_axis(self);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__int_repr(const at::Tensor & self) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::int_repr_quantized_cuda(self);
}
} // anonymous namespace
namespace {
at::QScheme wrapper_QuantizedCUDA__qscheme(const at::Tensor & self) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__qscheme", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::qscheme_quant(self);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA_source_Storage_storage_offset_set_(at::Tensor & self, at::Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) {
  // No device check
  // DeviceGuard omitted
  return at::native::set_storage_quantized_(self, source, storage_offset.expect_int(), C10_AS_INTARRAYREF_SLOW(size), C10_AS_INTARRAYREF_SLOW(stride));
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA_Scalar_masked_fill_(at::Tensor & self, const at::Tensor & mask, const at::Scalar & value) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::masked_fill__quantized_cuda(self, mask, value);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA_Tensor_masked_fill_(at::Tensor & self, const at::Tensor & mask, const at::Tensor & value) {
  // No device check
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::masked_fill__quantized_cuda(self, mask, value);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__view(const at::Tensor & self, c10::SymIntArrayRef size) {
  // No device check
  // DeviceGuard omitted
  return at::native::view(self, C10_AS_INTARRAYREF_SLOW(size));
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA__index_select", "self");
  c10::impl::check_and_update_common_device(common_device, index, "wrapper_QuantizedCUDA__index_select", "index");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::index_select_quantized_cuda(self, dim, index);
}
} // anonymous namespace
namespace {
at::Tensor & wrapper_QuantizedCUDA_out_index_select_out(const at::Tensor & self, int64_t dim, const at::Tensor & index, at::Tensor & out) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, out, "wrapper_QuantizedCUDA_out_index_select_out", "out");
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA_out_index_select_out", "self");
  c10::impl::check_and_update_common_device(common_device, index, "wrapper_QuantizedCUDA_out_index_select_out", "index");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::index_select_out_cuda(self, dim, index, out);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA__unfold(const at::Tensor & self, int64_t dimension, int64_t size, int64_t step) {
  // No device check
  // DeviceGuard omitted
  return at::native::unfold(self, dimension, size, step);
}
} // anonymous namespace
namespace {
at::Tensor wrapper_QuantizedCUDA___adaptive_avg_pool2d(const at::Tensor & self, c10::SymIntArrayRef output_size) {
  c10::optional<Device> common_device = nullopt;
  (void)common_device; // Suppress unused variable warning
  c10::impl::check_and_update_common_device(common_device, self, "wrapper_QuantizedCUDA___adaptive_avg_pool2d", "self");
  const OptionalDeviceGuard device_guard(device_of(self));
  return at::native::adaptive_avg_pool2d_quantized_cuda(self, C10_AS_INTARRAYREF_SLOW(output_size));
}
} // anonymous namespace
TORCH_LIBRARY_IMPL(aten, QuantizedCUDA, m) {
  m.impl("as_strided", TORCH_FN(wrapper_QuantizedCUDA__as_strided));
  m.impl("empty.memory_format", TORCH_FN(wrapper_QuantizedCUDA_memory_format_empty));
  m.impl("_empty_affine_quantized", TORCH_FN(wrapper_QuantizedCUDA___empty_affine_quantized));
  m.impl("_empty_per_channel_affine_quantized", TORCH_FN(wrapper_QuantizedCUDA___empty_per_channel_affine_quantized));
  m.impl("empty_quantized", TORCH_FN(wrapper_QuantizedCUDA__empty_quantized));
  m.impl("empty_like", TORCH_FN(wrapper_QuantizedCUDA__empty_like));
  m.impl("empty_strided", TORCH_FN(wrapper_QuantizedCUDA__empty_strided));
  m.impl("fill_.Scalar", TORCH_FN(wrapper_QuantizedCUDA_Scalar_fill_));
  m.impl("fill_.Tensor", TORCH_FN(wrapper_QuantizedCUDA_Tensor_fill_));
  m.impl("_index_put_impl_", TORCH_FN(wrapper_QuantizedCUDA___index_put_impl_));
  m.impl("max.dim", TORCH_FN(wrapper_QuantizedCUDA_dim_max));
  m.impl("quantized_max_pool2d", TORCH_FN(wrapper_QuantizedCUDA__quantized_max_pool2d));
  m.impl("min.dim", TORCH_FN(wrapper_QuantizedCUDA_dim_min));
  m.impl("_reshape_alias", TORCH_FN(wrapper_QuantizedCUDA___reshape_alias));
  m.impl("relu", TORCH_FN(wrapper_QuantizedCUDA__relu));
  m.impl("relu_", TORCH_FN(wrapper_QuantizedCUDA__relu_));
  m.impl("gelu", TORCH_FN(wrapper_QuantizedCUDA__gelu));
  m.impl("squeeze", TORCH_FN(wrapper_QuantizedCUDA__squeeze));
  m.impl("squeeze.dim", TORCH_FN(wrapper_QuantizedCUDA_dim_squeeze));
  m.impl("squeeze.dims", TORCH_FN(wrapper_QuantizedCUDA_dims_squeeze));
  m.impl("flip", TORCH_FN(wrapper_QuantizedCUDA__flip));
  m.impl("unsqueeze", TORCH_FN(wrapper_QuantizedCUDA__unsqueeze));
  m.impl("clone", TORCH_FN(wrapper_QuantizedCUDA__clone));
  m.impl("dequantize.self", TORCH_FN(wrapper_QuantizedCUDA_self_dequantize));
  m.impl("q_scale", TORCH_FN(wrapper_QuantizedCUDA__q_scale));
  m.impl("q_zero_point", TORCH_FN(wrapper_QuantizedCUDA__q_zero_point));
  m.impl("q_per_channel_scales", TORCH_FN(wrapper_QuantizedCUDA__q_per_channel_scales));
  m.impl("q_per_channel_zero_points", TORCH_FN(wrapper_QuantizedCUDA__q_per_channel_zero_points));
  m.impl("q_per_channel_axis", TORCH_FN(wrapper_QuantizedCUDA__q_per_channel_axis));
  m.impl("int_repr", TORCH_FN(wrapper_QuantizedCUDA__int_repr));
  m.impl("qscheme", TORCH_FN(wrapper_QuantizedCUDA__qscheme));
  m.impl("set_.source_Storage_storage_offset", TORCH_FN(wrapper_QuantizedCUDA_source_Storage_storage_offset_set_));
  m.impl("masked_fill_.Scalar", TORCH_FN(wrapper_QuantizedCUDA_Scalar_masked_fill_));
  m.impl("masked_fill_.Tensor", TORCH_FN(wrapper_QuantizedCUDA_Tensor_masked_fill_));
  m.impl("view", TORCH_FN(wrapper_QuantizedCUDA__view));
  m.impl("index_select", TORCH_FN(wrapper_QuantizedCUDA__index_select));
  m.impl("index_select.out", TORCH_FN(wrapper_QuantizedCUDA_out_index_select_out));
  m.impl("unfold", TORCH_FN(wrapper_QuantizedCUDA__unfold));
  m.impl("_adaptive_avg_pool2d", TORCH_FN(wrapper_QuantizedCUDA___adaptive_avg_pool2d));
};
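// Illustrative sketch only (not part of the generated registrations): with the
// kernels above registered for DispatchKey::QuantizedCUDA, a call such as the
// following would route through the dispatcher to relu_quantized_cuda,
// assuming a CUDA-enabled build:
//
//   at::Tensor x  = at::rand({4, 4}, at::device(at::kCUDA));
//   at::Tensor qx = at::quantize_per_tensor(x, /*scale=*/0.1, /*zero_point=*/0, at::kQUInt8);
//   at::Tensor qy = at::relu(qx);  // dispatches to wrapper_QuantizedCUDA__relu above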
} // anonymous namespace
namespace quantizedcuda {
at::Tensor as_strided(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, c10::optional<int64_t> storage_offset) {
  return wrapper_QuantizedCUDA__as_strided(self, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? c10::make_optional(c10::SymInt(*storage_offset)) : c10::nullopt);
}
at::Tensor as_strided_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) {
  return wrapper_QuantizedCUDA__as_strided(self, size, stride, storage_offset);
}
at::Tensor empty(at::IntArrayRef size, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA_memory_format_empty(c10::fromIntArrayRefSlow(size), optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor empty(at::IntArrayRef size, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA_memory_format_empty(c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory, memory_format);
}
at::Tensor empty_symint(c10::SymIntArrayRef size, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA_memory_format_empty(size, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor empty_symint(c10::SymIntArrayRef size, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA_memory_format_empty(size, dtype, layout, device, pin_memory, memory_format);
}
at::Tensor _empty_affine_quantized(at::IntArrayRef size, at::TensorOptions options, double scale, int64_t zero_point, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA___empty_affine_quantized(size, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), scale, zero_point, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor _empty_affine_quantized(at::IntArrayRef size, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, double scale, int64_t zero_point, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA___empty_affine_quantized(size, dtype, layout, device, pin_memory, scale, zero_point, memory_format);
}
at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA___empty_per_channel_affine_quantized(size, scales, zero_points, axis, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA___empty_per_channel_affine_quantized(size, scales, zero_points, axis, dtype, layout, device, pin_memory, memory_format);
}
at::Tensor empty_quantized(at::IntArrayRef size, const at::Tensor & qtensor, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA__empty_quantized(size, qtensor, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor empty_quantized(at::IntArrayRef size, const at::Tensor & qtensor, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA__empty_quantized(size, qtensor, dtype, layout, device, pin_memory, memory_format);
}
at::Tensor empty_like(const at::Tensor & self, at::TensorOptions options, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA__empty_like(self, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
}
at::Tensor empty_like(const at::Tensor & self, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA__empty_like(self, dtype, layout, device, pin_memory, memory_format);
}
at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options) {
  return wrapper_QuantizedCUDA__empty_strided(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
}
at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
  return wrapper_QuantizedCUDA__empty_strided(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory);
}
at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options) {
  return wrapper_QuantizedCUDA__empty_strided(size, stride, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
}
at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
  return wrapper_QuantizedCUDA__empty_strided(size, stride, dtype, layout, device, pin_memory);
}
at::Tensor & fill_(at::Tensor & self, const at::Scalar & value) {
  return wrapper_QuantizedCUDA_Scalar_fill_(self, value);
}
at::Tensor & fill_(at::Tensor & self, const at::Tensor & value) {
  return wrapper_QuantizedCUDA_Tensor_fill_(self, value);
}
at::Tensor & _index_put_impl_(at::Tensor & self, const c10::List<c10::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate, bool unsafe) {
  return wrapper_QuantizedCUDA___index_put_impl_(self, indices, values, accumulate, unsafe);
}
::std::tuple<at::Tensor,at::Tensor> max(const at::Tensor & self, int64_t dim, bool keepdim) {
  return wrapper_QuantizedCUDA_dim_max(self, dim, keepdim);
}
at::Tensor quantized_max_pool2d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode) {
  return wrapper_QuantizedCUDA__quantized_max_pool2d(self, kernel_size, stride, padding, dilation, ceil_mode);
}
::std::tuple<at::Tensor,at::Tensor> min(const at::Tensor & self, int64_t dim, bool keepdim) {
  return wrapper_QuantizedCUDA_dim_min(self, dim, keepdim);
}
at::Tensor _reshape_alias(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride) {
  return wrapper_QuantizedCUDA___reshape_alias(self, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride));
}
at::Tensor _reshape_alias_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) {
  return wrapper_QuantizedCUDA___reshape_alias(self, size, stride);
}
at::Tensor relu(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__relu(self);
}
at::Tensor & relu_(at::Tensor & self) {
  return wrapper_QuantizedCUDA__relu_(self);
}
at::Tensor gelu(const at::Tensor & self, c10::string_view approximate) {
  return wrapper_QuantizedCUDA__gelu(self, approximate);
}
at::Tensor squeeze(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__squeeze(self);
}
at::Tensor squeeze(const at::Tensor & self, int64_t dim) {
  return wrapper_QuantizedCUDA_dim_squeeze(self, dim);
}
at::Tensor squeeze(const at::Tensor & self, at::IntArrayRef dim) {
  return wrapper_QuantizedCUDA_dims_squeeze(self, dim);
}
at::Tensor flip(const at::Tensor & self, at::IntArrayRef dims) {
  return wrapper_QuantizedCUDA__flip(self, dims);
}
at::Tensor unsqueeze(const at::Tensor & self, int64_t dim) {
  return wrapper_QuantizedCUDA__unsqueeze(self, dim);
}
at::Tensor clone(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format) {
  return wrapper_QuantizedCUDA__clone(self, memory_format);
}
at::Tensor dequantize(const at::Tensor & self) {
  return wrapper_QuantizedCUDA_self_dequantize(self);
}
double q_scale(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__q_scale(self);
}
int64_t q_zero_point(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__q_zero_point(self);
}
at::Tensor q_per_channel_scales(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__q_per_channel_scales(self);
}
at::Tensor q_per_channel_zero_points(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__q_per_channel_zero_points(self);
}
int64_t q_per_channel_axis(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__q_per_channel_axis(self);
}
at::Tensor int_repr(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__int_repr(self);
}
at::QScheme qscheme(const at::Tensor & self) {
  return wrapper_QuantizedCUDA__qscheme(self);
}
at::Tensor & set_(at::Tensor & self, at::Storage source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride) {
  return wrapper_QuantizedCUDA_source_Storage_storage_offset_set_(self, source, storage_offset, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride));
}
at::Tensor & set__symint(at::Tensor & self, at::Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) {
  return wrapper_QuantizedCUDA_source_Storage_storage_offset_set_(self, source, storage_offset, size, stride);
}
at::Tensor & masked_fill_(at::Tensor & self, const at::Tensor & mask, const at::Scalar & value) {
  return wrapper_QuantizedCUDA_Scalar_masked_fill_(self, mask, value);
}
at::Tensor & masked_fill_(at::Tensor & self, const at::Tensor & mask, const at::Tensor & value) {
  return wrapper_QuantizedCUDA_Tensor_masked_fill_(self, mask, value);
}
at::Tensor view(const at::Tensor & self, at::IntArrayRef size) {
  return wrapper_QuantizedCUDA__view(self, c10::fromIntArrayRefSlow(size));
}
at::Tensor view_symint(const at::Tensor & self, c10::SymIntArrayRef size) {
  return wrapper_QuantizedCUDA__view(self, size);
}
at::Tensor index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index) {
  return wrapper_QuantizedCUDA__index_select(self, dim, index);
}
at::Tensor & index_select_out(at::Tensor & out, const at::Tensor & self, int64_t dim, const at::Tensor & index) {
  return wrapper_QuantizedCUDA_out_index_select_out(self, dim, index, out);
}
at::Tensor & index_select_outf(const at::Tensor & self, int64_t dim, const at::Tensor & index, at::Tensor & out) {
  return wrapper_QuantizedCUDA_out_index_select_out(self, dim, index, out);
}
at::Tensor unfold(const at::Tensor & self, int64_t dimension, int64_t size, int64_t step) {
  return wrapper_QuantizedCUDA__unfold(self, dimension, size, step);
}
at::Tensor _adaptive_avg_pool2d(const at::Tensor & self, at::IntArrayRef output_size) {
  return wrapper_QuantizedCUDA___adaptive_avg_pool2d(self, c10::fromIntArrayRefSlow(output_size));
}
at::Tensor _adaptive_avg_pool2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size) {
  return wrapper_QuantizedCUDA___adaptive_avg_pool2d(self, output_size);
}
} // namespace quantizedcuda
} // namespace at