#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/Functions.h>
#include <ATen/MetaFunctions.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Operators.h>
#include <ATen/native/BinaryOps.h>
#include <ATen/native/CPUFallback.h>
#include <torch/csrc/lazy/core/helpers.h>
#include <torch/csrc/lazy/core/ir_builder.h>
#include <torch/csrc/lazy/core/metrics.h>
#include <torch/csrc/lazy/core/ops/utils.h>
#include <torch/csrc/lazy/core/shape_inference.h>
#include <torch/csrc/lazy/core/tensor_impl.h>
#include <torch/csrc/lazy/core/tensor_util.h>
#include <torch/csrc/lazy/generated/LazyNativeFunctions.h>
#include <torch/csrc/lazy/ts_backend/config.h>
#include <torch/csrc/lazy/ts_backend/ops/random_ops.h>
#include <torch/csrc/lazy/ts_backend/ops/to_copy.h>
#include <torch/csrc/lazy/ts_backend/tensor_aten_ops.h>
#include <torch/csrc/lazy/ts_backend/ts_autograd_functions.h>
#include <torch/csrc/lazy/ts_backend/ts_eager_fallback.h>
#include <torch/library.h>

using at::Tensor;

namespace torch {
namespace lazy {
namespace {

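// Wraps an eager tensor in a LazyTensor on the given backend device.
// If the tensor is undefined or no lazy device is provided, the input
// tensor is returned unchanged.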
at::Tensor CreateLtcTensor(
    const at::Tensor& tensor,
    const c10::optional<torch::lazy::BackendDevice>& device) {
  if (tensor.defined() && device) {
    return torch::lazy::CreateAtenFromLtcTensor(
        torch::lazy::LazyTensor::Create(tensor, *device));
  }
  return tensor;
}

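// Maps an optional ATen device to the corresponding lazy backend device.
// Returns nullopt if no device is given or the device is not a lazy device.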
c10::optional<torch::lazy::BackendDevice> GetLtcDevice(
    const c10::optional<c10::Device>& device) {
  if (!device) {
    return c10::nullopt;
  }
  if (device->type() != at::kLazy) {
    return c10::nullopt;
  }
  return torch::lazy::atenDeviceToBackendDevice(*device);
}

} // namespace

// clone is special in LT because we make it a no-op.
// This should be safe to do, because every operator in the LT is functional.
at::Tensor LazyNativeFunctions::clone(
    const at::Tensor& self,
    c10::optional<at::MemoryFormat> memory_format) {
  auto self_lt = torch::lazy::TryGetLtcTensor(self);
  return torch::lazy::CreateAtenFromLtcTensor(
      self_lt->Create(self_lt->GetIrValue(), self_lt->GetDevice()));
}

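// _copy_from handles copies in all eager/lazy combinations: an eager source
// updates the destination lazy tensor in place, a lazy source is materialized
// and copied into the eager destination, and lazy-to-lazy copies happen at the
// IR (or tensor-data) level.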
at::Tensor LazyNativeFunctions::_copy_from(
    const at::Tensor& self,
    const at::Tensor& dst,
    bool non_blocking) {
  TORCH_LAZY_FN_COUNTER("lazy::");
  auto dst_tensor = torch::lazy::TryGetLtcTensor(dst);
  auto self_tensor = torch::lazy::TryGetLtcTensor(self);
  if (!self_tensor) {
    // providing a new 'eager' value (self) for an existing lazy tensor (dst)
    static bool sync_update = FLAGS_torch_lazy_ts_tensor_update_sync;
    CHECK(dst_tensor);
    dst_tensor->UpdateFromTensor(self, /*sync=*/sync_update);
  } else if (!dst_tensor) {
    // materializing a lazy tensor (self) and copying its value into an eager
    // tensor (dst). detached=false lets us skip a copy in `ToTensor`, which
    // should be safe because we are only going to use the tensor for
    // dst.copy_()
    CHECK(self_tensor);
    at::Tensor tensor = self_tensor->ToTensor(/*detached=*/false);
    at::Tensor typed_tensor =
        torch::lazy::CopyTensor(tensor, dst.scalar_type(), /*copy=*/false);
    dst.resize_as_(typed_tensor).copy_(typed_tensor);
  } else {
    // Copying one lazy tensor to another
    if (!dst_tensor->CurrentIrValue()) {
      // if dst is not backed by IR (i.e. it is not the result of a lazy
      // operation), then it should have at::Tensor data backing it instead
      auto dst_tensor_data = dst_tensor->CurrentTensorData();
      CHECK(dst_tensor_data);
      auto src_tensor_data = self_tensor->CurrentTensorData();
      if (src_tensor_data) {
        // both src and dst are simply backed by at::Tensor data, no IR; do a
        // straightforward copy
        dst_tensor_data->copy_(*src_tensor_data);
      } else {
        // src needs to be materialized before its result can be used for a
        // copy into dst. Since we use the src tensor only for making a copy,
        // we don't need to detach it. Note: it would be even more efficient
        // if we could cause ToTensor to materialize the value directly into
        // dst's buffer (that would need to be detached, though).
        dst_tensor_data->copy_(self_tensor->ToTensor(/*detached=*/false));
      }
    } else {
      copy_(dst_tensor, self_tensor);
      auto* impl =
          dynamic_cast<torch::lazy::LTCTensorImpl*>(dst.unsafeGetTensorImpl());
      impl->set_tensor(dst_tensor);
    }
  }
  return dst;
}

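// Like _copy_from, but the destination is also resized to match the source
// (for a lazy destination, its sizes are refreshed from the updated tensor).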
at::Tensor LazyNativeFunctions::_copy_from_and_resize(
    const at::Tensor& self,
    const at::Tensor& dst) {
  TORCH_LAZY_FN_COUNTER("lazy::");
  auto dst_tensor = torch::lazy::TryGetLtcTensor(dst);
  auto self_tensor = torch::lazy::TryGetLtcTensor(self);
  if (!self_tensor) {
    CHECK(dst_tensor);
    dst_tensor->UpdateFromTensorOut(self);
  } else if (!dst_tensor) {
    CHECK(self_tensor);
    at::Tensor tensor = self_tensor->ToTensor(/*detached=*/true);
    at::Tensor typed_tensor =
        torch::lazy::CopyTensor(tensor, dst.scalar_type(), /*copy=*/false);
    dst.resize_as_(typed_tensor).copy_(typed_tensor);
  } else {
    // at this point we know dst is a lazy tensor
    auto* dest_impl =
        dynamic_cast<torch::lazy::LTCTensorImpl*>(dst.unsafeGetTensorImpl());
    dest_impl->tensor()->UpdateFromTensorOut(self_tensor);
    dest_impl->force_refresh_sizes();
  }
  return dst;
}

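// _to_copy covers four cases: (1) eager->lazy creates a new lazy tensor,
// (2) lazy->eager materializes the tensor (graph break), (3) lazy->lazy on a
// different device index materializes and re-wraps, and (4) lazy->lazy on the
// same device keeps the _to_copy node inside the lazy graph.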
at::Tensor LazyNativeFunctions::_to_copy(
    const at::Tensor& self,
    c10::optional<at::ScalarType> dtype,
    c10::optional<at::Layout> layout,
    c10::optional<at::Device> device,
    c10::optional<bool> pin_memory,
    bool non_blocking,
    c10::optional<at::MemoryFormat> memory_format) {
  if (force_eager_fallback(at::aten::_to_copy)) {
    TORCH_INTERNAL_ASSERT(
        false,
        "Fallback is currently impossible for _to_copy since the fallback helper itself reinvokes _to_copy");
  }

  auto options = self.options();
  if (dtype) {
    // Each setter is guarded by a conditional instead of chaining
    // `self.options().dtype(dtype).layout(layout)...` because calling
    // .dtype(nullopt) on an options() that already has a dtype appears to
    // wipe it.
    options = options.dtype(dtype);
  }
  if (layout) {
    options = options.layout(layout);
  }
  if (memory_format) {
    options = options.memory_format(memory_format);
  }
  if (pin_memory) {
    // TODO(whc) can we honor 'pin_memory' in some/all cases?
    options = options.pinned_memory(pin_memory);
    TORCH_WARN_ONCE(
        "Pinned memory used in lazy _to_copy, check if the behavior is as intended");
  }

  TORCH_LAZY_FN_COUNTER("lazy::");
  auto lazy_self = torch::lazy::TryGetLtcTensor(self);
  if (!lazy_self && device && device->type() == c10::kLazy) {
    // Case 1: eager->lazy (we create a new lazy tensor)
    // See Note [Lazy Tensor Functionalization]
    // Invariant: if the functionalization key is in the exclude set, then we're
    // expected to return an ordinary tensor, which will be "lifted" into a
    // functional wrapper later.
    bool functionalize_output =
        !c10::impl::tls_local_dispatch_key_set().excluded_.has(
            c10::DispatchKey::Functionalize);
    return torch::lazy::to_lazy_tensor(
        self,
        options,
        *device,
        /*non_blocking=*/non_blocking,
        /*functionalize_output=*/functionalize_output);
  } else if (device && device->type() != c10::kLazy) {
    // Case 2: lazy->eager (forces a graph break since we are materializing a
    // tensor)

    TORCH_INTERNAL_ASSERT(lazy_self);
    auto eager_tensor = lazy_self->ToTensor(/*detached=*/true);
    options = options.device(device);
    auto moved_eager_tensor =
        eager_tensor.to(options, /*non_blocking=*/non_blocking, /*copy=*/true);
    return moved_eager_tensor;
  } else if (
      device && device->type() == c10::kLazy && device->has_index() &&
      device->index() != self.device().index()) {
    // Case 3: lazy:0 -> lazy:1

    // TODO(whc) what do we actually want to do here?
    // option 1: materialize, move eager tensor, create new lazy tensor
    //   - this should be our default, as it is what would happen before we
    //     implemented _to_copy
    //   - actually combines case 1 + case 2
    // option 2: support multiple devices inside one lazy/TS executor (case 4)
    //   - but: we may have other assumptions that there is just one device
    //     per executor? so don't take this lightly

    TORCH_INTERNAL_ASSERT(lazy_self);
    auto eager_tensor = lazy_self->ToTensor(/*detached=*/true);
    // we move the eager tensor to the 'eager' equivalent of our lazy device;
    // e.g. if our device is lazy:1, the backend maps that to cuda:1, which is
    // what we use
    auto eager_device = c10::Device(
        torch::lazy::getBackend()->EagerFallbackDeviceType(), device->index());
    options = options.device(eager_device);
    auto moved_eager_tensor =
        eager_tensor.to(options, /*non_blocking=*/false, /*copy=*/true);
    lazy_self = torch::lazy::GetOrCreateLtcTensor(
        moved_eager_tensor,
        torch::lazy::atenDeviceToBackendDevice(eager_device));
    return torch::lazy::CreateAtenFromLtcTensor(lazy_self);

  } else {
    // Case 4: lazy->lazy (special case: keep the _to_copy INSIDE the lazy
    // graph)

    // Note: the captured _to_copy will be executed with real eager tensors,
    // not lazy tensors. We DO NOT want to burn 'lazy:0' as the device into
    // this captured IR, or we will try to convert an eager tensor back to a
    // lazy one inside the torchscript executor. lazy:0 -> lazy:1 is handled
    // in case 3, so we can safely drop the device argument.
    device = c10::nullopt;

    torch::lazy::NodePtr node = torch::lazy::ReuseNode<ToCopy>(
        lazy_self->GetIrValue(),
        dtype,
        layout,
        device,
        pin_memory,
        non_blocking,
        memory_format);
    if (!node) {
      auto shapes = torch::lazy::compute_shape__to_copy(
          self, dtype, layout, device, pin_memory, non_blocking, memory_format);
      TORCH_INTERNAL_ASSERT(shapes.size() == 1);
      node = torch::lazy::MakeNode<ToCopy>(
          lazy_self->GetIrValue(),
          dtype,
          layout,
          device,
          pin_memory,
          non_blocking,
          memory_format,
          std::move(shapes));
      CacheNode(node);
    }

    auto result =
        torch::lazy::CreateAtenFromLtcTensor(torch::lazy::LazyTensor::Create(
            std::move(node), lazy_self->GetDevice()));
    return result;
  }
}

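// empty is created eagerly on the backend's eager fallback device and then
// wrapped as a lazy tensor (and, unless functionalization is excluded, in a
// functional wrapper).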
at::Tensor LazyNativeFunctions::empty_symint(
    at::SymIntArrayRef sym_size,
    c10::optional<at::ScalarType> dtype,
    c10::optional<at::Layout> layout,
    c10::optional<at::Device> device,
    c10::optional<bool> pin_memory,
    c10::optional<at::MemoryFormat> memory_format) {
  // TODO: support this directly
  auto size = C10_AS_INTARRAYREF_SLOW(sym_size);
  const auto device_type = torch::lazy::getBackend()->EagerFallbackDeviceType();
  at::TensorOptions options = at::TensorOptions()
                                  .device(c10::Device(device_type))
                                  .layout(layout)
                                  .pinned_memory(pin_memory)
                                  .dtype(dtype);
  auto x_result = at::empty(size, options, memory_format);
  auto tensor = CreateLtcTensor(x_result, GetLtcDevice(device));
  // See Note [Lazy Tensor Functionalization]
  if (c10::impl::tls_local_dispatch_key_set().excluded_.has(
          c10::DispatchKey::Functionalize)) {
    // Invariant: if the functionalization key is in the exclude set, then we're
    // expected to return an ordinary tensor, which will be "lifted" into a
    // functional wrapper later.
    return tensor;
  } else {
    auto wrapped = at::functionalization::impl::to_functional_tensor(tensor);
    return wrapped;
  }
}

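// Implemented as empty_symint followed by as_strided.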
at::Tensor LazyNativeFunctions::empty_strided_symint(
    at::SymIntArrayRef sym_size,
    at::SymIntArrayRef sym_stride,
    c10::optional<at::ScalarType> dtype,
    c10::optional<at::Layout> layout,
    c10::optional<at::Device> device,
    c10::optional<bool> pin_memory) {
  TORCH_LAZY_FN_COUNTER("lazy::");
  at::Tensor t =
      empty_symint(sym_size, dtype, layout, device, pin_memory, c10::nullopt);
  auto size = C10_AS_INTARRAYREF_SLOW(sym_size);
  auto stride = C10_AS_INTARRAYREF_SLOW(sym_stride);
  return t.as_strided(size, stride, /*storage_offset=*/0);
}

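// In-place fill, handled by the lazy fill_ helper from tensor_aten_ops.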
at::Tensor& LazyNativeFunctions::fill_(
    at::Tensor& self,
    const at::Scalar& value) {
  TORCH_LAZY_FN_COUNTER("lazy::");
  auto self_tensor = torch::lazy::TryGetLtcTensor(self);
  torch::lazy::fill_(self_tensor, value);
  return self;
}

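// max_pool3d is routed through a custom autograd function so that the
// backward pass can use the indices saved by the forward pass (see the note
// on the *_with_indices overrides below).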
at::Tensor LazyNativeFunctions::max_pool3d(
    const at::Tensor& self,
    at::IntArrayRef kernel_size,
    at::IntArrayRef stride,
    at::IntArrayRef padding,
    at::IntArrayRef dilation,
    bool ceil_mode) {
  return torch::lazy::MaxPool3dAutogradFunctionTS::apply(
      self, kernel_size, stride, padding, dilation, ceil_mode);
}

// We need to explicitly override the max pooling operators and just call the
// fallback for them because we've customized the autograd function for them
// (backward needs saved indices from forward).
std::tuple<at::Tensor, at::Tensor> LazyNativeFunctions::max_pool3d_with_indices(
    const at::Tensor& self,
    at::IntArrayRef kernel_size,
    at::IntArrayRef stride,
    at::IntArrayRef padding,
    at::IntArrayRef dilation,
    bool ceil_mode) {
  return at::native::
      call_fallback_fn<&ltc_eager_fallback, ATEN_OP(max_pool3d_with_indices)>::
          call(self, kernel_size, stride, padding, dilation, ceil_mode);
}

at::Tensor LazyNativeFunctions::max_pool3d_with_indices_backward(
    const at::Tensor& grad_output,
    const at::Tensor& self,
    at::IntArrayRef kernel_size,
    at::IntArrayRef stride,
    at::IntArrayRef padding,
    at::IntArrayRef dilation,
    bool ceil_mode,
    const at::Tensor& indices) {
  return at::native::call_fallback_fn<
      &ltc_eager_fallback,
      ATEN_OP(max_pool3d_with_indices_backward)>::
      call(
          grad_output,
          self,
          kernel_size,
          stride,
          padding,
          dilation,
          ceil_mode,
          indices);
}

at::Tensor& LazyNativeFunctions::normal_(
    at::Tensor& self,
    double mean,
    double std,
    c10::optional<at::Generator> generator) {
  // Unconditionally fall back.
  // Implementing normal_ via lazy tensors caused differences in results
  // compared to eager mode.
  return at::native::call_fallback_fn<&ltc_eager_fallback, ATEN_OP(normal_)>::
      call(self, mean, std, generator);

  // if (force_eager_fallback(c10::Symbol::fromQualString("aten::normal_"))) {
  //   return at::native::call_fallback_fn<&ltc_eager_fallback,
  //       ATEN_OP(normal_)>::call(self, mean, std, generator);
  // }

  // if (generator.has_value()) {
  //   return at::native::call_fallback_fn<&ltc_eager_fallback,
  //       ATEN_OP(normal_)>::call(self, mean, std, generator);
  // }

  // TORCH_LAZY_FN_COUNTER("lazy::");
  // auto device = bridge::GetBackendDevice(self);
  // LazyTensor lazy_self = GetLtcTensorOrCreateForWrappedNumber(self, *device);
  // std::vector<torch::lazy::Shape> shapes =
  //     {torch::lazy::Shape(self.scalar_type(), self.sizes().vec())};
  // auto node = torch::lazy::MakeNode<Normal>(
  //     lazy_self.GetIrValue(), mean, std, std::move(shapes));
  // lazy_self.SetInPlaceIrValue(node);
  // return self;
}

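// Under the lazy backend, _unsafe_view is lowered as a view_copy (there is no
// aliasing guarantee that needs to be preserved).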
at::Tensor LazyNativeFunctions::_unsafe_view(
    const at::Tensor& self,
    at::IntArrayRef size) {
  TORCH_LAZY_FN_COUNTER("lazy::");
  return LazyNativeFunctions::view_copy_symint(
      self, c10::fromIntArrayRefSlow(size));
}

// This is needed by the torch.tensor constructor.
// LazyTensor always opts into functionalization.
// "lifting" a tensor for functionalization means wrapping it in a
// FunctionalTensorWrapper object.
at::Tensor LazyNativeFunctions::lift(const at::Tensor& tensor) {
  TORCH_INTERNAL_ASSERT(
      !at::functionalization::impl::isFunctionalTensor(tensor));
  return at::functionalization::impl::to_functional_tensor(tensor);
}
at::Tensor LazyNativeFunctions::lift_fresh(const at::Tensor& tensor) {
  TORCH_INTERNAL_ASSERT(
      !at::functionalization::impl::isFunctionalTensor(tensor));
  return at::functionalization::impl::to_functional_tensor(tensor);
}

// All of the below ops correspond to CompositeExplicitAutograd kernels from
// core that call into view operators internally. These are all composite ops
// that LTC can technically re-use / get for free, but we need to
// "functionalize" them to remove the view ops before we can use them.
at::Tensor LazyNativeFunctions::block_diag(at::TensorList tensors) {
  return at::functionalization::functionalize_aten_op<ATEN_OP(
      block_diag)>::call(tensors);
}
at::Tensor LazyNativeFunctions::new_empty_strided_symint(
    const at::Tensor& self,
    c10::SymIntArrayRef size,
    c10::SymIntArrayRef stride,
    c10::optional<at::ScalarType> dtype,
    c10::optional<at::Layout> layout,
    c10::optional<at::Device> device,
    c10::optional<bool> pin_memory) {
  return at::functionalization::
      functionalize_aten_op_symint<ATEN_OP(new_empty_strided)>::call(
          self, size, stride, dtype, layout, device, pin_memory);
}

at::Tensor LazyNativeFunctions::narrow_copy_symint(
    const at::Tensor& self,
    int64_t dim,
    c10::SymInt start,
    c10::SymInt length) {
  return at::functionalization::functionalize_aten_op_symint<ATEN_OP(
      narrow_copy)>::call(self, dim, start, length);
}
at::Tensor LazyNativeFunctions::pixel_shuffle(
    const at::Tensor& self,
    int64_t upscale_factor) {
  return at::functionalization::functionalize_aten_op<ATEN_OP(
      pixel_shuffle)>::call(self, upscale_factor);
}
at::Tensor LazyNativeFunctions::pixel_unshuffle(
    const at::Tensor& self,
    int64_t downscale_factor) {
  return at::functionalization::functionalize_aten_op<ATEN_OP(
      pixel_unshuffle)>::call(self, downscale_factor);
}
at::Tensor LazyNativeFunctions::select_backward_symint(
    const at::Tensor& grad_output,
    c10::SymIntArrayRef input_sizes,
    int64_t dim,
    c10::SymInt index) {
  return at::functionalization::functionalize_aten_op_symint<ATEN_OP(
      select_backward)>::call(grad_output, input_sizes, dim, index);
}
at::Tensor LazyNativeFunctions::_trilinear(
    const at::Tensor& i1,
    const at::Tensor& i2,
    const at::Tensor& i3,
    at::IntArrayRef expand1,
    at::IntArrayRef expand2,
    at::IntArrayRef expand3,
    at::IntArrayRef sumdim,
    int64_t unroll_dim) {
  return at::functionalization::functionalize_aten_op<ATEN_OP(_trilinear)>::
      call(i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim);
}
at::Tensor LazyNativeFunctions::linalg_pinv(
    const at::Tensor& self,
    const c10::optional<at::Tensor>& atol,
    const c10::optional<at::Tensor>& rtol,
    bool hermitian) {
  return at::functionalization::functionalize_aten_op<ATEN_OP2(
      linalg_pinv, atol_rtol_tensor)>::call(self, atol, rtol, hermitian);
}

// functionalize_aten_op can't handle out= ops directly.
// Instead, we can call the composite kernel from core, and copy any mutations
// back to the inputs.
at::Tensor& LazyNativeFunctions::logsumexp_out(
    const at::Tensor& self,
    at::IntArrayRef dim,
    bool keepdim,
    at::Tensor& out) {
  auto self_wrapped = at::functionalization::impl::to_functional_tensor(self);
  auto out_wrapped = at::functionalization::impl::to_functional_tensor(out);
  // directly call the composite kernel from core.
  // Make sure to re-enable functionalization first.
  auto curr_tls = c10::impl::tls_local_dispatch_key_set();
  auto tls_reenable_functionalize = c10::impl::PODLocalDispatchKeySet();
  tls_reenable_functionalize.set_included(curr_tls.included_);
  tls_reenable_functionalize.set_excluded(
      curr_tls.excluded_.remove(c10::DispatchKey::Functionalize));
  c10::impl::ForceDispatchKeyGuard guard_(tls_reenable_functionalize);
  at::native::logsumexp_out(self_wrapped, dim, keepdim, out_wrapped);
  auto out_unwrapped =
      at::functionalization::impl::from_functional_tensor(out_wrapped);
  // propagate mutations back to the inputs (including resizing)
  out.resize_(out_unwrapped.sizes());
  out.copy_(out_unwrapped);
  return out;
}

at::Tensor LazyNativeFunctions::diag_embed(
    const at::Tensor& self,
    int64_t offset,
    int64_t dim1,
    int64_t dim2) {
  return at::functionalization::functionalize_aten_op<ATEN_OP(
      diag_embed)>::call(self, offset, dim1, dim2);
}

at::Tensor LazyNativeFunctions::diagonal_backward_symint(
    const at::Tensor& grad_output,
    at::SymIntArrayRef input_sizes,
    int64_t offset,
    int64_t dim1,
    int64_t dim2) {
  return at::functionalization::functionalize_aten_op_symint<ATEN_OP(
      diagonal_backward)>::call(grad_output, input_sizes, offset, dim1, dim2);
}

at::Tensor LazyNativeFunctions::slice_backward_symint(
    const at::Tensor& grad_output,
    at::SymIntArrayRef input_sizes,
    int64_t dim,
    c10::SymInt start,
    c10::SymInt end,
    c10::SymInt step) {
  return at::functionalization::functionalize_aten_op_symint<ATEN_OP(
      slice_backward)>::call(grad_output, input_sizes, dim, start, end, step);
}

// Re-use the composite kernel from core so that we don't need to provide a
// backward formula for native_group_norm.
std::tuple<Tensor, Tensor, Tensor> LazyNativeFunctions::native_group_norm(
    const at::Tensor& input,
    const c10::optional<at::Tensor>& weight,
    const c10::optional<at::Tensor>& bias,
    int64_t N,
    int64_t C,
    int64_t HxW,
    int64_t group,
    double eps) {
  return at::native::math_group_norm(
      input, weight, bias, N, C, HxW, group, eps);
}

void InitializeAtenBindings() {}

} // namespace lazy
} // namespace torch