1 | #pragma once |
---|---|
2 | |
3 | #include <c10/macros/Macros.h> |
4 | |
5 | #if !defined(__CUDACC__) && !defined(__HIPCC__) |
6 | #include <ATen/cpu/vec/functional.h> |
7 | #include <ATen/cpu/vec/vec.h> |
8 | #endif |
9 | |
10 | namespace at { |
11 | namespace native { |
12 | namespace ufunc { |
13 | |
14 | template <typename T> |
15 | C10_HOST_DEVICE C10_ALWAYS_INLINE T add(T self, T other, T alpha) __ubsan_ignore_undefined__ { |
16 | return self + alpha * other; |
17 | } |
18 | |
19 | #if !defined(__CUDACC__) && !defined(__HIPCC__) |
20 | using vec::Vectorized; |
21 | template <typename T> |
22 | C10_ALWAYS_INLINE Vectorized<T> add(Vectorized<T> self, Vectorized<T> other, Vectorized<T> alpha) __ubsan_ignore_undefined__ { |
23 | return vec::fmadd(other, alpha, self); |
24 | } |
25 | #endif |
26 | |
27 | }}} // namespace at::native::ufunc |
28 |