1// Generated from "/code/pytorch/third_party/nvfuser/runtime/fp16_support.cu"
2// 2023-02-12 08:01:26
3
4namespace nvfuser_resources {
5
// CUDA source text for half-precision support, stored as a string constant.
// The file header states this was generated from runtime/fp16_support.cu, so
// lasting changes presumably belong in that file -- TODO confirm with the
// generator. How the constant is consumed is not visible here; presumably it
// is handed to a runtime compiler -- TODO confirm against the caller.
//
// Everything inside the raw string literal below is string data, not code
// compiled as part of this translation unit. Any text added there (including
// text that looks like a comment) changes the value of the constant.
//
// The embedded source declares:
//   - __NVFUSER_HALF_TO_US / __NVFUSER_HALF_TO_CUS: reinterpret the address of
//     the argument as unsigned short* / const unsigned short* and dereference
//     it. The conversion functions use them to pass a __half as an inline-asm
//     operand, since the __x member itself is protected.
//   - struct __half: declared with __align__(2); holds a single protected
//     unsigned short __x; has a defaulted default constructor and a
//     __device__ constructor from float that copies the bits produced by
//     __float2half.
//   - __float2half / __half2float: float <-> __half conversion through inline
//     asm (cvt.rn.f16.f32 and cvt.f32.f16 respectively).
//   - __double2half / __half2double: when __CUDA_ARCH__ >= 700 they convert
//     directly through inline asm (cvt.rn.f16.f64 and cvt.f64.f16); otherwise
//     they go through float by calling __float2half / __half2float.
6constexpr const char* fp16_support_cu = R"(
7
8#define __NVFUSER_HALF_TO_US(var) *(reinterpret_cast<unsigned short*>(&(var)))
9#define __NVFUSER_HALF_TO_CUS(var) \
10 *(reinterpret_cast<const unsigned short*>(&(var)))
11
12struct __half;
13__device__ __half __float2half(const float);
14
15struct __align__(2) __half {
16 __half() = default;
17
18 __device__ __half(const float f) {
19 __x = __float2half(f).__x;
20 }
21
22 protected:
23 unsigned short __x;
24};
25
26__device__ __half __float2half(const float f) {
27 __half val;
28 asm("{ cvt.rn.f16.f32 %0, %1;}\n"
29 : "=h"(__NVFUSER_HALF_TO_US(val))
30 : "f"(f));
31 return val;
32}
33
34__device__ float __half2float(const __half h) {
35 float val;
36 asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(__NVFUSER_HALF_TO_CUS(h)));
37 return val;
38}
39
40__device__ __half __double2half(const double d) {
41#if __CUDA_ARCH__ >= 700
42 __half val;
43 asm("{ cvt.rn.f16.f64 %0, %1;}\n"
44 : "=h"(__NVFUSER_HALF_TO_US(val))
45 : "d"(d));
46 return val;
47#else
48 return __float2half(static_cast<float>(d));
49#endif
50}
51
52__device__ double __half2double(const __half h) {
53#if __CUDA_ARCH__ >= 700
54 double val;
55 asm("{ cvt.f64.f16 %0, %1;}\n" : "=d"(val) : "h"(__NVFUSER_HALF_TO_CUS(h)));
56 return val;
57#else
58 return static_cast<double>(__half2float(h));
59#endif
60}
61)";
62
63} // namespace nvfuser_resources
64