1 | // Generated from "/code/pytorch/third_party/nvfuser/runtime/fp16_support.cu" |
2 | // 2023-02-12 08:01:26 |
3 | |
4 | namespace nvfuser_resources { |
5 | |
6 | constexpr const char* fp16_support_cu = R"( |
7 | |
// View the storage of var as an unsigned short lvalue. Used in this file to
// bind a __half to the 16-bit (h-constrained) operands of inline PTX.
// The expansion is fully parenthesized so that operators written next to the
// macro invocation apply to the dereferenced value, not to the cast pointer.
#define __NVFUSER_HALF_TO_US(var) \
  (*(reinterpret_cast<unsigned short*>(&(var))))
// Read-only counterpart of __NVFUSER_HALF_TO_US for const objects.
#define __NVFUSER_HALF_TO_CUS(var) \
  (*(reinterpret_cast<const unsigned short*>(&(var))))
11 | |
struct __half;
// Declared ahead of the struct because the converting constructor calls it;
// the definition appears later in this file.
__device__ __half __float2half(const float f);

// Half-precision value kept as its raw bit pattern in an unsigned short,
// declared with 2-byte alignment.
struct __align__(2) __half {
  // Defaulted, so the type stays trivially default-constructible.
  __half() = default;

  // Converting constructor: takes over the bits that __float2half
  // produces for f.
  __device__ __half(const float f) : __x(__float2half(f).__x) {}

 protected:
  // Raw encoding of the value; the conversion helpers in this file reach it
  // through the __NVFUSER_HALF_TO_US / __NVFUSER_HALF_TO_CUS macros.
  unsigned short __x;
};
25 | |
// Converts a float to __half with the PTX cvt.rn.f16.f32 instruction
// (the rn modifier selects round-to-nearest-even).
__device__ __half __float2half(const float f) {
  // Let the conversion write a plain 16-bit register first ...
  unsigned short bits;
  asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(bits) : "f"(f));
  // ... then place those bits into the result object.
  __half result;
  __NVFUSER_HALF_TO_US(result) = bits;
  return result;
}
33 | |
// Converts a __half to float with the PTX cvt.f32.f16 instruction.
__device__ float __half2float(const __half h) {
  // Read the stored bits so they can be passed as a 16-bit register operand.
  const unsigned short bits = __NVFUSER_HALF_TO_CUS(h);
  float result;
  asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(result) : "h"(bits));
  return result;
}
39 | |
// Converts a double to __half, rounding to nearest even exactly once.
//
// When __CUDA_ARCH__ is at least 700 the PTX cvt.rn.f16.f64 instruction does
// the conversion directly. The fallback has to go through float. A plain
// round-to-nearest double-to-float step followed by the round-to-nearest
// float-to-half step rounds twice, and the result can differ from the direct
// conversion when the first step lands exactly on a half-precision tie.
// To keep both branches in agreement the intermediate float is produced with
// round-to-odd: truncate toward zero, then set the lowest mantissa bit if the
// truncation discarded anything. float has more than two extra mantissa bits
// over half, so the final round-to-nearest then yields the correctly rounded
// value.
__device__ __half __double2half(const double d) {
#if __CUDA_ARCH__ >= 700
  __half val;
  asm("{ cvt.rn.f16.f64 %0, %1;}\n"
      : "=h"(__NVFUSER_HALF_TO_US(val))
      : "d"(d));
  return val;
#else
  // Truncating conversion; a finite input never becomes an infinity here.
  float f;
  asm("{ cvt.rz.f32.f64 %0, %1;}\n" : "=f"(f) : "d"(d));
  // True when truncation was inexact. Also true for NaN, which remains a NaN
  // after the bit is set. Infinities and exact values compare equal and are
  // left untouched.
  if (static_cast<double>(f) != d) {
    unsigned int bits;
    asm("{ mov.b32 %0, %1;}\n" : "=r"(bits) : "f"(f));
    bits |= 1u; // sticky bit: records that lower-order bits were dropped
    asm("{ mov.b32 %0, %1;}\n" : "=f"(f) : "r"(bits));
  }
  return __float2half(f);
#endif
}
51 | |
// Converts a __half to double.
//
// When __CUDA_ARCH__ is at least 700 the PTX cvt.f64.f16 instruction is used
// directly; otherwise the value is first converted with __half2float and the
// resulting float is cast to double.
__device__ double __half2double(const __half h) {
#if __CUDA_ARCH__ >= 700
  const unsigned short bits = __NVFUSER_HALF_TO_CUS(h);
  double result;
  asm("{ cvt.f64.f16 %0, %1;}\n" : "=d"(result) : "h"(bits));
  return result;
#else
  const float widened = __half2float(h);
  return static_cast<double>(widened);
#endif
}
61 | )" ; |
62 | |
63 | } // namespace nvfuser_resources |
64 | |