1 | // Generated from "/code/pytorch/third_party/nvfuser/runtime/random_numbers.cu" |
2 | // 2023-02-12 08:01:26 |
3 | |
4 | namespace nvfuser_resources { |
5 | |
6 | constexpr const char* random_numbers_cu = R"( |
// Multiplies two 32-bit unsigned integers and splits the product in two.
//   a, b         - the operands.
//   result_high  - output: receives the upper 32 bits of a * b (via __umulhi).
// Returns the lower 32 bits of the product (unsigned wrap-around multiply).
__device__ unsigned int mulhilo32(
    unsigned int a,
    unsigned int b,
    unsigned int* result_high) {
  // a and b are passed by value, so writing through result_high cannot
  // disturb them; the order of the two computations is irrelevant.
  const unsigned int low_bits = a * b;
  *result_high = __umulhi(a, b);
  return low_bits;
}
14 | |
// Performs one mixing round on a 4-word counter using a 2-word key.
//   ctr - the current counter words (x, y, z, w).
//   key - the round key words (x, y).
// ctr.x and ctr.z are each multiplied by a fixed constant; the high halves of
// those products are XORed with ctr.y / ctr.w and the key words, while the low
// halves are passed through unchanged into the y and w output lanes.
__device__ uint4 single_round(uint4 ctr, uint2 key) {
  constexpr unsigned long kPhiloxSA = 0xD2511F53;
  constexpr unsigned long kPhiloxSB = 0xCD9E8D57;

  unsigned int high_x = 0;
  unsigned int high_z = 0;
  const unsigned int low_x = mulhilo32(kPhiloxSA, ctr.x, &high_x);
  const unsigned int low_z = mulhilo32(kPhiloxSB, ctr.z, &high_z);

  // Note the cross-over: the product of ctr.z feeds the first two output
  // words, the product of ctr.x feeds the last two.
  return make_uint4(
      high_z ^ ctr.y ^ key.x, low_z, high_x ^ ctr.w ^ key.y, low_x);
}
25 | |
// Produces four 32-bit random words from (seed, subsequence, offset).
//   seed        - split into two 32-bit halves that form the initial key.
//   subsequence - split into two 32-bit halves stored in counter words z, w.
//   offset      - split into two 32-bit halves stored in counter words x, y.
// Runs ten calls to single_round in total: nine rounds that are each followed
// by a key increment, then one final round whose result is returned.
__device__ uint4 philox(
    unsigned long long seed,
    unsigned long long subsequence,
    unsigned long long offset) {
  constexpr unsigned long kPhilox10A = 0x9E3779B9;
  constexpr unsigned long kPhilox10B = 0xBB67AE85;
  constexpr int kKeyedRounds = 9;

  // Key = {low 32 bits of seed, high 32 bits of seed}.
  uint2 round_key = {(unsigned int)seed, (unsigned int)(seed >> 32)};

  // Counter = {offset.lo, offset.hi, subsequence.lo, subsequence.hi}.
  uint4 state = make_uint4(
      (unsigned int)(offset),
      (unsigned int)(offset >> 32),
      (unsigned int)(subsequence),
      (unsigned int)(subsequence >> 32));

  int rounds_left = kKeyedRounds;
  while (rounds_left > 0) {
    state = single_round(state, round_key);
    // Advance the key for the next round; the sums wrap modulo 2^32 when
    // stored back into the 32-bit key words.
    round_key.x += (kPhilox10A);
    round_key.y += (kPhilox10B);
    --rounds_left;
  }

  // Tenth and last round; no key increment follows it.
  return single_round(state, round_key);
}
52 | |
// Converts a 32-bit unsigned integer into a single-precision value by scaling
// it with 2^-32. If the scaled value comes out as exactly 1 (possible after
// float rounding of large inputs), 0.0f is returned instead.
__device__ float uniformf(unsigned int x) {
  constexpr float kRanInvM32 = 2.3283064e-10f; // Inverse of 2^32.
  const float scaled = x * kRanInvM32;
  if (scaled == 1) {
    return 0.0f;
  }
  return scaled;
}
58 | |
// Converts two 32-bit unsigned integers into a double-precision value.
// The inputs are merged into one integer as x XOR (y << 21); that integer is
// scaled by kRan2Pow53Inv and shifted up by half of that constant. A result
// of exactly 1 is replaced by 0.0.
__device__ double uniform(unsigned int x, unsigned int y) {
  constexpr double kRan2Pow53Inv = 1.1102230246251565e-16;
  const unsigned long long low_part = (unsigned long long)x;
  const unsigned long long high_part = (unsigned long long)y << (53 - 32);
  const unsigned long long z = low_part ^ high_part;
  const double value = z * kRan2Pow53Inv + (kRan2Pow53Inv / 2.0);
  if (value == 1) {
    return 0.0;
  }
  return value;
}
66 | |
// Builds a double-precision sample from a pair of words of rng_result.
// The uint4 is addressed as consecutive 32-bit words starting at .x; words
// [2 * rng_component] and [2 * rng_component + 1] are handed to uniform().
// NOTE(review): presumably rng_component must be 0 or 1 so both indices stay
// inside the four words - confirm against callers.
__device__ double rng_uniform(const uint4& rng_result, int rng_component) {
  const unsigned int* words = &rng_result.x;
  const int first = rng_component * 2;
  return uniform(words[first], words[first + 1]);
}
72 | |
// Builds a single-precision sample from one word of rng_result.
// The uint4 is addressed as consecutive 32-bit words starting at .x; word
// [rng_component] is handed to uniformf().
// NOTE(review): presumably rng_component must be in 0..3 - confirm against
// callers.
__device__ float rng_uniformf(const uint4& rng_result, int rng_component) {
  const unsigned int* words = &rng_result.x;
  const unsigned int selected = words[rng_component];
  return uniformf(selected);
}
76 | |
// Draws a double-precision sample via rng_uniform() and maps it linearly onto
// the interval that starts at `from` and has width (to - from):
//   result = from + (to - from) * sample
__device__ double rng_uniform_range(
    const uint4& rng_result,
    int rng_component,
    double from,
    double to) {
  const double sample = rng_uniform(rng_result, rng_component);
  const double width = to - from;
  return from + width * sample;
}
86 | |
// Draws a single-precision sample via rng_uniformf() and maps it linearly
// onto the interval that starts at `from` and has width (to - from):
//   result = from + (to - from) * sample
__device__ float rng_uniform_rangef(
    const uint4& rng_result,
    int rng_component,
    float from,
    float to) {
  const float sample = rng_uniformf(rng_result, rng_component);
  const float width = to - from;
  return from + width * sample;
}
96 | )" ; |
97 | |
98 | } // namespace nvfuser_resources |
99 | |