1// Generated from "/code/pytorch/third_party/nvfuser/runtime/random_numbers.cu"
2// 2023-02-12 08:01:26
3
4namespace nvfuser_resources {
5
6constexpr const char* random_numbers_cu = R"(
// Unsigned 32x32 multiply whose 64-bit product is handed back as two
// 32-bit halves: the upper half is stored through result_high (obtained
// with the __umulhi intrinsic) and the lower half is the return value.
__device__ unsigned int mulhilo32(
    unsigned int a,
    unsigned int b,
    unsigned int* result_high) {
  // Unsigned multiplication wraps modulo 2^32, leaving the low word.
  const unsigned int low_word = a * b;
  *result_high = __umulhi(a, b);
  return low_word;
}
14
// Applies one Philox-style mixing round to a 4-word counter under a
// 2-word key and returns the new counter.
//
//   ctr.x and ctr.z are each multiplied by a fixed 32-bit constant via
//   mulhilo32. The high halves are XORed with the remaining counter
//   words (ctr.y, ctr.w) and the key words; the low halves are passed
//   through unchanged. The four results are emitted in permuted order.
__device__ uint4 single_round(uint4 ctr, uint2 key) {
  // Round multipliers. They are 32-bit quantities, so they are typed as
  // unsigned int (matching mulhilo32's parameters) instead of relying on
  // an implicit narrowing conversion from a platform-dependent wider type.
  constexpr unsigned int kPhiloxSA = 0xD2511F53u;
  constexpr unsigned int kPhiloxSB = 0xCD9E8D57u;

  unsigned int hi0;
  unsigned int hi1;
  const unsigned int lo0 = mulhilo32(kPhiloxSA, ctr.x, &hi0);
  const unsigned int lo1 = mulhilo32(kPhiloxSB, ctr.z, &hi1);

  uint4 ret = {hi1 ^ ctr.y ^ key.x, lo1, hi0 ^ ctr.w ^ key.y, lo0};
  return ret;
}
25
// Produces four 32-bit pseudo-random words from (seed, subsequence, offset).
//
//   seed        - split into the two 32-bit key words (low word, high word).
//   subsequence - split into counter words z (low) and w (high).
//   offset      - split into counter words x (low) and y (high).
//
// single_round is applied ten times in total. The key is advanced by a
// fixed pair of constants after each of the first nine rounds and is not
// advanced after the final one.
__device__ uint4 philox(
    unsigned long long seed,
    unsigned long long subsequence,
    unsigned long long offset) {
  // Per-round key increments. They are added to 32-bit key words, so they
  // are typed as unsigned int; the additions wrap modulo 2^32 directly
  // instead of being computed in a wider type and truncated on assignment.
  constexpr unsigned int kPhilox10A = 0x9E3779B9u;
  constexpr unsigned int kPhilox10B = 0xBB67AE85u;

  uint2 key = {};
  key.x = (unsigned int)seed;
  key.y = (unsigned int)(seed >> 32);

  uint4 counter = make_uint4(
      (unsigned int)(offset),
      (unsigned int)(offset >> 32),
      (unsigned int)(subsequence),
      (unsigned int)(subsequence >> 32));

  // First nine rounds, each followed by a key bump.
  for (int i = 0; i < 9; i++) {
    counter = single_round(counter, key);
    key.x += kPhilox10A;
    key.y += kPhilox10B;
  }

  // Tenth round; its result is the output.
  return single_round(counter, key);
}
52
// Converts a 32-bit word to a float by scaling it with 2^-32.
// If the scaled value comes out as exactly 1 (possible because large
// inputs round up when converted to float), 0 is returned instead, so
// the function never yields 1.
__device__ float uniformf(unsigned int x) {
  constexpr float kRanInvM32 = 2.3283064e-10f; // Inverse of 2^32.
  const float scaled = x * kRanInvM32;
  if (scaled == 1) {
    return 0.0f;
  }
  return scaled;
}
58
// Builds a double from two 32-bit words.
// The words are combined into one integer by XORing x with y shifted
// left by 21 bits (53 - 32), so the combined value occupies at most 53
// bits. That integer is scaled by 2^-53 and offset by half of that
// step. A result of exactly 1 is replaced by 0.
__device__ double uniform(unsigned int x, unsigned int y) {
  constexpr double kRan2Pow53Inv = 1.1102230246251565e-16;
  const unsigned long long low_part = (unsigned long long)x;
  const unsigned long long high_part = (unsigned long long)y << (53 - 32);
  const unsigned long long bits = low_part ^ high_part;
  // Kept as a single expression so floating-point evaluation is unchanged.
  const double scaled = bits * kRan2Pow53Inv + (kRan2Pow53Inv / 2.0);
  if (scaled == 1) {
    return 0.0;
  }
  return scaled;
}
66
// Returns a double built by uniform() from one pair of words of rng_result.
// The four members are addressed as consecutive unsigned ints starting
// at .x; rng_component selects words [2*c] and [2*c + 1]. No range
// check is performed on rng_component.
__device__ double rng_uniform(const uint4& rng_result, int rng_component) {
  const unsigned int* words = &rng_result.x;
  const int first = rng_component * 2;
  return uniform(words[first], words[first + 1]);
}
72
// Returns a float built by uniformf() from a single word of rng_result.
// The four members are addressed as consecutive unsigned ints starting
// at .x; rng_component is the index of the word to use. No range check
// is performed on rng_component.
__device__ float rng_uniformf(const uint4& rng_result, int rng_component) {
  const unsigned int* words = &rng_result.x;
  return uniformf(words[rng_component]);
}
76
// Rescales the double obtained from rng_uniform(rng_result, rng_component)
// linearly: the value returned is from + (to - from) * sample.
__device__ double rng_uniform_range(
    const uint4& rng_result,
    int rng_component,
    double from,
    double to) {
  const double width = to - from;
  const double sample = rng_uniform(rng_result, rng_component);
  return from + width * sample;
}
86
// Single-precision counterpart of the range helper: rescales the float
// obtained from rng_uniformf(rng_result, rng_component) linearly, returning
// from + (to - from) * sample.
__device__ float rng_uniform_rangef(
    const uint4& rng_result,
    int rng_component,
    float from,
    float to) {
  const float width = to - from;
  const float sample = rng_uniformf(rng_result, rng_component);
  return from + width * sample;
}
96)";
97
98} // namespace nvfuser_resources
99