1 | /* ---------------------------------------------------------------------------- |
2 | Copyright (c) 2019-2021, Microsoft Research, Daan Leijen |
3 | This is free software; you can redistribute it and/or modify it under the |
4 | terms of the MIT license. A copy of the license can be found in the file |
5 | "LICENSE" at the root of this distribution. |
6 | -----------------------------------------------------------------------------*/ |
7 | #ifndef _DEFAULT_SOURCE |
8 | #define _DEFAULT_SOURCE // for syscall() on Linux |
9 | #endif |
10 | |
11 | #include "mimalloc.h" |
12 | #include "mimalloc-internal.h" |
13 | |
14 | #include <string.h> // memset |
15 | |
16 | /* ---------------------------------------------------------------------------- |
17 | We use our own PRNG to keep predictable performance of random number generation |
18 | and to avoid implementations that use a lock. We only use the OS provided |
19 | random source to initialize the initial seeds. Since we do not need ultimate |
20 | performance but we do rely on the security (for secret cookies in secure mode) |
21 | we use a cryptographically secure generator (chacha20). |
22 | -----------------------------------------------------------------------------*/ |
23 | |
24 | #define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? |
25 | |
26 | |
27 | /* ---------------------------------------------------------------------------- |
28 | Chacha20 implementation as the original algorithm with a 64-bit nonce |
29 | and counter: https://en.wikipedia.org/wiki/Salsa20 |
30 | The input matrix has sixteen 32-bit values: |
31 | Position 0 to 3: constant key |
32 | Position 4 to 11: the key |
33 | Position 12 to 13: the counter. |
34 | Position 14 to 15: the nonce. |
35 | |
36 | The implementation uses regular C code which compiles very well on modern compilers. |
37 | (gcc x64 has no register spills, and clang 6+ uses SSE instructions) |
38 | -----------------------------------------------------------------------------*/ |
39 | |
40 | static inline uint32_t rotl(uint32_t x, uint32_t shift) { |
41 | return (x << shift) | (x >> (32 - shift)); |
42 | } |
43 | |
44 | static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { |
45 | x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); |
46 | x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); |
47 | x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); |
48 | x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); |
49 | } |
50 | |
51 | static void chacha_block(mi_random_ctx_t* ctx) |
52 | { |
53 | // scramble into `x` |
54 | uint32_t x[16]; |
55 | for (size_t i = 0; i < 16; i++) { |
56 | x[i] = ctx->input[i]; |
57 | } |
58 | for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { |
59 | qround(x, 0, 4, 8, 12); |
60 | qround(x, 1, 5, 9, 13); |
61 | qround(x, 2, 6, 10, 14); |
62 | qround(x, 3, 7, 11, 15); |
63 | qround(x, 0, 5, 10, 15); |
64 | qround(x, 1, 6, 11, 12); |
65 | qround(x, 2, 7, 8, 13); |
66 | qround(x, 3, 4, 9, 14); |
67 | } |
68 | |
69 | // add scrambled data to the initial state |
70 | for (size_t i = 0; i < 16; i++) { |
71 | ctx->output[i] = x[i] + ctx->input[i]; |
72 | } |
73 | ctx->output_available = 16; |
74 | |
75 | // increment the counter for the next round |
76 | ctx->input[12] += 1; |
77 | if (ctx->input[12] == 0) { |
78 | ctx->input[13] += 1; |
79 | if (ctx->input[13] == 0) { // and keep increasing into the nonce |
80 | ctx->input[14] += 1; |
81 | } |
82 | } |
83 | } |
84 | |
85 | static uint32_t chacha_next32(mi_random_ctx_t* ctx) { |
86 | if (ctx->output_available <= 0) { |
87 | chacha_block(ctx); |
88 | ctx->output_available = 16; // (assign again to suppress static analysis warning) |
89 | } |
90 | const uint32_t x = ctx->output[16 - ctx->output_available]; |
91 | ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out |
92 | ctx->output_available--; |
93 | return x; |
94 | } |
95 | |
96 | static inline uint32_t read32(const uint8_t* p, size_t idx32) { |
97 | const size_t i = 4*idx32; |
98 | return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); |
99 | } |
100 | |
101 | static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) |
102 | { |
103 | // since we only use chacha for randomness (and not encryption) we |
104 | // do not _need_ to read 32-bit values as little endian but we do anyways |
105 | // just for being compatible :-) |
106 | memset(ctx, 0, sizeof(*ctx)); |
107 | for (size_t i = 0; i < 4; i++) { |
108 | const uint8_t* sigma = (uint8_t*)"expand 32-byte k" ; |
109 | ctx->input[i] = read32(sigma,i); |
110 | } |
111 | for (size_t i = 0; i < 8; i++) { |
112 | ctx->input[i + 4] = read32(key,i); |
113 | } |
114 | ctx->input[12] = 0; |
115 | ctx->input[13] = 0; |
116 | ctx->input[14] = (uint32_t)nonce; |
117 | ctx->input[15] = (uint32_t)(nonce >> 32); |
118 | } |
119 | |
120 | static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { |
121 | memset(ctx_new, 0, sizeof(*ctx_new)); |
122 | _mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); |
123 | ctx_new->input[12] = 0; |
124 | ctx_new->input[13] = 0; |
125 | ctx_new->input[14] = (uint32_t)nonce; |
126 | ctx_new->input[15] = (uint32_t)(nonce >> 32); |
127 | mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! |
128 | chacha_block(ctx_new); |
129 | } |
130 | |
131 | |
132 | /* ---------------------------------------------------------------------------- |
133 | Random interface |
134 | -----------------------------------------------------------------------------*/ |
135 | |
136 | #if MI_DEBUG>1 |
137 | static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { |
138 | return (ctx != NULL && ctx->input[0] != 0); |
139 | } |
140 | #endif |
141 | |
142 | void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { |
143 | mi_assert_internal(mi_random_is_initialized(ctx)); |
144 | mi_assert_internal(ctx != ctx_new); |
145 | chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); |
146 | } |
147 | |
148 | uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { |
149 | mi_assert_internal(mi_random_is_initialized(ctx)); |
150 | #if MI_INTPTR_SIZE <= 4 |
151 | return chacha_next32(ctx); |
152 | #elif MI_INTPTR_SIZE == 8 |
153 | return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); |
154 | #else |
155 | # error "define mi_random_next for this platform" |
156 | #endif |
157 | } |
158 | |
159 | |
160 | /* ---------------------------------------------------------------------------- |
161 | To initialize a fresh random context we rely on the OS: |
162 | - Windows : BCryptGenRandom (or RtlGenRandom) |
163 | - macOS : CCRandomGenerateBytes, arc4random_buf |
164 | - bsd,wasi : arc4random_buf |
165 | - Linux : getrandom,/dev/urandom |
166 | If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. |
167 | -----------------------------------------------------------------------------*/ |
168 | |
169 | #if defined(_WIN32) |
170 | |
171 | #if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus) |
172 | // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using |
173 | // dynamic overriding, we observed it can raise an exception when compiled with C++, and |
174 | // sometimes deadlocks when also running under the VS debugger. |
175 | // In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. |
176 | // To be continued.. |
177 | #pragma comment (lib,"advapi32.lib") |
178 | #define RtlGenRandom SystemFunction036 |
179 | #ifdef __cplusplus |
180 | extern "C" { |
181 | #endif |
182 | BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); |
183 | #ifdef __cplusplus |
184 | } |
185 | #endif |
186 | static bool os_random_buf(void* buf, size_t buf_len) { |
187 | return (RtlGenRandom(buf, (ULONG)buf_len) != 0); |
188 | } |
189 | #else |
190 | #pragma comment (lib,"bcrypt.lib") |
191 | #include <bcrypt.h> |
192 | static bool os_random_buf(void* buf, size_t buf_len) { |
193 | return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); |
194 | } |
195 | #endif |
196 | |
197 | #elif defined(__APPLE__) |
198 | #include <AvailabilityMacros.h> |
199 | #if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 |
200 | #include <CommonCrypto/CommonCryptoError.h> |
201 | #include <CommonCrypto/CommonRandom.h> |
202 | #endif |
203 | static bool os_random_buf(void* buf, size_t buf_len) { |
204 | #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 |
205 | // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf |
206 | // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html> |
207 | return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); |
208 | #else |
209 | // fall back on older macOS |
210 | arc4random_buf(buf, buf_len); |
211 | return true; |
212 | #endif |
213 | } |
214 | |
215 | #elif defined(__ANDROID__) || defined(__DragonFly__) || \ |
216 | defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ |
217 | defined(__sun) // todo: what to use with __wasi__? |
218 | #include <stdlib.h> |
219 | static bool os_random_buf(void* buf, size_t buf_len) { |
220 | arc4random_buf(buf, buf_len); |
221 | return true; |
222 | } |
223 | #elif defined(__linux__) || defined(__HAIKU__) |
224 | #if defined(__linux__) |
225 | #include <sys/syscall.h> |
226 | #endif |
227 | #include <unistd.h> |
228 | #include <sys/types.h> |
229 | #include <sys/stat.h> |
230 | #include <fcntl.h> |
231 | #include <errno.h> |
232 | static bool os_random_buf(void* buf, size_t buf_len) { |
233 | // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` |
234 | // and for the latter the actual `getrandom` call is not always defined. |
235 | // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>) |
236 | // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. |
237 | #ifdef SYS_getrandom |
238 | #ifndef GRND_NONBLOCK |
239 | #define GRND_NONBLOCK (1) |
240 | #endif |
241 | static _Atomic(uintptr_t) no_getrandom; // = 0 |
242 | if (mi_atomic_load_acquire(&no_getrandom)==0) { |
243 | ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); |
244 | if (ret >= 0) return (buf_len == (size_t)ret); |
245 | if (errno != ENOSYS) return false; |
246 | mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom |
247 | } |
248 | #endif |
249 | int flags = O_RDONLY; |
250 | #if defined(O_CLOEXEC) |
251 | flags |= O_CLOEXEC; |
252 | #endif |
253 | int fd = open("/dev/urandom" , flags, 0); |
254 | if (fd < 0) return false; |
255 | size_t count = 0; |
256 | while(count < buf_len) { |
257 | ssize_t ret = read(fd, (char*)buf + count, buf_len - count); |
258 | if (ret<=0) { |
259 | if (errno!=EAGAIN && errno!=EINTR) break; |
260 | } |
261 | else { |
262 | count += ret; |
263 | } |
264 | } |
265 | close(fd); |
266 | return (count==buf_len); |
267 | } |
268 | #else |
269 | static bool os_random_buf(void* buf, size_t buf_len) { |
270 | return false; |
271 | } |
272 | #endif |
273 | |
274 | #if defined(_WIN32) |
275 | #include <windows.h> |
276 | #elif defined(__APPLE__) |
277 | #include <mach/mach_time.h> |
278 | #else |
279 | #include <time.h> |
280 | #endif |
281 | |
282 | uintptr_t _mi_os_random_weak(uintptr_t ) { |
283 | uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random |
284 | |
285 | #if defined(_WIN32) |
286 | LARGE_INTEGER pcount; |
287 | QueryPerformanceCounter(&pcount); |
288 | x ^= (uintptr_t)(pcount.QuadPart); |
289 | #elif defined(__APPLE__) |
290 | x ^= (uintptr_t)mach_absolute_time(); |
291 | #else |
292 | struct timespec time; |
293 | clock_gettime(CLOCK_MONOTONIC, &time); |
294 | x ^= (uintptr_t)time.tv_sec; |
295 | x ^= (uintptr_t)time.tv_nsec; |
296 | #endif |
297 | // and do a few randomization steps |
298 | uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; |
299 | for (uintptr_t i = 0; i < max; i++) { |
300 | x = _mi_random_shuffle(x); |
301 | } |
302 | mi_assert_internal(x != 0); |
303 | return x; |
304 | } |
305 | |
306 | void _mi_random_init(mi_random_ctx_t* ctx) { |
307 | uint8_t key[32]; |
308 | if (!os_random_buf(key, sizeof(key))) { |
309 | // if we fail to get random data from the OS, we fall back to a |
310 | // weak random source based on the current time |
311 | #if !defined(__wasi__) |
312 | _mi_warning_message("unable to use secure randomness\n" ); |
313 | #endif |
314 | uintptr_t x = _mi_os_random_weak(0); |
315 | for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. |
316 | x = _mi_random_shuffle(x); |
317 | ((uint32_t*)key)[i] = (uint32_t)x; |
318 | } |
319 | } |
320 | chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); |
321 | } |
322 | |
323 | /* -------------------------------------------------------- |
324 | test vectors from <https://tools.ietf.org/html/rfc8439> |
325 | ----------------------------------------------------------- */ |
326 | /* |
327 | static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { |
328 | for (size_t i = 0; i < n; i++) { |
329 | if (x[i] != y[i]) return false; |
330 | } |
331 | return true; |
332 | } |
333 | static void chacha_test(void) |
334 | { |
335 | uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; |
336 | uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; |
337 | qround(x, 0, 1, 2, 3); |
338 | mi_assert_internal(array_equals(x, x_out, 4)); |
339 | |
340 | uint32_t y[16] = { |
341 | 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, |
342 | 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, |
343 | 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, |
344 | 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; |
345 | uint32_t y_out[16] = { |
346 | 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, |
347 | 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, |
348 | 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, |
349 | 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; |
350 | qround(y, 2, 7, 8, 13); |
351 | mi_assert_internal(array_equals(y, y_out, 16)); |
352 | |
353 | mi_random_ctx_t r = { |
354 | { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, |
355 | 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, |
356 | 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, |
357 | 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, |
358 | {0}, |
359 | 0 |
360 | }; |
361 | uint32_t r_out[16] = { |
362 | 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, |
363 | 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, |
364 | 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, |
365 | 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; |
366 | chacha_block(&r); |
367 | mi_assert_internal(array_equals(r.output, r_out, 16)); |
368 | } |
369 | */ |
370 | |