1/* ----------------------------------------------------------------------------
2Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
3This is free software; you can redistribute it and/or modify it under the
4terms of the MIT license. A copy of the license can be found in the file
5"LICENSE" at the root of this distribution.
6-----------------------------------------------------------------------------*/
7#ifndef _DEFAULT_SOURCE
8#define _DEFAULT_SOURCE // for syscall() on Linux
9#endif
10
11#include "mimalloc.h"
12#include "mimalloc-internal.h"
13
14#include <string.h> // memset
15
16/* ----------------------------------------------------------------------------
17We use our own PRNG to keep predictable performance of random number generation
18and to avoid implementations that use a lock. We only use the OS provided
19random source to initialize the initial seeds. Since we do not need ultimate
20performance but we do rely on the security (for secret cookies in secure mode)
21we use a cryptographically secure generator (chacha20).
22-----------------------------------------------------------------------------*/
23
24#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?
25
26
27/* ----------------------------------------------------------------------------
28Chacha20 implementation as the original algorithm with a 64-bit nonce
29and counter: https://en.wikipedia.org/wiki/Salsa20
30The input matrix has sixteen 32-bit values:
31Position 0 to 3: constant key
32Position 4 to 11: the key
33Position 12 to 13: the counter.
34Position 14 to 15: the nonce.
35
36The implementation uses regular C code which compiles very well on modern compilers.
37(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
38-----------------------------------------------------------------------------*/
39
40static inline uint32_t rotl(uint32_t x, uint32_t shift) {
41 return (x << shift) | (x >> (32 - shift));
42}
43
44static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
45 x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
46 x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
47 x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
48 x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
49}
50
51static void chacha_block(mi_random_ctx_t* ctx)
52{
53 // scramble into `x`
54 uint32_t x[16];
55 for (size_t i = 0; i < 16; i++) {
56 x[i] = ctx->input[i];
57 }
58 for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
59 qround(x, 0, 4, 8, 12);
60 qround(x, 1, 5, 9, 13);
61 qround(x, 2, 6, 10, 14);
62 qround(x, 3, 7, 11, 15);
63 qround(x, 0, 5, 10, 15);
64 qround(x, 1, 6, 11, 12);
65 qround(x, 2, 7, 8, 13);
66 qround(x, 3, 4, 9, 14);
67 }
68
69 // add scrambled data to the initial state
70 for (size_t i = 0; i < 16; i++) {
71 ctx->output[i] = x[i] + ctx->input[i];
72 }
73 ctx->output_available = 16;
74
75 // increment the counter for the next round
76 ctx->input[12] += 1;
77 if (ctx->input[12] == 0) {
78 ctx->input[13] += 1;
79 if (ctx->input[13] == 0) { // and keep increasing into the nonce
80 ctx->input[14] += 1;
81 }
82 }
83}
84
85static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
86 if (ctx->output_available <= 0) {
87 chacha_block(ctx);
88 ctx->output_available = 16; // (assign again to suppress static analysis warning)
89 }
90 const uint32_t x = ctx->output[16 - ctx->output_available];
91 ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
92 ctx->output_available--;
93 return x;
94}
95
96static inline uint32_t read32(const uint8_t* p, size_t idx32) {
97 const size_t i = 4*idx32;
98 return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
99}
100
101static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
102{
103 // since we only use chacha for randomness (and not encryption) we
104 // do not _need_ to read 32-bit values as little endian but we do anyways
105 // just for being compatible :-)
106 memset(ctx, 0, sizeof(*ctx));
107 for (size_t i = 0; i < 4; i++) {
108 const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
109 ctx->input[i] = read32(sigma,i);
110 }
111 for (size_t i = 0; i < 8; i++) {
112 ctx->input[i + 4] = read32(key,i);
113 }
114 ctx->input[12] = 0;
115 ctx->input[13] = 0;
116 ctx->input[14] = (uint32_t)nonce;
117 ctx->input[15] = (uint32_t)(nonce >> 32);
118}
119
120static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
121 memset(ctx_new, 0, sizeof(*ctx_new));
122 _mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
123 ctx_new->input[12] = 0;
124 ctx_new->input[13] = 0;
125 ctx_new->input[14] = (uint32_t)nonce;
126 ctx_new->input[15] = (uint32_t)(nonce >> 32);
127 mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
128 chacha_block(ctx_new);
129}
130
131
132/* ----------------------------------------------------------------------------
133Random interface
134-----------------------------------------------------------------------------*/
135
136#if MI_DEBUG>1
137static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
138 return (ctx != NULL && ctx->input[0] != 0);
139}
140#endif
141
142void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
143 mi_assert_internal(mi_random_is_initialized(ctx));
144 mi_assert_internal(ctx != ctx_new);
145 chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
146}
147
148uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
149 mi_assert_internal(mi_random_is_initialized(ctx));
150 #if MI_INTPTR_SIZE <= 4
151 return chacha_next32(ctx);
152 #elif MI_INTPTR_SIZE == 8
153 return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
154 #else
155 # error "define mi_random_next for this platform"
156 #endif
157}
158
159
160/* ----------------------------------------------------------------------------
161To initialize a fresh random context we rely on the OS:
162- Windows : BCryptGenRandom (or RtlGenRandom)
163- macOS : CCRandomGenerateBytes, arc4random_buf
164- bsd,wasi : arc4random_buf
165- Linux : getrandom,/dev/urandom
166If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
167-----------------------------------------------------------------------------*/
168
169#if defined(_WIN32)
170
171#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus)
172// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
173// dynamic overriding, we observed it can raise an exception when compiled with C++, and
174// sometimes deadlocks when also running under the VS debugger.
175// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom.
176// To be continued..
177#pragma comment (lib,"advapi32.lib")
178#define RtlGenRandom SystemFunction036
179#ifdef __cplusplus
180extern "C" {
181#endif
182BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
183#ifdef __cplusplus
184}
185#endif
186static bool os_random_buf(void* buf, size_t buf_len) {
187 return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
188}
189#else
190#pragma comment (lib,"bcrypt.lib")
191#include <bcrypt.h>
192static bool os_random_buf(void* buf, size_t buf_len) {
193 return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
194}
195#endif
196
197#elif defined(__APPLE__)
198#include <AvailabilityMacros.h>
199#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
200#include <CommonCrypto/CommonCryptoError.h>
201#include <CommonCrypto/CommonRandom.h>
202#endif
203static bool os_random_buf(void* buf, size_t buf_len) {
204 #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
205 // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
206 // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
207 return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
208 #else
209 // fall back on older macOS
210 arc4random_buf(buf, buf_len);
211 return true;
212 #endif
213}
214
215#elif defined(__ANDROID__) || defined(__DragonFly__) || \
216 defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
217 defined(__sun) // todo: what to use with __wasi__?
218#include <stdlib.h>
219static bool os_random_buf(void* buf, size_t buf_len) {
220 arc4random_buf(buf, buf_len);
221 return true;
222}
223#elif defined(__linux__) || defined(__HAIKU__)
224#if defined(__linux__)
225#include <sys/syscall.h>
226#endif
227#include <unistd.h>
228#include <sys/types.h>
229#include <sys/stat.h>
230#include <fcntl.h>
231#include <errno.h>
232static bool os_random_buf(void* buf, size_t buf_len) {
233 // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
234 // and for the latter the actual `getrandom` call is not always defined.
235 // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
236 // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
237#ifdef SYS_getrandom
238 #ifndef GRND_NONBLOCK
239 #define GRND_NONBLOCK (1)
240 #endif
241 static _Atomic(uintptr_t) no_getrandom; // = 0
242 if (mi_atomic_load_acquire(&no_getrandom)==0) {
243 ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
244 if (ret >= 0) return (buf_len == (size_t)ret);
245 if (errno != ENOSYS) return false;
246 mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom
247 }
248#endif
249 int flags = O_RDONLY;
250 #if defined(O_CLOEXEC)
251 flags |= O_CLOEXEC;
252 #endif
253 int fd = open("/dev/urandom", flags, 0);
254 if (fd < 0) return false;
255 size_t count = 0;
256 while(count < buf_len) {
257 ssize_t ret = read(fd, (char*)buf + count, buf_len - count);
258 if (ret<=0) {
259 if (errno!=EAGAIN && errno!=EINTR) break;
260 }
261 else {
262 count += ret;
263 }
264 }
265 close(fd);
266 return (count==buf_len);
267}
268#else
269static bool os_random_buf(void* buf, size_t buf_len) {
270 return false;
271}
272#endif
273
274#if defined(_WIN32)
275#include <windows.h>
276#elif defined(__APPLE__)
277#include <mach/mach_time.h>
278#else
279#include <time.h>
280#endif
281
282uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
283 uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random
284
285 #if defined(_WIN32)
286 LARGE_INTEGER pcount;
287 QueryPerformanceCounter(&pcount);
288 x ^= (uintptr_t)(pcount.QuadPart);
289 #elif defined(__APPLE__)
290 x ^= (uintptr_t)mach_absolute_time();
291 #else
292 struct timespec time;
293 clock_gettime(CLOCK_MONOTONIC, &time);
294 x ^= (uintptr_t)time.tv_sec;
295 x ^= (uintptr_t)time.tv_nsec;
296 #endif
297 // and do a few randomization steps
298 uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
299 for (uintptr_t i = 0; i < max; i++) {
300 x = _mi_random_shuffle(x);
301 }
302 mi_assert_internal(x != 0);
303 return x;
304}
305
306void _mi_random_init(mi_random_ctx_t* ctx) {
307 uint8_t key[32];
308 if (!os_random_buf(key, sizeof(key))) {
309 // if we fail to get random data from the OS, we fall back to a
310 // weak random source based on the current time
311 #if !defined(__wasi__)
312 _mi_warning_message("unable to use secure randomness\n");
313 #endif
314 uintptr_t x = _mi_os_random_weak(0);
315 for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
316 x = _mi_random_shuffle(x);
317 ((uint32_t*)key)[i] = (uint32_t)x;
318 }
319 }
320 chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
321}
322
323/* --------------------------------------------------------
324test vectors from <https://tools.ietf.org/html/rfc8439>
325----------------------------------------------------------- */
326/*
327static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
328 for (size_t i = 0; i < n; i++) {
329 if (x[i] != y[i]) return false;
330 }
331 return true;
332}
333static void chacha_test(void)
334{
335 uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
336 uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
337 qround(x, 0, 1, 2, 3);
338 mi_assert_internal(array_equals(x, x_out, 4));
339
340 uint32_t y[16] = {
341 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
342 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
343 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
344 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
345 uint32_t y_out[16] = {
346 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
347 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
348 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
349 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
350 qround(y, 2, 7, 8, 13);
351 mi_assert_internal(array_equals(y, y_out, 16));
352
353 mi_random_ctx_t r = {
354 { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
355 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
356 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
357 0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
358 {0},
359 0
360 };
361 uint32_t r_out[16] = {
362 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
363 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
364 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
365 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
366 chacha_block(&r);
367 mi_assert_internal(array_equals(r.output, r_out, 16));
368}
369*/
370