1 | /* Authors: Gregory P. Smith & Jeffrey Yasskin */ |
2 | #include "Python.h" |
3 | #include "pycore_fileutils.h" |
4 | #if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE) |
5 | # define _GNU_SOURCE |
6 | #endif |
7 | #include <unistd.h> |
8 | #include <fcntl.h> |
9 | #ifdef HAVE_SYS_TYPES_H |
10 | #include <sys/types.h> |
11 | #endif |
12 | #if defined(HAVE_SYS_STAT_H) |
13 | #include <sys/stat.h> |
14 | #endif |
15 | #ifdef HAVE_SYS_SYSCALL_H |
16 | #include <sys/syscall.h> |
17 | #endif |
18 | #if defined(HAVE_SYS_RESOURCE_H) |
19 | #include <sys/resource.h> |
20 | #endif |
21 | #ifdef HAVE_DIRENT_H |
22 | #include <dirent.h> |
23 | #endif |
24 | #ifdef HAVE_GRP_H |
25 | #include <grp.h> |
26 | #endif /* HAVE_GRP_H */ |
27 | |
28 | #include "posixmodule.h" |
29 | |
30 | #ifdef _Py_MEMORY_SANITIZER |
31 | # include <sanitizer/msan_interface.h> |
32 | #endif |
33 | |
34 | #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64) |
35 | # include <sys/linux-syscalls.h> |
36 | # define SYS_getdents64 __NR_getdents64 |
37 | #endif |
38 | |
39 | #if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \ |
40 | defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK) |
41 | /* If this is ever expanded to non-Linux platforms, verify what calls are |
42 | * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */ |
43 | # include <signal.h> |
44 | # define VFORK_USABLE 1 |
45 | #endif |
46 | |
47 | #if defined(__sun) && defined(__SVR4) |
48 | /* readdir64 is used to work around Solaris 9 bug 6395699. */ |
49 | # define readdir readdir64 |
50 | # define dirent dirent64 |
51 | # if !defined(HAVE_DIRFD) |
52 | /* Some versions of Solaris lack dirfd(). */ |
53 | # define dirfd(dirp) ((dirp)->dd_fd) |
54 | # define HAVE_DIRFD |
55 | # endif |
56 | #endif |
57 | |
58 | #if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__) |
59 | # define FD_DIR "/dev/fd" |
60 | #else |
61 | # define FD_DIR "/proc/self/fd" |
62 | #endif |
63 | |
64 | #ifdef NGROUPS_MAX |
65 | #define MAX_GROUPS NGROUPS_MAX |
66 | #else |
67 | #define MAX_GROUPS 64 |
68 | #endif |
69 | |
70 | #define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0) |
71 | |
72 | static struct PyModuleDef _posixsubprocessmodule; |
73 | |
/* Convert ASCII to a positive int, no libc call. Rejects overflow. -1 on error. */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Guard against signed int overflow (undefined behavior). A value
         * this large can never be a real fd directory entry anyway. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;
        }
        num = num * 10 + digit;
        ++name;
    }
    if (*name)
        return -1;  /* Non digit found, not a number. */
    return num;
}
87 | |
88 | |
89 | #if defined(__FreeBSD__) || defined(__DragonFly__) |
90 | /* When /dev/fd isn't mounted it is often a static directory populated |
91 | * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD. |
92 | * NetBSD and OpenBSD have a /proc fs available (though not necessarily |
93 | * mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs |
94 | * that properly supports /dev/fd. |
95 | */ |
96 | static int |
97 | _is_fdescfs_mounted_on_dev_fd(void) |
98 | { |
99 | struct stat dev_stat; |
100 | struct stat dev_fd_stat; |
101 | if (stat("/dev" , &dev_stat) != 0) |
102 | return 0; |
103 | if (stat(FD_DIR, &dev_fd_stat) != 0) |
104 | return 0; |
105 | if (dev_stat.st_dev == dev_fd_stat.st_dev) |
106 | return 0; /* / == /dev == /dev/fd means it is static. #fail */ |
107 | return 1; |
108 | } |
109 | #endif |
110 | |
111 | |
112 | /* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */ |
113 | static int |
114 | _sanity_check_python_fd_sequence(PyObject *fd_sequence) |
115 | { |
116 | Py_ssize_t seq_idx; |
117 | long prev_fd = -1; |
118 | for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) { |
119 | PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx); |
120 | long iter_fd; |
121 | if (!PyLong_Check(py_fd)) { |
122 | return 1; |
123 | } |
124 | iter_fd = PyLong_AsLong(py_fd); |
125 | if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) { |
126 | /* Negative, overflow, unsorted, too big for a fd. */ |
127 | return 1; |
128 | } |
129 | prev_fd = iter_fd; |
130 | } |
131 | return 0; |
132 | } |
133 | |
134 | |
135 | /* Is fd found in the sorted Python Sequence? */ |
136 | static int |
137 | _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence) |
138 | { |
139 | /* Binary search. */ |
140 | Py_ssize_t search_min = 0; |
141 | Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1; |
142 | if (search_max < 0) |
143 | return 0; |
144 | do { |
145 | long middle = (search_min + search_max) / 2; |
146 | long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle)); |
147 | if (fd == middle_fd) |
148 | return 1; |
149 | if (fd > middle_fd) |
150 | search_min = middle + 1; |
151 | else |
152 | search_max = middle - 1; |
153 | } while (search_min <= search_max); |
154 | return 0; |
155 | } |
156 | |
157 | static int |
158 | make_inheritable(PyObject *py_fds_to_keep, int errpipe_write) |
159 | { |
160 | Py_ssize_t i, len; |
161 | |
162 | len = PyTuple_GET_SIZE(py_fds_to_keep); |
163 | for (i = 0; i < len; ++i) { |
164 | PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i); |
165 | long fd = PyLong_AsLong(fdobj); |
166 | assert(!PyErr_Occurred()); |
167 | assert(0 <= fd && fd <= INT_MAX); |
168 | if (fd == errpipe_write) { |
169 | /* errpipe_write is part of py_fds_to_keep. It must be closed at |
170 | exec(), but kept open in the child process until exec() is |
171 | called. */ |
172 | continue; |
173 | } |
174 | if (_Py_set_inheritable_async_safe((int)fd, 1, NULL) < 0) |
175 | return -1; |
176 | } |
177 | return 0; |
178 | } |
179 | |
180 | |
/* Get the maximum file descriptor that could be opened by this process.
 * This function is async signal safe for use between fork() and exec().
 */
static long
safe_get_max_fd(void)
{
#if defined(__NetBSD__)
    long net_max = fcntl(0, F_MAXFD);
    if (net_max >= 0) {
        return net_max;
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    struct rlimit rl;
    /* Not on the POSIX async signal safe functions list but likely
     * safe. TODO - Someone should audit OpenBSD to make sure. */
    if (getrlimit(RLIMIT_NOFILE, &rl) >= 0) {
        return (long) rl.rlim_max;
    }
#endif
#ifdef _SC_OPEN_MAX
    long sc_max = sysconf(_SC_OPEN_MAX);
    if (sc_max != -1) {
        return sc_max;
    }
#endif
    return 256;  /* Matches legacy Lib/subprocess.py behavior. */
}
207 | |
208 | |
209 | /* Close all file descriptors in the range from start_fd and higher |
210 | * except for those in py_fds_to_keep. If the range defined by |
211 | * [start_fd, safe_get_max_fd()) is large this will take a long |
212 | * time as it calls close() on EVERY possible fd. |
213 | * |
214 | * It isn't possible to know for sure what the max fd to go up to |
215 | * is for processes with the capability of raising their maximum. |
216 | */ |
217 | static void |
218 | _close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep) |
219 | { |
220 | long end_fd = safe_get_max_fd(); |
221 | Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep); |
222 | Py_ssize_t keep_seq_idx; |
223 | /* As py_fds_to_keep is sorted we can loop through the list closing |
224 | * fds in between any in the keep list falling within our range. */ |
225 | for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) { |
226 | PyObject* py_keep_fd = PyTuple_GET_ITEM(py_fds_to_keep, keep_seq_idx); |
227 | int keep_fd = PyLong_AsLong(py_keep_fd); |
228 | if (keep_fd < start_fd) |
229 | continue; |
230 | _Py_closerange(start_fd, keep_fd - 1); |
231 | start_fd = keep_fd + 1; |
232 | } |
233 | if (start_fd <= end_fd) { |
234 | _Py_closerange(start_fd, end_fd); |
235 | } |
236 | } |
237 | |
238 | |
239 | #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H) |
240 | /* It doesn't matter if d_name has room for NAME_MAX chars; we're using this |
241 | * only to read a directory of short file descriptor number names. The kernel |
242 | * will return an error if we didn't give it enough space. Highly Unlikely. |
243 | * This structure is very old and stable: It will not change unless the kernel |
244 | * chooses to break compatibility with all existing binaries. Highly Unlikely. |
245 | */ |
struct linux_dirent64 {
    unsigned long long d_ino;    /* 64-bit inode number */
    long long d_off;             /* Offset to the next linux_dirent64 */
    unsigned short d_reclen;     /* Length of this linux_dirent */
    unsigned char  d_type;       /* File type (DT_* constants) */
    char           d_name[256];  /* Filename (null-terminated) */
};
253 | |
254 | /* Close all open file descriptors in the range from start_fd and higher |
255 | * Do not close any in the sorted py_fds_to_keep list. |
256 | * |
257 | * This version is async signal safe as it does not make any unsafe C library |
258 | * calls, malloc calls or handle any locks. It is _unfortunate_ to be forced |
259 | * to resort to making a kernel system call directly but this is the ONLY api |
260 | * available that does no harm. opendir/readdir/closedir perform memory |
261 | * allocation and locking so while they usually work they are not guaranteed |
262 | * to (especially if you have replaced your malloc implementation). A version |
263 | * of this function that uses those can be found in the _maybe_unsafe variant. |
264 | * |
265 | * This is Linux specific because that is all I am ready to test it on. It |
266 | * should be easy to add OS specific dirent or dirent64 structures and modify |
267 | * it with some cpp #define magic to work on other OSes as well if you want. |
268 | */ |
static void
_close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
{
    int fd_dir_fd;

    fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
    if (fd_dir_fd == -1) {
        /* No way to get a list of open fds. */
        _close_fds_by_brute_force(start_fd, py_fds_to_keep);
        return;
    } else {
        char buffer[sizeof(struct linux_dirent64)];
        int bytes;
        /* Read directory entries via the raw getdents64 syscall so no
         * libc allocation or locking happens between fork and exec. */
        while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
                                (struct linux_dirent64 *)buffer,
                                sizeof(buffer))) > 0) {
            struct linux_dirent64 *entry;
            int offset;
#ifdef _Py_MEMORY_SANITIZER
            /* MSan cannot see writes performed by the raw syscall; mark
             * the buffer initialized by hand. */
            __msan_unpoison(buffer, bytes);
#endif
            /* Entries are variable-length; d_reclen advances to the next. */
            for (offset = 0; offset < bytes; offset += entry->d_reclen) {
                int fd;
                entry = (struct linux_dirent64 *)(buffer + offset);
                if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
                    continue; /* Not a number. */
                /* Skip the directory fd we are iterating with, fds below
                 * start_fd, and fds the caller asked us to keep. */
                if (fd != fd_dir_fd && fd >= start_fd &&
                    !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
                    close(fd);
                }
            }
        }
        close(fd_dir_fd);
    }
}
304 | |
305 | #define _close_open_fds _close_open_fds_safe |
306 | |
307 | #else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */ |
308 | |
309 | |
310 | /* Close all open file descriptors from start_fd and higher. |
311 | * Do not close any in the sorted py_fds_to_keep tuple. |
312 | * |
313 | * This function violates the strict use of async signal safe functions. :( |
314 | * It calls opendir(), readdir() and closedir(). Of these, the one most |
315 | * likely to ever cause a problem is opendir() as it performs an internal |
316 | * malloc(). Practically this should not be a problem. The Java VM makes the |
317 | * same calls between fork and exec in its own UNIXProcess_md.c implementation. |
318 | * |
319 | * readdir_r() is not used because it provides no benefit. It is typically |
320 | * implemented as readdir() followed by memcpy(). See also: |
321 | * http://womble.decadent.org.uk/readdir_r-advisory.html |
322 | */ |
static void
_close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
{
    DIR *proc_fd_dir;
#ifndef HAVE_DIRFD
    /* Without dirfd() we can't learn which fd opendir() will use, so
     * first skip past any fds we must keep... */
    while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
        ++start_fd;
    }
    /* Close our lowest fd before we call opendir so that it is likely to
     * reuse that fd otherwise we might close opendir's file descriptor in
     * our loop. This trick assumes that fd's are allocated on a lowest
     * available basis. */
    close(start_fd);
    ++start_fd;
#endif

#if defined(__FreeBSD__) || defined(__DragonFly__)
    if (!_is_fdescfs_mounted_on_dev_fd())
        proc_fd_dir = NULL;
    else
#endif
        proc_fd_dir = opendir(FD_DIR);
    if (!proc_fd_dir) {
        /* No way to get a list of open fds. */
        _close_fds_by_brute_force(start_fd, py_fds_to_keep);
    } else {
        struct dirent *dir_entry;
#ifdef HAVE_DIRFD
        int fd_used_by_opendir = dirfd(proc_fd_dir);
#else
        /* Assume opendir() grabbed the fd we freed up just above. */
        int fd_used_by_opendir = start_fd - 1;
#endif
        errno = 0;
        while ((dir_entry = readdir(proc_fd_dir))) {
            int fd;
            if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
                continue; /* Not a number. */
            if (fd != fd_used_by_opendir && fd >= start_fd &&
                !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
                close(fd);
            }
            /* Clear errno so a benign prior failure isn't mistaken for a
             * readdir() error when the loop ends. */
            errno = 0;
        }
        if (errno) {
            /* readdir error, revert behavior. Highly Unlikely. */
            _close_fds_by_brute_force(start_fd, py_fds_to_keep);
        }
        closedir(proc_fd_dir);
    }
}
373 | |
374 | #define _close_open_fds _close_open_fds_maybe_unsafe |
375 | |
376 | #endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */ |
377 | |
378 | |
379 | #ifdef VFORK_USABLE |
380 | /* Reset dispositions for all signals to SIG_DFL except for ignored |
381 | * signals. This way we ensure that no signal handlers can run |
382 | * after we unblock signals in a child created by vfork(). |
383 | */ |
384 | static void |
385 | reset_signal_handlers(const sigset_t *child_sigmask) |
386 | { |
387 | struct sigaction sa_dfl = {.sa_handler = SIG_DFL}; |
388 | for (int sig = 1; sig < _NSIG; sig++) { |
389 | /* Dispositions for SIGKILL and SIGSTOP can't be changed. */ |
390 | if (sig == SIGKILL || sig == SIGSTOP) { |
391 | continue; |
392 | } |
393 | |
394 | /* There is no need to reset the disposition of signals that will |
395 | * remain blocked across execve() since the kernel will do it. */ |
396 | if (sigismember(child_sigmask, sig) == 1) { |
397 | continue; |
398 | } |
399 | |
400 | struct sigaction sa; |
401 | /* C libraries usually return EINVAL for signals used |
402 | * internally (e.g. for thread cancellation), so simply |
403 | * skip errors here. */ |
404 | if (sigaction(sig, NULL, &sa) == -1) { |
405 | continue; |
406 | } |
407 | |
408 | /* void *h works as these fields are both pointer types already. */ |
409 | void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction : |
410 | (void *)sa.sa_handler); |
411 | if (h == SIG_IGN || h == SIG_DFL) { |
412 | continue; |
413 | } |
414 | |
415 | /* This call can't reasonably fail, but if it does, terminating |
416 | * the child seems to be too harsh, so ignore errors. */ |
417 | (void) sigaction(sig, &sa_dfl, NULL); |
418 | } |
419 | } |
420 | #endif /* VFORK_USABLE */ |
421 | |
422 | |
423 | /* |
424 | * This function is code executed in the child process immediately after |
425 | * (v)fork to set things up and call exec(). |
426 | * |
427 | * All of the code in this function must only use async-signal-safe functions, |
428 | * listed at `man 7 signal` or |
429 | * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. |
430 | * |
431 | * This restriction is documented at |
432 | * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html. |
433 | * |
434 | * If this function is called after vfork(), even more care must be taken. |
435 | * The lack of preparations that C libraries normally take on fork(), |
436 | * as well as sharing the address space with the parent, might make even |
437 | * async-signal-safe functions vfork-unsafe. In particular, on Linux, |
438 | * set*id() and setgroups() library functions must not be called, since |
439 | * they have to interact with the library-level thread list and send |
440 | * library-internal signals to implement per-process credentials semantics |
441 | * required by POSIX but not supported natively on Linux. Another reason to |
442 | * avoid this family of functions is that sharing an address space between |
443 | * processes running with different privileges is inherently insecure. |
444 | * See bpo-35823 for further discussion and references. |
445 | * |
446 | * In some C libraries, setrlimit() has the same thread list/signalling |
447 | * behavior since resource limits were per-thread attributes before |
448 | * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue |
449 | * (https://www.openwall.com/lists/musl/2020/10/15/6). |
450 | * |
451 | * If vfork-unsafe functionality is desired after vfork(), consider using |
452 | * syscall() to obtain it. |
453 | */ |
_Py_NO_INLINE static void
child_exec(char *const exec_array[],
           char *const argv[],
           char *const envp[],
           const char *cwd,
           int p2cread, int p2cwrite,
           int c2pread, int c2pwrite,
           int errread, int errwrite,
           int errpipe_read, int errpipe_write,
           int close_fds, int restore_signals,
           int call_setsid,
           int call_setgid, gid_t gid,
           int call_setgroups, size_t groups_size, const gid_t *groups,
           int call_setuid, uid_t uid, int child_umask,
           const void *child_sigmask,
           PyObject *py_fds_to_keep,
           PyObject *preexec_fn,
           PyObject *preexec_fn_args_tuple)
{
    int i, saved_errno, reached_preexec = 0;
    PyObject *result;
    /* Sent to the parent over errpipe_write on failure. */
    const char* err_msg = "" ;
    /* Buffer large enough to hold a hex integer. We can't malloc. */
    char hex_errno[sizeof(saved_errno)*2+1];

    if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
        goto error;

    /* Close parent's pipe ends. */
    if (p2cwrite != -1)
        POSIX_CALL(close(p2cwrite));
    if (c2pread != -1)
        POSIX_CALL(close(c2pread));
    if (errread != -1)
        POSIX_CALL(close(errread));
    POSIX_CALL(close(errpipe_read));

    /* When duping fds, if there arises a situation where one of the fds is
       either 0, 1 or 2, it is possible that it is overwritten (#12607). */
    if (c2pwrite == 0) {
        POSIX_CALL(c2pwrite = dup(c2pwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
            goto error;
        }
    }
    while (errwrite == 0 || errwrite == 1) {
        POSIX_CALL(errwrite = dup(errwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
            goto error;
        }
    }

    /* Dup fds for child.
       dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
       would be a no-op (issue #10806). */
    if (p2cread == 0) {
        if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
            goto error;
    }
    else if (p2cread != -1)
        POSIX_CALL(dup2(p2cread, 0)); /* stdin */

    if (c2pwrite == 1) {
        if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
            goto error;
    }
    else if (c2pwrite != -1)
        POSIX_CALL(dup2(c2pwrite, 1)); /* stdout */

    if (errwrite == 2) {
        if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
            goto error;
    }
    else if (errwrite != -1)
        POSIX_CALL(dup2(errwrite, 2)); /* stderr */

    /* We no longer manually close p2cread, c2pwrite, and errwrite here as
     * _close_open_fds takes care when it is not already non-inheritable. */

    if (cwd)
        POSIX_CALL(chdir(cwd));

    if (child_umask >= 0)
        umask(child_umask); /* umask() always succeeds. */

    if (restore_signals)
        _Py_RestoreSignals();

#ifdef VFORK_USABLE
    /* A non-NULL child_sigmask means we were created via vfork(): restore
     * default handlers before unblocking signals in this child. */
    if (child_sigmask) {
        reset_signal_handlers(child_sigmask);
        if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
            goto error;
        }
    }
#endif

#ifdef HAVE_SETSID
    if (call_setsid)
        POSIX_CALL(setsid());
#endif

#ifdef HAVE_SETGROUPS
    if (call_setgroups)
        POSIX_CALL(setgroups(groups_size, groups));
#endif /* HAVE_SETGROUPS */

#ifdef HAVE_SETREGID
    if (call_setgid)
        POSIX_CALL(setregid(gid, gid));
#endif /* HAVE_SETREGID */

#ifdef HAVE_SETREUID
    if (call_setuid)
        POSIX_CALL(setreuid(uid, uid));
#endif /* HAVE_SETREUID */


    reached_preexec = 1;
    if (preexec_fn != Py_None && preexec_fn_args_tuple) {
        /* This is where the user has asked us to deadlock their program. */
        result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
        if (result == NULL) {
            /* Stringifying the exception or traceback would involve
             * memory allocation and thus potential for deadlock.
             * We've already faced potential deadlock by calling back
             * into Python in the first place, so it probably doesn't
             * matter but we avoid it to minimize the possibility. */
            err_msg = "Exception occurred in preexec_fn." ;
            errno = 0; /* We don't want to report an OSError. */
            goto error;
        }
        /* Py_DECREF(result); - We're about to exec so why bother? */
    }

    /* close FDs after executing preexec_fn, which might open FDs */
    if (close_fds) {
        /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
        _close_open_fds(3, py_fds_to_keep);
    }

    /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
    /* given the executable_list generated by Lib/subprocess.py.     */
    saved_errno = 0;
    for (i = 0; exec_array[i] != NULL; ++i) {
        const char *executable = exec_array[i];
        if (envp) {
            execve(executable, argv, envp);
        } else {
            execv(executable, argv);
        }
        if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
            saved_errno = errno;
        }
    }
    /* Report the first exec error, not the last. */
    if (saved_errno)
        errno = saved_errno;

error:
    saved_errno = errno;
    /* Report the posix error to our parent process. */
    /* We ignore all write() return values as the total size of our writes is
       less than PIPEBUF and we cannot do anything about an error anyways.
       Use _Py_write_noraise() to retry write() if it is interrupted by a
       signal (fails with EINTR). */
    if (saved_errno) {
        char *cur;
        _Py_write_noraise(errpipe_write, "OSError:" , 8);
        /* Format saved_errno as hex, building the string backwards from
         * the end of the buffer; no libc formatting calls allowed here. */
        cur = hex_errno + sizeof(hex_errno);
        while (saved_errno != 0 && cur != hex_errno) {
            *--cur = Py_hexdigits[saved_errno % 16];
            saved_errno /= 16;
        }
        _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
        _Py_write_noraise(errpipe_write, ":" , 1);
        if (!reached_preexec) {
            /* Indicate to the parent that the error happened before exec(). */
            _Py_write_noraise(errpipe_write, "noexec" , 6);
        }
        /* We can't call strerror(saved_errno). It is not async signal safe.
         * The parent process will look the error message up. */
    } else {
        _Py_write_noraise(errpipe_write, "SubprocessError:0:" , 18);
        _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
    }
}
643 | |
644 | |
645 | /* The main purpose of this wrapper function is to isolate vfork() from both |
646 | * subprocess_fork_exec() and child_exec(). A child process created via |
647 | * vfork() executes on the same stack as the parent process while the latter is |
648 | * suspended, so this function should not be inlined to avoid compiler bugs |
649 | * that might clobber data needed by the parent later. Additionally, |
650 | * child_exec() should not be inlined to avoid spurious -Wclobber warnings from |
651 | * GCC (see bpo-35823). |
652 | */ |
_Py_NO_INLINE static pid_t
do_fork_exec(char *const exec_array[],
             char *const argv[],
             char *const envp[],
             const char *cwd,
             int p2cread, int p2cwrite,
             int c2pread, int c2pwrite,
             int errread, int errwrite,
             int errpipe_read, int errpipe_write,
             int close_fds, int restore_signals,
             int call_setsid,
             int call_setgid, gid_t gid,
             int call_setgroups, size_t groups_size, const gid_t *groups,
             int call_setuid, uid_t uid, int child_umask,
             const void *child_sigmask,
             PyObject *py_fds_to_keep,
             PyObject *preexec_fn,
             PyObject *preexec_fn_args_tuple)
{

    pid_t pid;

#ifdef VFORK_USABLE
    /* child_sigmask is non-NULL only when the caller decided vfork() is
     * safe for this spawn (no credential changes, no preexec_fn). */
    if (child_sigmask) {
        /* These are checked by our caller; verify them in debug builds. */
        assert(!call_setuid);
        assert(!call_setgid);
        assert(!call_setgroups);
        assert(preexec_fn == Py_None);

        pid = vfork();
        if (pid == -1) {
            /* If vfork() fails, fall back to using fork(). When it isn't
             * allowed in a process by the kernel, vfork can return -1
             * with errno EINVAL. https://bugs.python.org/issue47151. */
            pid = fork();
        }
    } else
#endif
    {
        pid = fork();
    }

    /* Parent process (or fork/vfork failure, pid == -1): return at once. */
    if (pid != 0) {
        return pid;
    }

    /* Child process.
     * See the comment above child_exec() for restrictions imposed on
     * the code below.
     */

    if (preexec_fn != Py_None) {
        /* We'll be calling back into Python later so we need to do this.
         * This call may not be async-signal-safe but neither is calling
         * back into Python. The user asked us to use hope as a strategy
         * to avoid deadlock... */
        PyOS_AfterFork_Child();
    }

    child_exec(exec_array, argv, envp, cwd,
               p2cread, p2cwrite, c2pread, c2pwrite,
               errread, errwrite, errpipe_read, errpipe_write,
               close_fds, restore_signals, call_setsid,
               call_setgid, gid, call_setgroups, groups_size, groups,
               call_setuid, uid, child_umask, child_sigmask,
               py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
    /* child_exec() only returns on failure; never fall back into the
     * parent's Python runtime state. */
    _exit(255);
    return 0; /* Dead code to avoid a potential compiler warning. */
}
723 | |
724 | |
725 | static PyObject * |
726 | subprocess_fork_exec(PyObject *module, PyObject *args) |
727 | { |
728 | PyObject *gc_module = NULL; |
729 | PyObject *executable_list, *py_fds_to_keep; |
730 | PyObject *env_list, *preexec_fn; |
731 | PyObject *process_args, *converted_args = NULL, *fast_args = NULL; |
732 | PyObject *preexec_fn_args_tuple = NULL; |
733 | PyObject *groups_list; |
734 | PyObject *uid_object, *gid_object; |
735 | int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite; |
736 | int errpipe_read, errpipe_write, close_fds, restore_signals; |
737 | int call_setsid; |
738 | int call_setgid = 0, call_setgroups = 0, call_setuid = 0; |
739 | uid_t uid; |
740 | gid_t gid, *groups = NULL; |
741 | int child_umask; |
742 | PyObject *cwd_obj, *cwd_obj2 = NULL; |
743 | const char *cwd; |
744 | pid_t pid = -1; |
745 | int need_to_reenable_gc = 0; |
746 | char *const *exec_array, *const *argv = NULL, *const *envp = NULL; |
747 | Py_ssize_t arg_num, num_groups = 0; |
748 | int need_after_fork = 0; |
749 | int saved_errno = 0; |
750 | |
751 | if (!PyArg_ParseTuple( |
752 | args, "OOpO!OOiiiiiiiiiiOOOiO:fork_exec" , |
753 | &process_args, &executable_list, |
754 | &close_fds, &PyTuple_Type, &py_fds_to_keep, |
755 | &cwd_obj, &env_list, |
756 | &p2cread, &p2cwrite, &c2pread, &c2pwrite, |
757 | &errread, &errwrite, &errpipe_read, &errpipe_write, |
758 | &restore_signals, &call_setsid, |
759 | &gid_object, &groups_list, &uid_object, &child_umask, |
760 | &preexec_fn)) |
761 | return NULL; |
762 | |
763 | if ((preexec_fn != Py_None) && |
764 | (PyInterpreterState_Get() != PyInterpreterState_Main())) { |
765 | PyErr_SetString(PyExc_RuntimeError, |
766 | "preexec_fn not supported within subinterpreters" ); |
767 | return NULL; |
768 | } |
769 | |
770 | if (close_fds && errpipe_write < 3) { /* precondition */ |
771 | PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3" ); |
772 | return NULL; |
773 | } |
774 | if (_sanity_check_python_fd_sequence(py_fds_to_keep)) { |
775 | PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep" ); |
776 | return NULL; |
777 | } |
778 | |
779 | PyInterpreterState *interp = PyInterpreterState_Get(); |
780 | const PyConfig *config = _PyInterpreterState_GetConfig(interp); |
781 | if (config->_isolated_interpreter) { |
782 | PyErr_SetString(PyExc_RuntimeError, |
783 | "subprocess not supported for isolated subinterpreters" ); |
784 | return NULL; |
785 | } |
786 | |
787 | /* We need to call gc.disable() when we'll be calling preexec_fn */ |
788 | if (preexec_fn != Py_None) { |
789 | need_to_reenable_gc = PyGC_Disable(); |
790 | } |
791 | |
792 | exec_array = _PySequence_BytesToCharpArray(executable_list); |
793 | if (!exec_array) |
794 | goto cleanup; |
795 | |
796 | /* Convert args and env into appropriate arguments for exec() */ |
797 | /* These conversions are done in the parent process to avoid allocating |
798 | or freeing memory in the child process. */ |
799 | if (process_args != Py_None) { |
800 | Py_ssize_t num_args; |
801 | /* Equivalent to: */ |
802 | /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */ |
803 | fast_args = PySequence_Fast(process_args, "argv must be a tuple" ); |
804 | if (fast_args == NULL) |
805 | goto cleanup; |
806 | num_args = PySequence_Fast_GET_SIZE(fast_args); |
807 | converted_args = PyTuple_New(num_args); |
808 | if (converted_args == NULL) |
809 | goto cleanup; |
810 | for (arg_num = 0; arg_num < num_args; ++arg_num) { |
811 | PyObject *borrowed_arg, *converted_arg; |
812 | if (PySequence_Fast_GET_SIZE(fast_args) != num_args) { |
813 | PyErr_SetString(PyExc_RuntimeError, "args changed during iteration" ); |
814 | goto cleanup; |
815 | } |
816 | borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num); |
817 | if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0) |
818 | goto cleanup; |
819 | PyTuple_SET_ITEM(converted_args, arg_num, converted_arg); |
820 | } |
821 | |
822 | argv = _PySequence_BytesToCharpArray(converted_args); |
823 | Py_CLEAR(converted_args); |
824 | Py_CLEAR(fast_args); |
825 | if (!argv) |
826 | goto cleanup; |
827 | } |
828 | |
829 | if (env_list != Py_None) { |
830 | envp = _PySequence_BytesToCharpArray(env_list); |
831 | if (!envp) |
832 | goto cleanup; |
833 | } |
834 | |
835 | if (cwd_obj != Py_None) { |
836 | if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0) |
837 | goto cleanup; |
838 | cwd = PyBytes_AsString(cwd_obj2); |
839 | } else { |
840 | cwd = NULL; |
841 | } |
842 | |
843 | if (groups_list != Py_None) { |
844 | #ifdef HAVE_SETGROUPS |
845 | Py_ssize_t i; |
846 | gid_t gid; |
847 | |
848 | if (!PyList_Check(groups_list)) { |
849 | PyErr_SetString(PyExc_TypeError, |
850 | "setgroups argument must be a list" ); |
851 | goto cleanup; |
852 | } |
853 | num_groups = PySequence_Size(groups_list); |
854 | |
855 | if (num_groups < 0) |
856 | goto cleanup; |
857 | |
858 | if (num_groups > MAX_GROUPS) { |
859 | PyErr_SetString(PyExc_ValueError, "too many groups" ); |
860 | goto cleanup; |
861 | } |
862 | |
863 | if ((groups = PyMem_RawMalloc(num_groups * sizeof(gid_t))) == NULL) { |
864 | PyErr_SetString(PyExc_MemoryError, |
865 | "failed to allocate memory for group list" ); |
866 | goto cleanup; |
867 | } |
868 | |
869 | for (i = 0; i < num_groups; i++) { |
870 | PyObject *elem; |
871 | elem = PySequence_GetItem(groups_list, i); |
872 | if (!elem) |
873 | goto cleanup; |
874 | if (!PyLong_Check(elem)) { |
875 | PyErr_SetString(PyExc_TypeError, |
876 | "groups must be integers" ); |
877 | Py_DECREF(elem); |
878 | goto cleanup; |
879 | } else { |
880 | if (!_Py_Gid_Converter(elem, &gid)) { |
881 | Py_DECREF(elem); |
882 | PyErr_SetString(PyExc_ValueError, "invalid group id" ); |
883 | goto cleanup; |
884 | } |
885 | groups[i] = gid; |
886 | } |
887 | Py_DECREF(elem); |
888 | } |
889 | call_setgroups = 1; |
890 | |
891 | #else /* HAVE_SETGROUPS */ |
892 | PyErr_BadInternalCall(); |
893 | goto cleanup; |
894 | #endif /* HAVE_SETGROUPS */ |
895 | } |
896 | |
897 | if (gid_object != Py_None) { |
898 | #ifdef HAVE_SETREGID |
899 | if (!_Py_Gid_Converter(gid_object, &gid)) |
900 | goto cleanup; |
901 | |
902 | call_setgid = 1; |
903 | |
904 | #else /* HAVE_SETREGID */ |
905 | PyErr_BadInternalCall(); |
906 | goto cleanup; |
#endif /* HAVE_SETREGID */
908 | } |
909 | |
910 | if (uid_object != Py_None) { |
911 | #ifdef HAVE_SETREUID |
912 | if (!_Py_Uid_Converter(uid_object, &uid)) |
913 | goto cleanup; |
914 | |
915 | call_setuid = 1; |
916 | |
917 | #else /* HAVE_SETREUID */ |
918 | PyErr_BadInternalCall(); |
919 | goto cleanup; |
920 | #endif /* HAVE_SETREUID */ |
921 | } |
922 | |
923 | /* This must be the last thing done before fork() because we do not |
924 | * want to call PyOS_BeforeFork() if there is any chance of another |
925 | * error leading to the cleanup: code without calling fork(). */ |
926 | if (preexec_fn != Py_None) { |
927 | preexec_fn_args_tuple = PyTuple_New(0); |
928 | if (!preexec_fn_args_tuple) |
929 | goto cleanup; |
930 | PyOS_BeforeFork(); |
931 | need_after_fork = 1; |
932 | } |
933 | |
934 | /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */ |
935 | const void *old_sigmask = NULL; |
936 | #ifdef VFORK_USABLE |
937 | /* Use vfork() only if it's safe. See the comment above child_exec(). */ |
938 | sigset_t old_sigs; |
939 | int allow_vfork; |
940 | if (preexec_fn == Py_None) { |
941 | allow_vfork = 1; /* 3.10.0 behavior */ |
942 | PyObject *subprocess_module = PyImport_ImportModule("subprocess" ); |
943 | if (subprocess_module != NULL) { |
944 | PyObject *allow_vfork_obj = PyObject_GetAttrString( |
945 | subprocess_module, "_USE_VFORK" ); |
946 | Py_DECREF(subprocess_module); |
947 | if (allow_vfork_obj != NULL) { |
948 | allow_vfork = PyObject_IsTrue(allow_vfork_obj); |
949 | Py_DECREF(allow_vfork_obj); |
950 | if (allow_vfork < 0) { |
951 | PyErr_Clear(); /* Bad _USE_VFORK attribute. */ |
952 | allow_vfork = 1; /* 3.10.0 behavior */ |
953 | } |
954 | } else { |
955 | PyErr_Clear(); /* No _USE_VFORK attribute. */ |
956 | } |
957 | } else { |
958 | PyErr_Clear(); /* no subprocess module? suspicious; don't care. */ |
959 | } |
960 | } else { |
961 | allow_vfork = 0; |
962 | } |
963 | if (allow_vfork && !call_setuid && !call_setgid && !call_setgroups) { |
964 | /* Block all signals to ensure that no signal handlers are run in the |
965 | * child process while it shares memory with us. Note that signals |
966 | * used internally by C libraries won't be blocked by |
967 | * pthread_sigmask(), but signal handlers installed by C libraries |
968 | * normally service only signals originating from *within the process*, |
969 | * so it should be sufficient to consider any library function that |
970 | * might send such a signal to be vfork-unsafe and do not call it in |
971 | * the child. |
972 | */ |
973 | sigset_t all_sigs; |
974 | sigfillset(&all_sigs); |
975 | if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) { |
976 | goto cleanup; |
977 | } |
978 | old_sigmask = &old_sigs; |
979 | } |
980 | #endif |
981 | |
982 | pid = do_fork_exec(exec_array, argv, envp, cwd, |
983 | p2cread, p2cwrite, c2pread, c2pwrite, |
984 | errread, errwrite, errpipe_read, errpipe_write, |
985 | close_fds, restore_signals, call_setsid, |
986 | call_setgid, gid, call_setgroups, num_groups, groups, |
987 | call_setuid, uid, child_umask, old_sigmask, |
988 | py_fds_to_keep, preexec_fn, preexec_fn_args_tuple); |
989 | |
990 | /* Parent (original) process */ |
991 | if (pid == -1) { |
992 | /* Capture errno for the exception. */ |
993 | saved_errno = errno; |
994 | } |
995 | |
996 | #ifdef VFORK_USABLE |
997 | if (old_sigmask) { |
998 | /* vfork() semantics guarantees that the parent is blocked |
999 | * until the child performs _exit() or execve(), so it is safe |
1000 | * to unblock signals once we're here. |
1001 | * Note that in environments where vfork() is implemented as fork(), |
1002 | * such as QEMU user-mode emulation, the parent won't be blocked, |
1003 | * but it won't share the address space with the child, |
1004 | * so it's still safe to unblock the signals. |
1005 | * |
1006 | * We don't handle errors here because this call can't fail |
1007 | * if valid arguments are given, and because there is no good |
1008 | * way for the caller to deal with a failure to restore |
1009 | * the thread signal mask. */ |
1010 | (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL); |
1011 | } |
1012 | #endif |
1013 | |
1014 | if (need_after_fork) |
1015 | PyOS_AfterFork_Parent(); |
1016 | |
1017 | cleanup: |
1018 | if (saved_errno != 0) { |
1019 | errno = saved_errno; |
1020 | /* We can't call this above as PyOS_AfterFork_Parent() calls back |
1021 | * into Python code which would see the unreturned error. */ |
1022 | PyErr_SetFromErrno(PyExc_OSError); |
1023 | } |
1024 | |
1025 | Py_XDECREF(preexec_fn_args_tuple); |
1026 | PyMem_RawFree(groups); |
1027 | Py_XDECREF(cwd_obj2); |
1028 | if (envp) |
1029 | _Py_FreeCharPArray(envp); |
1030 | Py_XDECREF(converted_args); |
1031 | Py_XDECREF(fast_args); |
1032 | if (argv) |
1033 | _Py_FreeCharPArray(argv); |
1034 | if (exec_array) |
1035 | _Py_FreeCharPArray(exec_array); |
1036 | |
1037 | if (need_to_reenable_gc) { |
1038 | PyGC_Enable(); |
1039 | } |
1040 | Py_XDECREF(gc_module); |
1041 | |
1042 | return pid == -1 ? NULL : PyLong_FromPid(pid); |
1043 | } |
1044 | |
1045 | |
/* Docstring for _posixsubprocess.fork_exec(), attached to the method table
 * entry below.  This is a runtime string (the function's __doc__), so its
 * content must stay in sync with the PyArg_ParseTuple() call in
 * subprocess_fork_exec().
 * NOTE(review): the signature line here may lag the actual argument list
 * (e.g. umask/errpipe ordering) -- verify against the parser. */
PyDoc_STRVAR(subprocess_fork_exec_doc,
"fork_exec(args, executable_list, close_fds, pass_fds, cwd, env,\n\
p2cread, p2cwrite, c2pread, c2pwrite,\n\
errread, errwrite, errpipe_read, errpipe_write,\n\
restore_signals, call_setsid,\n\
gid, groups_list, uid,\n\
preexec_fn)\n\
\n\
Forks a child process, closes parent file descriptors as appropriate in the\n\
child and dups the few that are needed before calling exec() in the child\n\
process.\n\
\n\
If close_fds is true, close file descriptors 3 and higher, except those listed\n\
in the sorted tuple pass_fds.\n\
\n\
The preexec_fn, if supplied, will be called immediately before closing file\n\
descriptors and exec.\n\
WARNING: preexec_fn is NOT SAFE if your application uses threads.\n\
         It may trigger infrequent, difficult to debug deadlocks.\n\
\n\
If an error occurs in the child process before the exec, it is\n\
serialized and written to the errpipe_write fd per subprocess.py.\n\
\n\
Returns: the child process's PID.\n\
\n\
Raises: Only on an error in the parent process.\n\
" );
1073 | |
1074 | /* module level code ********************************************************/ |
1075 | |
/* One-line module docstring, exposed as _posixsubprocess.__doc__. */
PyDoc_STRVAR(module_doc,
"A POSIX helper for the subprocess module." );
1078 | |
1079 | static PyMethodDef module_methods[] = { |
1080 | {"fork_exec" , subprocess_fork_exec, METH_VARARGS, subprocess_fork_exec_doc}, |
1081 | {NULL, NULL} /* sentinel */ |
1082 | }; |
1083 | |
/* Empty slot table: opts the module into multi-phase initialization
 * (PEP 489) without registering any Py_mod_* hooks. */
static PyModuleDef_Slot _posixsubprocess_slots[] = {
    {0, NULL}
};
1087 | |
/* Module definition.  m_size == 0 means the module keeps no
 * per-interpreter state; combined with the slot table above this makes
 * the module safe for multi-phase initialization and subinterpreters. */
static struct PyModuleDef _posixsubprocessmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_posixsubprocess" ,
    .m_doc = module_doc,
    .m_size = 0,
    .m_methods = module_methods,
    .m_slots = _posixsubprocess_slots,
};
1096 | |
1097 | PyMODINIT_FUNC |
1098 | PyInit__posixsubprocess(void) |
1099 | { |
1100 | return PyModuleDef_Init(&_posixsubprocessmodule); |
1101 | } |
1102 | |