1/* Authors: Gregory P. Smith & Jeffrey Yasskin */
2#include "Python.h"
3#include "pycore_fileutils.h"
4#if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
5# define _GNU_SOURCE
6#endif
7#include <unistd.h>
8#include <fcntl.h>
9#ifdef HAVE_SYS_TYPES_H
10#include <sys/types.h>
11#endif
12#if defined(HAVE_SYS_STAT_H)
13#include <sys/stat.h>
14#endif
15#ifdef HAVE_SYS_SYSCALL_H
16#include <sys/syscall.h>
17#endif
18#if defined(HAVE_SYS_RESOURCE_H)
19#include <sys/resource.h>
20#endif
21#ifdef HAVE_DIRENT_H
22#include <dirent.h>
23#endif
24#ifdef HAVE_GRP_H
25#include <grp.h>
26#endif /* HAVE_GRP_H */
27
28#include "posixmodule.h"
29
30#ifdef _Py_MEMORY_SANITIZER
31# include <sanitizer/msan_interface.h>
32#endif
33
34#if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
35# include <sys/linux-syscalls.h>
36# define SYS_getdents64 __NR_getdents64
37#endif
38
39#if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
40 defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
41/* If this is ever expanded to non-Linux platforms, verify what calls are
42 * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
43# include <signal.h>
44# define VFORK_USABLE 1
45#endif
46
47#if defined(__sun) && defined(__SVR4)
48/* readdir64 is used to work around Solaris 9 bug 6395699. */
49# define readdir readdir64
50# define dirent dirent64
51# if !defined(HAVE_DIRFD)
52/* Some versions of Solaris lack dirfd(). */
53# define dirfd(dirp) ((dirp)->dd_fd)
54# define HAVE_DIRFD
55# endif
56#endif
57
58#if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
59# define FD_DIR "/dev/fd"
60#else
61# define FD_DIR "/proc/self/fd"
62#endif
63
64#ifdef NGROUPS_MAX
65#define MAX_GROUPS NGROUPS_MAX
66#else
67#define MAX_GROUPS 64
68#endif
69
70#define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)
71
72static struct PyModuleDef _posixsubprocessmodule;
73
/* Parse a string made up solely of ASCII decimal digits into a non-negative
 * int without calling into libc (this runs between fork() and exec()).
 *
 * Returns the parsed value, or -1 when the string contains a non-digit
 * character or when the value does not fit in an int.  An empty string
 * yields 0.
 */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Refuse to compute num * 10 + digit when it would exceed INT_MAX:
         * signed overflow is undefined behavior and could otherwise yield
         * a bogus (possibly valid looking) descriptor number. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;
        }
        num = num * 10 + digit;
        ++name;
    }
    if (*name) {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
87
88
89#if defined(__FreeBSD__) || defined(__DragonFly__)
90/* When /dev/fd isn't mounted it is often a static directory populated
91 * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.
92 * NetBSD and OpenBSD have a /proc fs available (though not necessarily
93 * mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs
94 * that properly supports /dev/fd.
95 */
96static int
97_is_fdescfs_mounted_on_dev_fd(void)
98{
99 struct stat dev_stat;
100 struct stat dev_fd_stat;
101 if (stat("/dev", &dev_stat) != 0)
102 return 0;
103 if (stat(FD_DIR, &dev_fd_stat) != 0)
104 return 0;
105 if (dev_stat.st_dev == dev_fd_stat.st_dev)
106 return 0; /* / == /dev == /dev/fd means it is static. #fail */
107 return 1;
108}
109#endif
110
111
112/* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
113static int
114_sanity_check_python_fd_sequence(PyObject *fd_sequence)
115{
116 Py_ssize_t seq_idx;
117 long prev_fd = -1;
118 for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
119 PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
120 long iter_fd;
121 if (!PyLong_Check(py_fd)) {
122 return 1;
123 }
124 iter_fd = PyLong_AsLong(py_fd);
125 if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
126 /* Negative, overflow, unsorted, too big for a fd. */
127 return 1;
128 }
129 prev_fd = iter_fd;
130 }
131 return 0;
132}
133
134
135/* Is fd found in the sorted Python Sequence? */
136static int
137_is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
138{
139 /* Binary search. */
140 Py_ssize_t search_min = 0;
141 Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1;
142 if (search_max < 0)
143 return 0;
144 do {
145 long middle = (search_min + search_max) / 2;
146 long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle));
147 if (fd == middle_fd)
148 return 1;
149 if (fd > middle_fd)
150 search_min = middle + 1;
151 else
152 search_max = middle - 1;
153 } while (search_min <= search_max);
154 return 0;
155}
156
157static int
158make_inheritable(PyObject *py_fds_to_keep, int errpipe_write)
159{
160 Py_ssize_t i, len;
161
162 len = PyTuple_GET_SIZE(py_fds_to_keep);
163 for (i = 0; i < len; ++i) {
164 PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
165 long fd = PyLong_AsLong(fdobj);
166 assert(!PyErr_Occurred());
167 assert(0 <= fd && fd <= INT_MAX);
168 if (fd == errpipe_write) {
169 /* errpipe_write is part of py_fds_to_keep. It must be closed at
170 exec(), but kept open in the child process until exec() is
171 called. */
172 continue;
173 }
174 if (_Py_set_inheritable_async_safe((int)fd, 1, NULL) < 0)
175 return -1;
176 }
177 return 0;
178}
179
180
/* Report the highest file descriptor number this process might have open.
 * Meant to be async signal safe so it can run between fork() and exec().
 *
 * Sources are tried in order: fcntl(F_MAXFD) on NetBSD, the RLIMIT_NOFILE
 * hard limit on OpenBSD, sysconf(_SC_OPEN_MAX) where available, and finally
 * a fixed fallback of 256.
 */
static long
safe_get_max_fd(void)
{
    long max_fd;
#if defined(__NetBSD__)
    max_fd = fcntl(0, F_MAXFD);
    if (max_fd >= 0) {
        return max_fd;
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    struct rlimit limits;
    /* getrlimit() is not on the POSIX async signal safe list but is
     * likely safe.  TODO - Someone should audit OpenBSD to make sure. */
    if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
        return (long) limits.rlim_max;
    }
#endif
#ifdef _SC_OPEN_MAX
    max_fd = sysconf(_SC_OPEN_MAX);
    if (max_fd != -1) {
        return max_fd;
    }
#endif
    /* Matches legacy Lib/subprocess.py behavior. */
    max_fd = 256;
    return max_fd;
}
207
208
209/* Close all file descriptors in the range from start_fd and higher
210 * except for those in py_fds_to_keep. If the range defined by
211 * [start_fd, safe_get_max_fd()) is large this will take a long
212 * time as it calls close() on EVERY possible fd.
213 *
214 * It isn't possible to know for sure what the max fd to go up to
215 * is for processes with the capability of raising their maximum.
216 */
217static void
218_close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
219{
220 long end_fd = safe_get_max_fd();
221 Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
222 Py_ssize_t keep_seq_idx;
223 /* As py_fds_to_keep is sorted we can loop through the list closing
224 * fds in between any in the keep list falling within our range. */
225 for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
226 PyObject* py_keep_fd = PyTuple_GET_ITEM(py_fds_to_keep, keep_seq_idx);
227 int keep_fd = PyLong_AsLong(py_keep_fd);
228 if (keep_fd < start_fd)
229 continue;
230 _Py_closerange(start_fd, keep_fd - 1);
231 start_fd = keep_fd + 1;
232 }
233 if (start_fd <= end_fd) {
234 _Py_closerange(start_fd, end_fd);
235 }
236}
237
238
239#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* Record layout handed to the raw getdents64 system call below.
 *
 * Whether d_name could hold NAME_MAX characters is irrelevant: the only
 * directory read through this structure contains short file descriptor
 * number names, and the kernel reports an error if the buffer is too small.
 * Highly Unlikely.  The layout is very old and stable; changing it would
 * break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
    unsigned long long  d_ino;
    long long           d_off;
    unsigned short      d_reclen;     /* Length of this linux_dirent */
    unsigned char       d_type;
    char                d_name[256];  /* Filename (null-terminated) */
};
253
254/* Close all open file descriptors in the range from start_fd and higher
255 * Do not close any in the sorted py_fds_to_keep list.
256 *
257 * This version is async signal safe as it does not make any unsafe C library
258 * calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
259 * to resort to making a kernel system call directly but this is the ONLY api
260 * available that does no harm. opendir/readdir/closedir perform memory
261 * allocation and locking so while they usually work they are not guaranteed
262 * to (especially if you have replaced your malloc implementation). A version
263 * of this function that uses those can be found in the _maybe_unsafe variant.
264 *
265 * This is Linux specific because that is all I am ready to test it on. It
266 * should be easy to add OS specific dirent or dirent64 structures and modify
267 * it with some cpp #define magic to work on other OSes as well if you want.
268 */
269static void
270_close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
271{
272 int fd_dir_fd;
273
274 fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
275 if (fd_dir_fd == -1) {
276 /* No way to get a list of open fds. */
277 _close_fds_by_brute_force(start_fd, py_fds_to_keep);
278 return;
279 } else {
280 char buffer[sizeof(struct linux_dirent64)];
281 int bytes;
282 while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
283 (struct linux_dirent64 *)buffer,
284 sizeof(buffer))) > 0) {
285 struct linux_dirent64 *entry;
286 int offset;
287#ifdef _Py_MEMORY_SANITIZER
288 __msan_unpoison(buffer, bytes);
289#endif
290 for (offset = 0; offset < bytes; offset += entry->d_reclen) {
291 int fd;
292 entry = (struct linux_dirent64 *)(buffer + offset);
293 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
294 continue; /* Not a number. */
295 if (fd != fd_dir_fd && fd >= start_fd &&
296 !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
297 close(fd);
298 }
299 }
300 }
301 close(fd_dir_fd);
302 }
303}
304
305#define _close_open_fds _close_open_fds_safe
306
307#else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
308
309
310/* Close all open file descriptors from start_fd and higher.
311 * Do not close any in the sorted py_fds_to_keep tuple.
312 *
313 * This function violates the strict use of async signal safe functions. :(
314 * It calls opendir(), readdir() and closedir(). Of these, the one most
315 * likely to ever cause a problem is opendir() as it performs an internal
316 * malloc(). Practically this should not be a problem. The Java VM makes the
317 * same calls between fork and exec in its own UNIXProcess_md.c implementation.
318 *
319 * readdir_r() is not used because it provides no benefit. It is typically
320 * implemented as readdir() followed by memcpy(). See also:
321 * http://womble.decadent.org.uk/readdir_r-advisory.html
322 */
323static void
324_close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
325{
326 DIR *proc_fd_dir;
327#ifndef HAVE_DIRFD
328 while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
329 ++start_fd;
330 }
331 /* Close our lowest fd before we call opendir so that it is likely to
332 * reuse that fd otherwise we might close opendir's file descriptor in
333 * our loop. This trick assumes that fd's are allocated on a lowest
334 * available basis. */
335 close(start_fd);
336 ++start_fd;
337#endif
338
339#if defined(__FreeBSD__) || defined(__DragonFly__)
340 if (!_is_fdescfs_mounted_on_dev_fd())
341 proc_fd_dir = NULL;
342 else
343#endif
344 proc_fd_dir = opendir(FD_DIR);
345 if (!proc_fd_dir) {
346 /* No way to get a list of open fds. */
347 _close_fds_by_brute_force(start_fd, py_fds_to_keep);
348 } else {
349 struct dirent *dir_entry;
350#ifdef HAVE_DIRFD
351 int fd_used_by_opendir = dirfd(proc_fd_dir);
352#else
353 int fd_used_by_opendir = start_fd - 1;
354#endif
355 errno = 0;
356 while ((dir_entry = readdir(proc_fd_dir))) {
357 int fd;
358 if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
359 continue; /* Not a number. */
360 if (fd != fd_used_by_opendir && fd >= start_fd &&
361 !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
362 close(fd);
363 }
364 errno = 0;
365 }
366 if (errno) {
367 /* readdir error, revert behavior. Highly Unlikely. */
368 _close_fds_by_brute_force(start_fd, py_fds_to_keep);
369 }
370 closedir(proc_fd_dir);
371 }
372}
373
374#define _close_open_fds _close_open_fds_maybe_unsafe
375
376#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
377
378
379#ifdef VFORK_USABLE
380/* Reset dispositions for all signals to SIG_DFL except for ignored
381 * signals. This way we ensure that no signal handlers can run
382 * after we unblock signals in a child created by vfork().
383 */
384static void
385reset_signal_handlers(const sigset_t *child_sigmask)
386{
387 struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
388 for (int sig = 1; sig < _NSIG; sig++) {
389 /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
390 if (sig == SIGKILL || sig == SIGSTOP) {
391 continue;
392 }
393
394 /* There is no need to reset the disposition of signals that will
395 * remain blocked across execve() since the kernel will do it. */
396 if (sigismember(child_sigmask, sig) == 1) {
397 continue;
398 }
399
400 struct sigaction sa;
401 /* C libraries usually return EINVAL for signals used
402 * internally (e.g. for thread cancellation), so simply
403 * skip errors here. */
404 if (sigaction(sig, NULL, &sa) == -1) {
405 continue;
406 }
407
408 /* void *h works as these fields are both pointer types already. */
409 void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
410 (void *)sa.sa_handler);
411 if (h == SIG_IGN || h == SIG_DFL) {
412 continue;
413 }
414
415 /* This call can't reasonably fail, but if it does, terminating
416 * the child seems to be too harsh, so ignore errors. */
417 (void) sigaction(sig, &sa_dfl, NULL);
418 }
419}
420#endif /* VFORK_USABLE */
421
422
423/*
424 * This function is code executed in the child process immediately after
425 * (v)fork to set things up and call exec().
426 *
427 * All of the code in this function must only use async-signal-safe functions,
428 * listed at `man 7 signal` or
429 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
430 *
431 * This restriction is documented at
432 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
433 *
434 * If this function is called after vfork(), even more care must be taken.
435 * The lack of preparations that C libraries normally take on fork(),
436 * as well as sharing the address space with the parent, might make even
437 * async-signal-safe functions vfork-unsafe. In particular, on Linux,
438 * set*id() and setgroups() library functions must not be called, since
439 * they have to interact with the library-level thread list and send
440 * library-internal signals to implement per-process credentials semantics
441 * required by POSIX but not supported natively on Linux. Another reason to
442 * avoid this family of functions is that sharing an address space between
443 * processes running with different privileges is inherently insecure.
444 * See bpo-35823 for further discussion and references.
445 *
446 * In some C libraries, setrlimit() has the same thread list/signalling
447 * behavior since resource limits were per-thread attributes before
448 * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
449 * (https://www.openwall.com/lists/musl/2020/10/15/6).
450 *
451 * If vfork-unsafe functionality is desired after vfork(), consider using
452 * syscall() to obtain it.
453 */
454_Py_NO_INLINE static void
455child_exec(char *const exec_array[],
456 char *const argv[],
457 char *const envp[],
458 const char *cwd,
459 int p2cread, int p2cwrite,
460 int c2pread, int c2pwrite,
461 int errread, int errwrite,
462 int errpipe_read, int errpipe_write,
463 int close_fds, int restore_signals,
464 int call_setsid,
465 int call_setgid, gid_t gid,
466 int call_setgroups, size_t groups_size, const gid_t *groups,
467 int call_setuid, uid_t uid, int child_umask,
468 const void *child_sigmask,
469 PyObject *py_fds_to_keep,
470 PyObject *preexec_fn,
471 PyObject *preexec_fn_args_tuple)
472{
473 int i, saved_errno, reached_preexec = 0;
474 PyObject *result;
475 const char* err_msg = "";
476 /* Buffer large enough to hold a hex integer. We can't malloc. */
477 char hex_errno[sizeof(saved_errno)*2+1];
478
479 if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
480 goto error;
481
482 /* Close parent's pipe ends. */
483 if (p2cwrite != -1)
484 POSIX_CALL(close(p2cwrite));
485 if (c2pread != -1)
486 POSIX_CALL(close(c2pread));
487 if (errread != -1)
488 POSIX_CALL(close(errread));
489 POSIX_CALL(close(errpipe_read));
490
491 /* When duping fds, if there arises a situation where one of the fds is
492 either 0, 1 or 2, it is possible that it is overwritten (#12607). */
493 if (c2pwrite == 0) {
494 POSIX_CALL(c2pwrite = dup(c2pwrite));
495 /* issue32270 */
496 if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
497 goto error;
498 }
499 }
500 while (errwrite == 0 || errwrite == 1) {
501 POSIX_CALL(errwrite = dup(errwrite));
502 /* issue32270 */
503 if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
504 goto error;
505 }
506 }
507
508 /* Dup fds for child.
509 dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
510 would be a no-op (issue #10806). */
511 if (p2cread == 0) {
512 if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
513 goto error;
514 }
515 else if (p2cread != -1)
516 POSIX_CALL(dup2(p2cread, 0)); /* stdin */
517
518 if (c2pwrite == 1) {
519 if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
520 goto error;
521 }
522 else if (c2pwrite != -1)
523 POSIX_CALL(dup2(c2pwrite, 1)); /* stdout */
524
525 if (errwrite == 2) {
526 if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
527 goto error;
528 }
529 else if (errwrite != -1)
530 POSIX_CALL(dup2(errwrite, 2)); /* stderr */
531
532 /* We no longer manually close p2cread, c2pwrite, and errwrite here as
533 * _close_open_fds takes care when it is not already non-inheritable. */
534
535 if (cwd)
536 POSIX_CALL(chdir(cwd));
537
538 if (child_umask >= 0)
539 umask(child_umask); /* umask() always succeeds. */
540
541 if (restore_signals)
542 _Py_RestoreSignals();
543
544#ifdef VFORK_USABLE
545 if (child_sigmask) {
546 reset_signal_handlers(child_sigmask);
547 if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
548 goto error;
549 }
550 }
551#endif
552
553#ifdef HAVE_SETSID
554 if (call_setsid)
555 POSIX_CALL(setsid());
556#endif
557
558#ifdef HAVE_SETGROUPS
559 if (call_setgroups)
560 POSIX_CALL(setgroups(groups_size, groups));
561#endif /* HAVE_SETGROUPS */
562
563#ifdef HAVE_SETREGID
564 if (call_setgid)
565 POSIX_CALL(setregid(gid, gid));
566#endif /* HAVE_SETREGID */
567
568#ifdef HAVE_SETREUID
569 if (call_setuid)
570 POSIX_CALL(setreuid(uid, uid));
571#endif /* HAVE_SETREUID */
572
573
574 reached_preexec = 1;
575 if (preexec_fn != Py_None && preexec_fn_args_tuple) {
576 /* This is where the user has asked us to deadlock their program. */
577 result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
578 if (result == NULL) {
579 /* Stringifying the exception or traceback would involve
580 * memory allocation and thus potential for deadlock.
581 * We've already faced potential deadlock by calling back
582 * into Python in the first place, so it probably doesn't
583 * matter but we avoid it to minimize the possibility. */
584 err_msg = "Exception occurred in preexec_fn.";
585 errno = 0; /* We don't want to report an OSError. */
586 goto error;
587 }
588 /* Py_DECREF(result); - We're about to exec so why bother? */
589 }
590
591 /* close FDs after executing preexec_fn, which might open FDs */
592 if (close_fds) {
593 /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
594 _close_open_fds(3, py_fds_to_keep);
595 }
596
597 /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
598 /* given the executable_list generated by Lib/subprocess.py. */
599 saved_errno = 0;
600 for (i = 0; exec_array[i] != NULL; ++i) {
601 const char *executable = exec_array[i];
602 if (envp) {
603 execve(executable, argv, envp);
604 } else {
605 execv(executable, argv);
606 }
607 if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
608 saved_errno = errno;
609 }
610 }
611 /* Report the first exec error, not the last. */
612 if (saved_errno)
613 errno = saved_errno;
614
615error:
616 saved_errno = errno;
617 /* Report the posix error to our parent process. */
618 /* We ignore all write() return values as the total size of our writes is
619 less than PIPEBUF and we cannot do anything about an error anyways.
620 Use _Py_write_noraise() to retry write() if it is interrupted by a
621 signal (fails with EINTR). */
622 if (saved_errno) {
623 char *cur;
624 _Py_write_noraise(errpipe_write, "OSError:", 8);
625 cur = hex_errno + sizeof(hex_errno);
626 while (saved_errno != 0 && cur != hex_errno) {
627 *--cur = Py_hexdigits[saved_errno % 16];
628 saved_errno /= 16;
629 }
630 _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
631 _Py_write_noraise(errpipe_write, ":", 1);
632 if (!reached_preexec) {
633 /* Indicate to the parent that the error happened before exec(). */
634 _Py_write_noraise(errpipe_write, "noexec", 6);
635 }
636 /* We can't call strerror(saved_errno). It is not async signal safe.
637 * The parent process will look the error message up. */
638 } else {
639 _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
640 _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
641 }
642}
643
644
645/* The main purpose of this wrapper function is to isolate vfork() from both
646 * subprocess_fork_exec() and child_exec(). A child process created via
647 * vfork() executes on the same stack as the parent process while the latter is
648 * suspended, so this function should not be inlined to avoid compiler bugs
649 * that might clobber data needed by the parent later. Additionally,
650 * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
651 * GCC (see bpo-35823).
652 */
653_Py_NO_INLINE static pid_t
654do_fork_exec(char *const exec_array[],
655 char *const argv[],
656 char *const envp[],
657 const char *cwd,
658 int p2cread, int p2cwrite,
659 int c2pread, int c2pwrite,
660 int errread, int errwrite,
661 int errpipe_read, int errpipe_write,
662 int close_fds, int restore_signals,
663 int call_setsid,
664 int call_setgid, gid_t gid,
665 int call_setgroups, size_t groups_size, const gid_t *groups,
666 int call_setuid, uid_t uid, int child_umask,
667 const void *child_sigmask,
668 PyObject *py_fds_to_keep,
669 PyObject *preexec_fn,
670 PyObject *preexec_fn_args_tuple)
671{
672
673 pid_t pid;
674
675#ifdef VFORK_USABLE
676 if (child_sigmask) {
677 /* These are checked by our caller; verify them in debug builds. */
678 assert(!call_setuid);
679 assert(!call_setgid);
680 assert(!call_setgroups);
681 assert(preexec_fn == Py_None);
682
683 pid = vfork();
684 if (pid == -1) {
685 /* If vfork() fails, fall back to using fork(). When it isn't
686 * allowed in a process by the kernel, vfork can return -1
687 * with errno EINVAL. https://bugs.python.org/issue47151. */
688 pid = fork();
689 }
690 } else
691#endif
692 {
693 pid = fork();
694 }
695
696 if (pid != 0) {
697 return pid;
698 }
699
700 /* Child process.
701 * See the comment above child_exec() for restrictions imposed on
702 * the code below.
703 */
704
705 if (preexec_fn != Py_None) {
706 /* We'll be calling back into Python later so we need to do this.
707 * This call may not be async-signal-safe but neither is calling
708 * back into Python. The user asked us to use hope as a strategy
709 * to avoid deadlock... */
710 PyOS_AfterFork_Child();
711 }
712
713 child_exec(exec_array, argv, envp, cwd,
714 p2cread, p2cwrite, c2pread, c2pwrite,
715 errread, errwrite, errpipe_read, errpipe_write,
716 close_fds, restore_signals, call_setsid,
717 call_setgid, gid, call_setgroups, groups_size, groups,
718 call_setuid, uid, child_umask, child_sigmask,
719 py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
720 _exit(255);
721 return 0; /* Dead code to avoid a potential compiler warning. */
722}
723
724
725static PyObject *
726subprocess_fork_exec(PyObject *module, PyObject *args)
727{
728 PyObject *gc_module = NULL;
729 PyObject *executable_list, *py_fds_to_keep;
730 PyObject *env_list, *preexec_fn;
731 PyObject *process_args, *converted_args = NULL, *fast_args = NULL;
732 PyObject *preexec_fn_args_tuple = NULL;
733 PyObject *groups_list;
734 PyObject *uid_object, *gid_object;
735 int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite;
736 int errpipe_read, errpipe_write, close_fds, restore_signals;
737 int call_setsid;
738 int call_setgid = 0, call_setgroups = 0, call_setuid = 0;
739 uid_t uid;
740 gid_t gid, *groups = NULL;
741 int child_umask;
742 PyObject *cwd_obj, *cwd_obj2 = NULL;
743 const char *cwd;
744 pid_t pid = -1;
745 int need_to_reenable_gc = 0;
746 char *const *exec_array, *const *argv = NULL, *const *envp = NULL;
747 Py_ssize_t arg_num, num_groups = 0;
748 int need_after_fork = 0;
749 int saved_errno = 0;
750
751 if (!PyArg_ParseTuple(
752 args, "OOpO!OOiiiiiiiiiiOOOiO:fork_exec",
753 &process_args, &executable_list,
754 &close_fds, &PyTuple_Type, &py_fds_to_keep,
755 &cwd_obj, &env_list,
756 &p2cread, &p2cwrite, &c2pread, &c2pwrite,
757 &errread, &errwrite, &errpipe_read, &errpipe_write,
758 &restore_signals, &call_setsid,
759 &gid_object, &groups_list, &uid_object, &child_umask,
760 &preexec_fn))
761 return NULL;
762
763 if ((preexec_fn != Py_None) &&
764 (PyInterpreterState_Get() != PyInterpreterState_Main())) {
765 PyErr_SetString(PyExc_RuntimeError,
766 "preexec_fn not supported within subinterpreters");
767 return NULL;
768 }
769
770 if (close_fds && errpipe_write < 3) { /* precondition */
771 PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
772 return NULL;
773 }
774 if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
775 PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
776 return NULL;
777 }
778
779 PyInterpreterState *interp = PyInterpreterState_Get();
780 const PyConfig *config = _PyInterpreterState_GetConfig(interp);
781 if (config->_isolated_interpreter) {
782 PyErr_SetString(PyExc_RuntimeError,
783 "subprocess not supported for isolated subinterpreters");
784 return NULL;
785 }
786
787 /* We need to call gc.disable() when we'll be calling preexec_fn */
788 if (preexec_fn != Py_None) {
789 need_to_reenable_gc = PyGC_Disable();
790 }
791
792 exec_array = _PySequence_BytesToCharpArray(executable_list);
793 if (!exec_array)
794 goto cleanup;
795
796 /* Convert args and env into appropriate arguments for exec() */
797 /* These conversions are done in the parent process to avoid allocating
798 or freeing memory in the child process. */
799 if (process_args != Py_None) {
800 Py_ssize_t num_args;
801 /* Equivalent to: */
802 /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */
803 fast_args = PySequence_Fast(process_args, "argv must be a tuple");
804 if (fast_args == NULL)
805 goto cleanup;
806 num_args = PySequence_Fast_GET_SIZE(fast_args);
807 converted_args = PyTuple_New(num_args);
808 if (converted_args == NULL)
809 goto cleanup;
810 for (arg_num = 0; arg_num < num_args; ++arg_num) {
811 PyObject *borrowed_arg, *converted_arg;
812 if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
813 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
814 goto cleanup;
815 }
816 borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
817 if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
818 goto cleanup;
819 PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
820 }
821
822 argv = _PySequence_BytesToCharpArray(converted_args);
823 Py_CLEAR(converted_args);
824 Py_CLEAR(fast_args);
825 if (!argv)
826 goto cleanup;
827 }
828
829 if (env_list != Py_None) {
830 envp = _PySequence_BytesToCharpArray(env_list);
831 if (!envp)
832 goto cleanup;
833 }
834
835 if (cwd_obj != Py_None) {
836 if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
837 goto cleanup;
838 cwd = PyBytes_AsString(cwd_obj2);
839 } else {
840 cwd = NULL;
841 }
842
843 if (groups_list != Py_None) {
844#ifdef HAVE_SETGROUPS
845 Py_ssize_t i;
846 gid_t gid;
847
848 if (!PyList_Check(groups_list)) {
849 PyErr_SetString(PyExc_TypeError,
850 "setgroups argument must be a list");
851 goto cleanup;
852 }
853 num_groups = PySequence_Size(groups_list);
854
855 if (num_groups < 0)
856 goto cleanup;
857
858 if (num_groups > MAX_GROUPS) {
859 PyErr_SetString(PyExc_ValueError, "too many groups");
860 goto cleanup;
861 }
862
863 if ((groups = PyMem_RawMalloc(num_groups * sizeof(gid_t))) == NULL) {
864 PyErr_SetString(PyExc_MemoryError,
865 "failed to allocate memory for group list");
866 goto cleanup;
867 }
868
869 for (i = 0; i < num_groups; i++) {
870 PyObject *elem;
871 elem = PySequence_GetItem(groups_list, i);
872 if (!elem)
873 goto cleanup;
874 if (!PyLong_Check(elem)) {
875 PyErr_SetString(PyExc_TypeError,
876 "groups must be integers");
877 Py_DECREF(elem);
878 goto cleanup;
879 } else {
880 if (!_Py_Gid_Converter(elem, &gid)) {
881 Py_DECREF(elem);
882 PyErr_SetString(PyExc_ValueError, "invalid group id");
883 goto cleanup;
884 }
885 groups[i] = gid;
886 }
887 Py_DECREF(elem);
888 }
889 call_setgroups = 1;
890
891#else /* HAVE_SETGROUPS */
892 PyErr_BadInternalCall();
893 goto cleanup;
894#endif /* HAVE_SETGROUPS */
895 }
896
897 if (gid_object != Py_None) {
898#ifdef HAVE_SETREGID
899 if (!_Py_Gid_Converter(gid_object, &gid))
900 goto cleanup;
901
902 call_setgid = 1;
903
904#else /* HAVE_SETREGID */
905 PyErr_BadInternalCall();
906 goto cleanup;
907#endif /* HAVE_SETREUID */
908 }
909
910 if (uid_object != Py_None) {
911#ifdef HAVE_SETREUID
912 if (!_Py_Uid_Converter(uid_object, &uid))
913 goto cleanup;
914
915 call_setuid = 1;
916
917#else /* HAVE_SETREUID */
918 PyErr_BadInternalCall();
919 goto cleanup;
920#endif /* HAVE_SETREUID */
921 }
922
923 /* This must be the last thing done before fork() because we do not
924 * want to call PyOS_BeforeFork() if there is any chance of another
925 * error leading to the cleanup: code without calling fork(). */
926 if (preexec_fn != Py_None) {
927 preexec_fn_args_tuple = PyTuple_New(0);
928 if (!preexec_fn_args_tuple)
929 goto cleanup;
930 PyOS_BeforeFork();
931 need_after_fork = 1;
932 }
933
934 /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
935 const void *old_sigmask = NULL;
936#ifdef VFORK_USABLE
937 /* Use vfork() only if it's safe. See the comment above child_exec(). */
938 sigset_t old_sigs;
939 int allow_vfork;
940 if (preexec_fn == Py_None) {
941 allow_vfork = 1; /* 3.10.0 behavior */
942 PyObject *subprocess_module = PyImport_ImportModule("subprocess");
943 if (subprocess_module != NULL) {
944 PyObject *allow_vfork_obj = PyObject_GetAttrString(
945 subprocess_module, "_USE_VFORK");
946 Py_DECREF(subprocess_module);
947 if (allow_vfork_obj != NULL) {
948 allow_vfork = PyObject_IsTrue(allow_vfork_obj);
949 Py_DECREF(allow_vfork_obj);
950 if (allow_vfork < 0) {
951 PyErr_Clear(); /* Bad _USE_VFORK attribute. */
952 allow_vfork = 1; /* 3.10.0 behavior */
953 }
954 } else {
955 PyErr_Clear(); /* No _USE_VFORK attribute. */
956 }
957 } else {
958 PyErr_Clear(); /* no subprocess module? suspicious; don't care. */
959 }
960 } else {
961 allow_vfork = 0;
962 }
963 if (allow_vfork && !call_setuid && !call_setgid && !call_setgroups) {
964 /* Block all signals to ensure that no signal handlers are run in the
965 * child process while it shares memory with us. Note that signals
966 * used internally by C libraries won't be blocked by
967 * pthread_sigmask(), but signal handlers installed by C libraries
968 * normally service only signals originating from *within the process*,
969 * so it should be sufficient to consider any library function that
970 * might send such a signal to be vfork-unsafe and do not call it in
971 * the child.
972 */
973 sigset_t all_sigs;
974 sigfillset(&all_sigs);
975 if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
976 goto cleanup;
977 }
978 old_sigmask = &old_sigs;
979 }
980#endif
981
982 pid = do_fork_exec(exec_array, argv, envp, cwd,
983 p2cread, p2cwrite, c2pread, c2pwrite,
984 errread, errwrite, errpipe_read, errpipe_write,
985 close_fds, restore_signals, call_setsid,
986 call_setgid, gid, call_setgroups, num_groups, groups,
987 call_setuid, uid, child_umask, old_sigmask,
988 py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
989
990 /* Parent (original) process */
991 if (pid == -1) {
992 /* Capture errno for the exception. */
993 saved_errno = errno;
994 }
995
996#ifdef VFORK_USABLE
997 if (old_sigmask) {
998 /* vfork() semantics guarantees that the parent is blocked
999 * until the child performs _exit() or execve(), so it is safe
1000 * to unblock signals once we're here.
1001 * Note that in environments where vfork() is implemented as fork(),
1002 * such as QEMU user-mode emulation, the parent won't be blocked,
1003 * but it won't share the address space with the child,
1004 * so it's still safe to unblock the signals.
1005 *
1006 * We don't handle errors here because this call can't fail
1007 * if valid arguments are given, and because there is no good
1008 * way for the caller to deal with a failure to restore
1009 * the thread signal mask. */
1010 (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
1011 }
1012#endif
1013
1014 if (need_after_fork)
1015 PyOS_AfterFork_Parent();
1016
1017cleanup:
1018 if (saved_errno != 0) {
1019 errno = saved_errno;
1020 /* We can't call this above as PyOS_AfterFork_Parent() calls back
1021 * into Python code which would see the unreturned error. */
1022 PyErr_SetFromErrno(PyExc_OSError);
1023 }
1024
1025 Py_XDECREF(preexec_fn_args_tuple);
1026 PyMem_RawFree(groups);
1027 Py_XDECREF(cwd_obj2);
1028 if (envp)
1029 _Py_FreeCharPArray(envp);
1030 Py_XDECREF(converted_args);
1031 Py_XDECREF(fast_args);
1032 if (argv)
1033 _Py_FreeCharPArray(argv);
1034 if (exec_array)
1035 _Py_FreeCharPArray(exec_array);
1036
1037 if (need_to_reenable_gc) {
1038 PyGC_Enable();
1039 }
1040 Py_XDECREF(gc_module);
1041
1042 return pid == -1 ? NULL : PyLong_FromPid(pid);
1043}
1044
1045
/* Docstring for the fork_exec() function; it is attached to the "fork_exec"
 * entry of this module's method table.
 *
 * The text is a single string literal built with backslash-newline
 * continuations, so the leading whitespace of every continuation line is
 * part of the runtime string.
 *
 * NOTE(review): the signature line below does not mention child_umask,
 * although the implementation passes a child_umask value on to
 * do_fork_exec(). Confirm against the argument parsing code whether the
 * documented parameter list is complete. */
PyDoc_STRVAR(subprocess_fork_exec_doc,
"fork_exec(args, executable_list, close_fds, pass_fds, cwd, env,\n\
 p2cread, p2cwrite, c2pread, c2pwrite,\n\
 errread, errwrite, errpipe_read, errpipe_write,\n\
 restore_signals, call_setsid,\n\
 gid, groups_list, uid,\n\
 preexec_fn)\n\
\n\
Forks a child process, closes parent file descriptors as appropriate in the\n\
child and dups the few that are needed before calling exec() in the child\n\
process.\n\
\n\
If close_fds is true, close file descriptors 3 and higher, except those listed\n\
in the sorted tuple pass_fds.\n\
\n\
The preexec_fn, if supplied, will be called immediately before closing file\n\
descriptors and exec.\n\
WARNING: preexec_fn is NOT SAFE if your application uses threads.\n\
 It may trigger infrequent, difficult to debug deadlocks.\n\
\n\
If an error occurs in the child process before the exec, it is\n\
serialized and written to the errpipe_write fd per subprocess.py.\n\
\n\
Returns: the child process's PID.\n\
\n\
Raises: Only on an error in the parent process.\n\
");
1073
1074/* module level code ********************************************************/
1075
1076PyDoc_STRVAR(module_doc,
1077"A POSIX helper for the subprocess module.");
1078
1079static PyMethodDef module_methods[] = {
1080 {"fork_exec", subprocess_fork_exec, METH_VARARGS, subprocess_fork_exec_doc},
1081 {NULL, NULL} /* sentinel */
1082};
1083
1084static PyModuleDef_Slot _posixsubprocess_slots[] = {
1085 {0, NULL}
1086};
1087
1088static struct PyModuleDef _posixsubprocessmodule = {
1089 PyModuleDef_HEAD_INIT,
1090 .m_name = "_posixsubprocess",
1091 .m_doc = module_doc,
1092 .m_size = 0,
1093 .m_methods = module_methods,
1094 .m_slots = _posixsubprocess_slots,
1095};
1096
1097PyMODINIT_FUNC
1098PyInit__posixsubprocess(void)
1099{
1100 return PyModuleDef_Init(&_posixsubprocessmodule);
1101}
1102