1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * Subprocess library, modeled after Python's subprocess module
19 * (http://docs.python.org/2/library/subprocess.html)
20 *
21 * This library defines one class (Subprocess) which represents a child
22 * process. Subprocess has two constructors: one that takes a vector<string>
23 * and executes the given executable without using the shell, and one
24 * that takes a string and executes the given command using the shell.
25 * Subprocess allows you to redirect the child's standard input, standard
26 * output, and standard error to/from child descriptors in the parent,
27 * or to create communication pipes between the child and the parent.
28 *
29 * The simplest example is a thread-safe [1] version of the system() library
30 * function:
31 * Subprocess(cmd).wait();
32 * which executes the command using the default shell and waits for it
33 * to complete, returning the exit status.
34 *
35 * A thread-safe [1] version of popen() (type="r", to read from the child):
36 * Subprocess proc(cmd, Subprocess::Options().pipeStdout());
37 * // read from proc.stdoutFd()
38 * proc.wait();
39 *
40 * A thread-safe [1] version of popen() (type="w", to write to the child):
41 * Subprocess proc(cmd, Subprocess::Options().pipeStdin());
42 * // write to proc.stdinFd()
43 * proc.wait();
44 *
45 * If you want to redirect both stdin and stdout to pipes, you can, but note
46 * that you're subject to a variety of deadlocks. You'll want to use
47 * nonblocking I/O, like the callback version of communicate().
48 *
49 * The string or IOBuf-based variants of communicate() are the simplest way
50 * to communicate with a child via its standard input, standard output, and
51 * standard error. They buffer everything in memory, so they are not great
52 * for large amounts of data (or long-running processes), but they are much
53 * simpler than the callback version.
54 *
55 * == A note on thread-safety ==
56 *
57 * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
58 * to fork in a way that does not cause grief in multithreaded programs.
59 *
60 * Caveat: If your system does not have the atomic pipe2 system call, it is
61 * not safe to concurrently call Subprocess from different threads.
62 * Therefore, it is best to have a single thread be responsible for spawning
63 * subprocesses.
64 *
65 * A particular instance of Subprocess is emphatically **not** thread-safe.
66 * If you need to simultaneously communicate via the pipes, and interact
67 * with the Subprocess state, your best bet is to:
68 * - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
69 * - Only interact with the Subprocess from one thread at a time.
70 *
71 * The current implementation of communicate() cannot be safely interrupted.
72 * To do so correctly, one would need to use EventFD, or open a dedicated
73 * pipe to be messaged from a different thread -- in particular, kill() will
74 * not do, since a descendant may keep the pipes open indefinitely.
75 *
76 * So, once you call communicate(), you must wait for it to return, and not
77 * touch the pipes from other threads. closeParentFd() is emphatically
78 * unsafe to call concurrently, and even sendSignal() is not a good idea.
79 * You can perhaps give the Subprocess's PID to a different thread before
80 * starting communicate(), and use that PID to send a signal without
81 * accessing the Subprocess object. In that case, you will need a mutex
82 * that ensures you don't wait() before you sent said signal. In a
83 * nutshell, don't do this.
84 *
85 * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
86 * a PID, while another thread is in waitpid(), the signal may fire either
87 * before or after the process is reaped. This means that your signal can,
88 * in pathological circumstances, be delivered to the wrong process (ouch!).
89 * To avoid this, you should only use non-blocking waits (i.e. poll()), and
90 * make sure to serialize your signals (i.e. kill()) with the waits --
91 * either wait & signal from the same thread, or use a mutex.
92 */
93
94#pragma once
95
96#include <signal.h>
97#include <sys/types.h>
98#include <sys/wait.h>
99
100#include <chrono>
101#include <exception>
102#include <string>
103#include <vector>
104
105#include <boost/container/flat_map.hpp>
106#include <boost/operators.hpp>
107
108#include <folly/Exception.h>
109#include <folly/File.h>
110#include <folly/FileUtil.h>
111#include <folly/Function.h>
112#include <folly/MapUtil.h>
113#include <folly/Optional.h>
114#include <folly/Portability.h>
115#include <folly/Range.h>
116#include <folly/gen/String.h>
117#include <folly/io/IOBufQueue.h>
118#include <folly/portability/SysResource.h>
119
120namespace folly {
121
122/**
123 * Class to wrap a process return code.
124 */
125class Subprocess;
class ProcessReturnCode {
 public:
  enum State {
    // Subprocess launches its child from the constructor, so only
    // default-initialized or moved-out ProcessReturnCodes are ever in this
    // state.
    NOT_STARTED,
    RUNNING,
    EXITED,
    KILLED,
  };

  // Named constructors for the two states that carry no wait() status.
  static ProcessReturnCode makeNotStarted() {
    return ProcessReturnCode{RV_NOT_STARTED};
  }

  static ProcessReturnCode makeRunning() {
    return ProcessReturnCode{RV_RUNNING};
  }

  // Builds a return code from a raw status value. Defined out of line.
  static ProcessReturnCode make(int status);

  // A default-constructed object is in the "not started" state. This exists
  // for convenience only; Subprocess::returnCode() never produces it.
  ProcessReturnCode() : ProcessReturnCode(RV_NOT_STARTED) {}

  // Copies use the compiler-generated member-wise versions.
  ProcessReturnCode(const ProcessReturnCode& p) = default;
  ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
  // Moves are hand-written (out of line): for Subprocess to be movable, the
  // moved-out state must not be "running", or ~Subprocess() will abort.
  ProcessReturnCode(ProcessReturnCode&& p) noexcept;
  ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;

  /**
   * Current process state, one of:
   *   NOT_STARTED: the process hasn't been started successfully
   *   RUNNING:     the process is currently running
   *   EXITED:      the process exited (successfully or not)
   *   KILLED:      the process was killed by a signal
   */
  State state() const;

  /**
   * Convenience predicates, one per State value; each simply compares
   * state() against the corresponding enumerator.
   */
  bool notStarted() const {
    return NOT_STARTED == state();
  }
  bool running() const {
    return RUNNING == state();
  }
  bool exited() const {
    return EXITED == state();
  }
  bool killed() const {
    return KILLED == state();
  }

  /**
   * Exit status of the process. Only meaningful when state() == EXITED;
   * throws in every other state.
   */
  int exitStatus() const;

  /**
   * The signal that terminated the process. Only meaningful when
   * state() == KILLED; throws in every other state.
   */
  int killSignal() const;

  /**
   * Whether a core file was generated. Only meaningful when
   * state() == KILLED; throws in every other state.
   */
  bool coreDumped() const;

  /**
   * Human-readable description; one of
   *   "not started"
   *   "running"
   *   "exited with status <status>"
   *   "killed by signal <signal>"
   *   "killed by signal <signal> (core dumped)"
   */
  std::string str() const;

  /**
   * Precondition check: throws std::logic_error unless this object is in
   * the `expected` state.
   */
  void enforce(State expected) const;

 private:
  // Wraps a raw status value; reachable only through the factories above.
  explicit ProcessReturnCode(int rv) : rawStatus_(rv) {}

  // Sentinel raw values for the states that have no wait() status.
  static constexpr int RV_NOT_STARTED = -2;
  static constexpr int RV_RUNNING = -1;

  int rawStatus_;
};
224
225/**
226 * Base exception thrown by the Subprocess methods.
227 */
228class FOLLY_EXPORT SubprocessError : public std::runtime_error {
229 public:
230 using std::runtime_error::runtime_error;
231};
232
233/**
234 * Exception thrown by *Checked methods of Subprocess.
235 */
class FOLLY_EXPORT CalledProcessError : public SubprocessError {
 public:
  // Builds the error from the child's return code. Defined out of line.
  explicit CalledProcessError(ProcessReturnCode rc);

  // `noexcept` replaces the former `throw()` dynamic exception
  // specification, which was deprecated in C++11 and removed in C++20.
  ~CalledProcessError() noexcept override = default;

  // Returns the stored return code (presumably the one handed to the
  // constructor; the constructor body is not visible in this header).
  ProcessReturnCode returnCode() const {
    return returnCode_;
  }

 private:
  ProcessReturnCode returnCode_;
};
247
248/**
249 * Exception thrown if the subprocess cannot be started.
250 */
251class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError {
252 public:
253 SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
254 ~SubprocessSpawnError() throw() override = default;
255 int errnoValue() const {
256 return errnoValue_;
257 }
258
259 private:
260 int errnoValue_;
261};
262
263/**
264 * Subprocess.
265 */
266class Subprocess {
267 public:
// Special `action` values understood by Options::fd() in place of a real
// file descriptor number. They are negative so they cannot collide with one.
//
// Declared constexpr (like ProcessReturnCode's RV_* constants) so they are
// usable in constant expressions and, from C++17 on, are implicitly inline
// and therefore safe to ODR-use without an out-of-line definition.
static constexpr int CLOSE = -1; // close the descriptor in the child
static constexpr int PIPE = -2; // PIPE_IN for fd 0, PIPE_OUT for fds 1 and 2
static constexpr int PIPE_IN = -3; // child's input; used by pipeStdin()
static constexpr int PIPE_OUT = -4; // child's output; used by pipeStdout()
272
273 /**
274 * See Subprocess::Options::dangerousPostForkPreExecCallback() for usage.
275 * Every derived class should include the following warning:
276 *
277 * DANGER: This class runs after fork in a child process. Be fast, the
278 * parent thread is waiting, but remember that other parent threads are
279 * running and may mutate your state. Avoid mutating any data belonging to
280 * the parent. Avoid interacting with non-POD data that originated in the
281 * parent. Avoid any libraries that may internally reference non-POD data.
282 * Especially beware parent mutexes -- for example, glog's LOG() uses one.
283 */
struct DangerousPostForkPreExecCallback {
  // Virtual, so implementations can be destroyed through this interface.
  virtual ~DangerousPostForkPreExecCallback() = default;

  // Implementations must return 0 on success, or an `errno` error code.
  virtual int operator()() = 0;
};
289
290 /**
291 * Class representing various options: file descriptor behavior, and
292 * whether to use $PATH for searching for the executable.
293 *
294 * By default, we don't use $PATH, file descriptors are closed if
295 * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
296 * otherwise.
297 */
298 class Options {
299 friend class Subprocess;
300
301 public:
302 Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
303
304 /**
305 * Change action for file descriptor fd.
306 *
307 * "action" may be another file descriptor number (dup2()ed before the
308 * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
309 *
310 * CLOSE: close the file descriptor in the child
311 * PIPE_IN: open a pipe *from* the child
312 * PIPE_OUT: open a pipe *to* the child
313 *
314 * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
315 * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
316 * other file descriptors.
317 */
318 Options& fd(int fd, int action);
319
320 /**
321 * Shortcut to change the action for standard input.
322 */
323 Options& stdinFd(int action) {
324 return fd(STDIN_FILENO, action);
325 }
326
327 /**
328 * Shortcut to change the action for standard output.
329 */
330 Options& stdoutFd(int action) {
331 return fd(STDOUT_FILENO, action);
332 }
333
334 /**
335 * Shortcut to change the action for standard error.
336 * Note that stderr(1) will redirect the standard error to the same
337 * file descriptor as standard output; the equivalent of bash's "2>&1"
338 */
339 Options& stderrFd(int action) {
340 return fd(STDERR_FILENO, action);
341 }
342
343 Options& pipeStdin() {
344 return fd(STDIN_FILENO, PIPE_IN);
345 }
346 Options& pipeStdout() {
347 return fd(STDOUT_FILENO, PIPE_OUT);
348 }
349 Options& pipeStderr() {
350 return fd(STDERR_FILENO, PIPE_OUT);
351 }
352
353 /**
354 * Close all other fds (other than standard input, output, error,
355 * and file descriptors explicitly specified with fd()).
356 *
357 * This is potentially slow; it's generally a better idea to
358 * set the close-on-exec flag on all file descriptors that shouldn't
359 * be inherited by the child.
360 *
361 * Even with this option set, standard input, output, and error are
362 * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
363 * desire this.
364 */
365 Options& closeOtherFds() {
366 closeOtherFds_ = true;
367 return *this;
368 }
369
370 /**
371 * Use the search path ($PATH) when searching for the executable.
372 */
373 Options& usePath() {
374 usePath_ = true;
375 return *this;
376 }
377
378 /**
379 * Change the child's working directory, after the vfork.
380 */
381 Options& chdir(const std::string& dir) {
382 childDir_ = dir;
383 return *this;
384 }
385
386#if defined(__linux__)
387 /**
388 * Child will receive a signal when the parent *thread* exits.
389 *
390 * This is especially important when this option is used but the calling
391 * thread does not block for the duration of the subprocess. If the original
392 * thread that created the subprocess ends then the subprocess will
393 * terminate. For example, thread pool executors which can reap unused
394 * threads may trigger this behavior.
395 */
396 Options& parentDeathSignal(int sig) {
397 parentDeathSignal_ = sig;
398 return *this;
399 }
400#endif
401
402 /**
403 * Child will be made a process group leader when it starts. Upside: one
404 * can reliably kill all its non-daemonizing descendants. Downside: the
405 * child will not receive Ctrl-C etc during interactive use.
406 */
407 Options& processGroupLeader() {
408 processGroupLeader_ = true;
409 return *this;
410 }
411
412 /**
413 * Detach the spawned process, to allow destroying the Subprocess object
414 * without waiting for the child process to finish.
415 *
416 * This causes the code to fork twice before executing the command.
417 * The intermediate child process will exit immediately, causing the process
418 * running the executable to be reparented to init (pid 1).
419 *
420 * Subprocess objects created with detach() enabled will already be in an
421 * "EXITED" state when the constructor returns. The caller should not call
422 * wait() or poll() on the Subprocess, and pid() will return -1.
423 */
424 Options& detach() {
425 detach_ = true;
426 return *this;
427 }
428
429 /**
430 * *** READ THIS WHOLE DOCBLOCK BEFORE USING ***
431 *
432 * Run this callback in the child after the fork, just before the
433 * exec(), and after the child's state has been completely set up:
434 * - signal handlers have been reset to default handling and unblocked
435 * - the working directory was set
436 * - closed any file descriptors specified via Options()
437 * - set child process flags (see code)
438 *
439 * This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
440 * can cause a fraction of your Subprocess launches to hang forever:
441 *
442 * LOG(INFO) << "Hello from the child";
443 *
444 * The reason is that glog has an internal mutex. If your fork() happens
445 * when the parent has the mutex locked, the child will wait forever.
446 *
447 * == GUIDELINES ==
448 *
449 * - Be quick -- the parent thread is blocked until you exit.
450 * - Remember that other parent threads are running, and may mutate your
451 * state.
452 * - Avoid mutating any data belonging to the parent.
453 * - Avoid interacting with non-POD data that came from the parent.
454 * - Avoid any libraries that may internally reference non-POD state.
455 * - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
456 * - Avoid invoking the parent's destructors (you can accidentally
457 * delete files, terminate network connections, etc).
458 * - Read http://ewontfix.com/7/
459 */
460 Options& dangerousPostForkPreExecCallback(
461 DangerousPostForkPreExecCallback* cob) {
462 dangerousPostForkPreExecCallback_ = cob;
463 return *this;
464 }
465
466#if defined(__linux__)
467 /**
468 * This is an experimental feature, it is best you don't use it at this
469 * point of time.
470 * Although folly would support cloning with custom flags in some form, this
471 * API might change in the near future. So use the following assuming it is
472 * experimental. (Apr 11, 2017)
473 *
474 * This unlocks Subprocess to support clone flags, many of them need
475 * CAP_SYS_ADMIN permissions. It might also require you to go through the
476 * implementation to understand what happens before, between and after the
477 * fork-and-exec.
478 *
479 * `man 2 clone` would be a starting point for knowing about the available
480 * flags.
481 */
482 using clone_flags_t = uint64_t;
483 Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
484 cloneFlags_ = cloneFlags;
485 return *this;
486 }
487#endif
488
489 private:
490 typedef boost::container::flat_map<int, int> FdMap;
491 FdMap fdActions_;
492 bool closeOtherFds_{false};
493 bool usePath_{false};
494 bool processGroupLeader_{false};
495 bool detach_{false};
496 std::string childDir_; // "" keeps the parent's working directory
497#if defined(__linux__)
498 int parentDeathSignal_{0};
499#endif
500 DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{
501 nullptr};
502#if defined(__linux__)
503 // none means `vfork()` instead of a custom `clone()`
504 // Optional<> is used because value of '0' means do clone without any flags.
505 Optional<clone_flags_t> cloneFlags_;
506#endif
507 };
508
509 // Non-copyable, but movable
510 Subprocess(const Subprocess&) = delete;
511 Subprocess& operator=(const Subprocess&) = delete;
512 Subprocess(Subprocess&&) = default;
513 Subprocess& operator=(Subprocess&&) = default;
514
515 /**
516 * Create an uninitialized subprocess.
517 *
518 * In this state it can only be destroyed, or assigned to using the move
519 * assignment operator.
520 */
521 Subprocess();
522
523 /**
524 * Create a subprocess from the given arguments. argv[0] must be listed.
525 * If not-null, executable must be the actual executable
526 * being used (otherwise it's the same as argv[0]).
527 *
528 * If env is not-null, it must contain name=value strings to be used
529 * as the child's environment; otherwise, we inherit the environment
530 * from the parent. env must be null if options.usePath is set.
531 */
532 explicit Subprocess(
533 const std::vector<std::string>& argv,
534 const Options& options = Options(),
535 const char* executable = nullptr,
536 const std::vector<std::string>* env = nullptr);
537 ~Subprocess();
538
539 /**
540 * Create a subprocess run as a shell command (as shell -c 'command')
541 *
542 * The shell to use is taken from the environment variable $SHELL,
543 * or /bin/sh if $SHELL is unset.
544 */
545 // clang-format off
546 [[deprecated(
547 "Prefer not running in a shell or use `shellify`.")]]
548 explicit Subprocess(
549 const std::string& cmd,
550 const Options& options = Options(),
551 const std::vector<std::string>* env = nullptr);
552 // clang-format on
553
554 ////
555 //// The methods below only manipulate the process state, and do not
556 //// affect its communication pipes.
557 ////
558
559 /**
560 * Return the child's pid, or -1 if the child wasn't successfully spawned
561 * or has already been wait()ed upon.
562 */
563 pid_t pid() const;
564
565 /**
566 * Return the child's status (as per wait()) if the process has already
567 * been waited on, -1 if the process is still running, or -2 if the
568 * process hasn't been successfully started. NOTE that this does not call
569 * waitpid() or Subprocess::poll(), but simply returns the status stored
570 * in the Subprocess object.
571 */
572 ProcessReturnCode returnCode() const {
573 return returnCode_;
574 }
575
576 /**
577 * Poll the child's status and return it. Return the exit status if the
578 * subprocess had quit, or RUNNING otherwise. Throws an std::logic_error
579 * if called on a Subprocess whose status is no longer RUNNING. No other
580 * exceptions are possible. Aborts on egregious violations of contract,
581 * e.g. if you wait for the underlying process without going through this
582 * Subprocess instance.
583 */
584 ProcessReturnCode poll(struct rusage* ru = nullptr);
585
586 /**
587 * Poll the child's status. If the process is still running, return false.
588 * Otherwise, return true if the process exited with status 0 (success),
589 * or throw CalledProcessError if the process exited with a non-zero status.
590 */
591 bool pollChecked();
592
593 /**
594 * Wait for the process to terminate and return its status. Like poll(),
595 * the only exception this can throw is std::logic_error if you call this
596 * on a Subprocess whose status is not RUNNING. Aborts on egregious
597 * violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
598 */
599 ProcessReturnCode wait();
600
601 /**
602 * Wait for the process to terminate, throw if unsuccessful.
603 */
604 void waitChecked();
605
606 using TimeoutDuration = std::chrono::milliseconds;
607
608 /**
609 * Call `waitpid` non-blockingly up to `timeout`. Throws std::logic_error if
610 * called on a Subprocess whose status is not RUNNING.
611 *
612 * The return code will be running() if waiting timed out.
613 */
614 ProcessReturnCode waitTimeout(TimeoutDuration timeout);
615
616 /**
617 * Send a signal to the child. Shortcuts for the commonly used Unix
618 * signals are below.
619 */
620 void sendSignal(int signal);
621 void terminate() {
622 sendSignal(SIGTERM);
623 }
624 void kill() {
625 sendSignal(SIGKILL);
626 }
627
628 /**
629 * Call `waitpid` non-blockingly up to `waitTimeout`. If the process hasn't
630 * terminated after that, fall back on `terminateOrKill` with
631 * `sigtermTimeout`.
632 */
633 ProcessReturnCode waitOrTerminateOrKill(
634 TimeoutDuration waitTimeout,
635 TimeoutDuration sigtermTimeout);
636
637 /**
638 * Send the SIGTERM to terminate the process, poll `waitpid` non-blockingly
639 * several times up to `sigtermTimeout`. If the process hasn't terminated
640 * after that, send SIGKILL to kill the process and call `waitpid` blockingly.
641 * Return the exit code of process.
642 */
643 ProcessReturnCode terminateOrKill(TimeoutDuration sigtermTimeout);
644
645 ////
646 //// The methods below only affect the process's communication pipes, but
647 //// not its return code or state (they do not poll() or wait()).
648 ////
649
650 /**
651 * Communicate with the child until all pipes to/from the child are closed.
652 *
653 * The input buffer is written to the process' stdin pipe, and data is read
654 * from the stdout and stderr pipes. Non-blocking I/O is performed on all
655 * pipes simultaneously to avoid deadlocks.
656 *
657 * The stdin pipe will be closed after the full input buffer has been written.
658 * An error will be thrown if a non-empty input buffer is supplied but stdin
659 * was not configured as a pipe.
660 *
661 * Returns a pair of buffers containing the data read from stdout and stderr.
662 * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
663 * for the respective buffer.
664 *
665 * Note that communicate() and communicateIOBuf() both return when all
666 * pipes to/from the child are closed; the child might stay alive after
667 * that, so you must still wait().
668 *
669 * communicateIOBuf() uses IOBufQueue for buffering (which has the
670 * advantage that it won't try to allocate all data at once), but it does
671 * store the subprocess's entire output in memory before returning.
672 *
673 * communicate() uses strings for simplicity.
674 */
675 std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
676 IOBufQueue input = IOBufQueue());
677
678 std::pair<std::string, std::string> communicate(
679 StringPiece input = StringPiece());
680
681 /**
682 * Communicate with the child until all pipes to/from the child are closed.
683 *
684 * == Semantics ==
685 *
686 * readCallback(pfd, cfd) will be called whenever there's data available
687 * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor
688 * in the parent (that you use to read from); cfd is the file descriptor
689 * in the child (used for identifying the stream; 1 = child's standard
690 * output, 2 = child's standard error, etc)
691 *
692 * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
693 * writable (PIPE_IN). pfd is the file descriptor in the parent (that you
694 * use to write to); cfd is the file descriptor in the child (used for
695 * identifying the stream; 0 = child's standard input, etc)
696 *
697 * The read and write callbacks must read from / write to pfd and return
698 * false during normal operation. Return true to tell communicate() to
699 * close the pipe. For readCallback, this might send SIGPIPE to the
700 * child, or make its writes fail with EPIPE, so you should generally
701 * avoid returning true unless you've reached end-of-file.
702 *
703 * communicate() returns when all pipes to/from the child are closed; the
704 * child might stay alive after that, so you must still wait().
705 * Conversely, the child may quit long before its pipes are closed, since
706 * its descendants can keep them alive forever.
707 *
708 * Most users won't need to use this callback version; the simpler version
709 * of communicate (which buffers data in memory) will probably work fine.
710 *
711 * == Things you must get correct ==
712 *
713 * 1) You MUST consume all data passed to readCallback (or return true to
714 * close the pipe). Similarly, you MUST write to a writable pipe (or
715 * return true to close the pipe). To do otherwise is an error that can
716 * result in a deadlock. You must do this even for pipes you are not
717 * interested in.
718 *
719 * 2) pfd is nonblocking, so be prepared for read() / write() to return -1
720 * and set errno to EAGAIN (in which case you should return false). Use
721 * readNoInt() from FileUtil.h to handle interrupted reads for you.
722 *
723 * 3) Your callbacks MUST NOT call any of the Subprocess methods that
724 * manipulate the pipe FDs. Check the docblocks, but, for example,
725 * neither closeParentFd (return true instead) nor takeOwnershipOfPipes
726 * are safe. Stick to reading/writing from pfd, as appropriate.
727 *
728 * == Good to know ==
729 *
730 * 1) See ReadLinesCallback for an easy way to consume the child's output
731 * streams line-by-line (or tokenized by another delimiter).
732 *
733 * 2) "Wait until the descendants close the pipes" is usually the behavior
734 * you want, since the descendants may have something to say even if the
735 * immediate child is dead. If you need to be able to force-close all
736 * parent FDs, communicate() will NOT work for you. Do it your own way by
737 * using takeOwnershipOfPipes().
738 *
739 * Why not? You can return "true" from your callbacks to sever active
740 * pipes, but inactive ones can remain open indefinitely. It is
741 * impossible to safely close inactive pipes while another thread is
742 * blocked in communicate(). This is BY DESIGN. Racing communicate()'s
743 * read/write callbacks can result in wrong I/O and data corruption. This
744 * class would need internal synchronization and timeouts, a poor and
745 * expensive implementation choice, in order to make closeParentFd()
746 * thread-safe.
747 */
748 using FdCallback = folly::Function<bool(int, int)>;
749 void communicate(FdCallback readCallback, FdCallback writeCallback);
750
751 /**
752 * A readCallback for Subprocess::communicate() that helps you consume
753 * lines (or other delimited pieces) from your subprocess's file
754 * descriptors. Use the readLinesCallback() helper to get template
755 * deduction. For example:
756 *
757 * subprocess.communicate(
758 * Subprocess::readLinesCallback(
759 * [](int fd, folly::StringPiece s) {
760 * std::cout << fd << " said: " << s;
761 * return false; // Keep reading from the child
762 * }
763 * ),
764 * [](int pfd, int cfd){ return true; } // Don't write to the child
765 * );
766 *
767 * If a file line exceeds maxLineLength, your callback will get some
768 * initial chunks of maxLineLength with no trailing delimiters. The final
769 * chunk of a line is delimiter-terminated iff the delimiter was present
770 * in the input. In particular, the last line in a file always lacks a
771 * delimiter -- so if a file ends on a delimiter, the final line is empty.
772 *
773 * Like a regular communicate() callback, your fdLineCb() normally returns
774 * false. It may return true to tell Subprocess to close the underlying
775 * file descriptor. The child process may then receive SIGPIPE or get
776 * EPIPE errors on writes.
777 */
778 template <class Callback>
779 class ReadLinesCallback {
780 private:
781 // Binds an FD to the client-provided FD+line callback
782 struct StreamSplitterCallback {
783 StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {}
784 // The return value semantics are inverted vs StreamSplitter
785 bool operator()(StringPiece s) {
786 return !cb_(fd_, s);
787 }
788 Callback& cb_;
789 int fd_;
790 };
791 typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
792
793 public:
794 explicit ReadLinesCallback(
795 Callback&& fdLineCb,
796 uint64_t maxLineLength = 0, // No line length limit by default
797 char delimiter = '\n',
798 uint64_t bufSize = 1024)
799 : fdLineCb_(std::forward<Callback>(fdLineCb)),
800 maxLineLength_(maxLineLength),
801 delimiter_(delimiter),
802 bufSize_(bufSize) {}
803
804 bool operator()(int pfd, int cfd) {
805 // Make a splitter for this cfd if it doesn't already exist
806 auto it = fdToSplitter_.find(cfd);
807 auto& splitter = (it != fdToSplitter_.end())
808 ? it->second
809 : fdToSplitter_
810 .emplace(
811 cfd,
812 LineSplitter(
813 delimiter_,
814 StreamSplitterCallback(fdLineCb_, cfd),
815 maxLineLength_))
816 .first->second;
817 // Read as much as we can from this FD
818 char buf[bufSize_];
819 while (true) {
820 ssize_t ret = readNoInt(pfd, buf, bufSize_);
821 if (ret == -1 && errno == EAGAIN) { // No more data for now
822 return false;
823 }
824 checkUnixError(ret, "read");
825 if (ret == 0) { // Reached end-of-file
826 splitter.flush(); // Ignore return since the file is over anyway
827 return true;
828 }
829 if (!splitter(StringPiece(buf, ret))) {
830 return true; // The callback told us to stop
831 }
832 }
833 }
834
835 private:
836 Callback fdLineCb_;
837 const uint64_t maxLineLength_;
838 const char delimiter_;
839 const uint64_t bufSize_;
840 // We lazily make splitters for all cfds that get used.
841 std::unordered_map<int, LineSplitter> fdToSplitter_;
842 };
843
844 // Helper to enable template deduction
845 template <class Callback>
846 static auto readLinesCallback(
847 Callback&& fdLineCb,
848 uint64_t maxLineLength = 0, // No line length limit by default
849 char delimiter = '\n',
850 uint64_t bufSize = 1024)
851 -> ReadLinesCallback<typename std::decay<Callback>::type> {
852 return ReadLinesCallback<typename std::decay<Callback>::type>(
853 std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
854 }
855
856 /**
857 * communicate() callbacks can use this to temporarily enable/disable
858 * notifications (callbacks) for a pipe to/from the child. By default,
859 * all are enabled. Useful for "chatty" communication -- you want to
860 * disable write callbacks until you receive the expected message.
861 *
862 * Disabling a pipe does not free you from the requirement to consume all
863 * incoming data. Failing to do so will easily create deadlock bugs.
864 *
865 * Throws if the childFd is not known.
866 */
867 void enableNotifications(int childFd, bool enabled);
868
869 /**
870 * Are notifications for one pipe to/from child enabled? Throws if the
871 * childFd is not known.
872 */
873 bool notificationsEnabled(int childFd) const;
874
875 ////
876 //// The following methods are meant for the cases when communicate() is
877 //// not suitable. You should not need them when you call communicate(),
878 //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
879 //// takeOwnershipOfPipes() from a communicate() callback.
880 ////
881
/**
 * Close the parent file descriptor that corresponds to the given file
 * descriptor in the child.
 *
 * DO NOT USE from communicate() callbacks; make them return true instead.
 */
void closeParentFd(int childFd);
887
/**
 * Set all pipes from / to child to be non-blocking.  communicate() does
 * this for you, so calling it yourself only matters when you are not using
 * communicate().
 */
void setAllNonBlocking();
893
894 /**
895 * Get parent file descriptor corresponding to the given file descriptor
896 * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
897 * Do not close() the returned file descriptor; use closeParentFd, above.
898 */
899 int parentFd(int childFd) const {
900 return pipes_[findByChildFd(childFd)].pipe.fd();
901 }
902 int stdinFd() const {
903 return parentFd(0);
904 }
905 int stdoutFd() const {
906 return parentFd(1);
907 }
908 int stderrFd() const {
909 return parentFd(2);
910 }
911
912 /**
913 * The child's pipes are logically separate from the process metadata
914 * (they may even be kept alive by the child's descendants). This call
 * lets you manage the pipes' lifetime separately from the lifetime of the
916 * child process.
917 *
918 * After this call, the Subprocess instance will have no knowledge of
919 * these pipes, and the caller assumes responsibility for managing their
920 * lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
921 * folly::File would otherwise silently suppress I/O errors.
922 *
923 * No, you may NOT call this from a communicate() callback.
924 */
925 struct ChildPipe {
926 ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe(std::move(ppe)) {}
927 int childFd;
928 folly::File pipe; // Owns the parent FD
929 };
// Releases the pipes to the caller as ChildPipe entries; the full contract is
// documented in the comment above struct ChildPipe.  Must not be called from
// a communicate() callback.
std::vector<ChildPipe> takeOwnershipOfPipes();
931
932 private:
// spawn() sets up a pipe to read errors from the child, then calls
// spawnInternal() to do the bulk of the work.  Once spawnInternal() returns,
// it reads the error pipe to see if the child encountered any errors.
//
// argv is taken by value as a unique_ptr, so the caller hands over ownership
// of the array.
// NOTE(review): env is passed by pointer; whether nullptr is accepted (and
// what it means) is not visible here -- confirm against the implementation.
void spawn(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    const Options& options,
    const std::vector<std::string>* env);
// Called by spawn() to do the bulk of the work of starting the child.
// Unlike spawn(), this receives the options by non-const reference.
// NOTE(review): errFd is presumably the end of spawn()'s error pipe that the
// child reports failures on -- confirm against the implementation.
void spawnInternal(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    Options& options,
    const std::vector<std::string>* env,
    int errFd);
948
// Actions to run in child.
// Note that this runs after vfork(), so tread lightly.
// Returns 0 on success, or an errno value on failure.
// NOTE(review): sigmask and childDir look like the signal mask and the
// working directory to apply in the child -- confirm against the
// implementation.
int prepareChild(
    const Options& options,
    const sigset_t* sigmask,
    const char* childDir) const;
// NOTE(review): appears to be the companion of prepareChild() and to share
// its contract (runs in the child; returns an errno value on failure).
// Presumably this is where `executable` is exec'ed with the given argv and
// env -- confirm against the implementation.
int runChild(
    const char* executable,
    char** argv,
    char** env,
    const Options& options) const;
961
/**
 * Read from the error pipe, and throw SubprocessSpawnError if the child
 * failed before calling exec().
 *
 * @param pfd         file descriptor to read from; presumably the parent's
 *                    end of the error pipe -- confirm
 * @param executable  NOTE(review): presumably only used to describe the
 *                    failure in the thrown error -- confirm
 */
void readChildErrorPipe(int pfd, const char* executable);
967
// Looks up a pipe by its file descriptor in the child.
// Returns an index into pipes_.  Throws std::invalid_argument if not found.
size_t findByChildFd(const int childFd) const;
970
// Initialized to -1.  NOTE(review): presumably holds the child's process id
// once a child has been spawned -- confirm.
pid_t pid_{-1};
// NOTE(review): presumably tracks the child's state / exit status -- confirm
// against the definition of ProcessReturnCode.
ProcessReturnCode returnCode_;
973
974 /**
975 * Represents a pipe between this process, and the child process (or its
976 * descendant). To interact with these pipes, you can use communicate(),
977 * or use parentFd() and related methods, or separate them from the
978 * Subprocess instance entirely via takeOwnershipOfPipes().
979 */
980 struct Pipe : private boost::totally_ordered<Pipe> {
981 folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
982 int childFd = -1; // Identifies the pipe: what FD is this in the child?
983 int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
984 bool enabled = true; // Are notifications enabled in communicate()?
985
986 bool operator<(const Pipe& other) const {
987 return childFd < other.childFd;
988 }
989 bool operator==(const Pipe& other) const {
990 return childFd == other.childFd;
991 }
992 };
993
// The pipes between the parent and the child.
//
// Populated at process start according to fdActions, empty after
// takeOwnershipOfPipes().  Sorted by childFd.  Can only have elements
// erased, but not inserted, after being populated.
//
// The number of pipes between parent and child is assumed to be small,
// so we're happy with a vector here, even if it means linear erase.
std::vector<Pipe> pipes_;
1001};
1002
1003} // namespace folly
1004