Subprocess.h source code [glow/thirdparty/folly/folly/Subprocess.h]

1	/*
2	* Copyright (c) Facebook, Inc. and its affiliates.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	/**
18	* Subprocess library, modeled after Python's subprocess module
19	* (http://docs.python.org/2/library/subprocess.html)
20	*
21	* This library defines one class (Subprocess) which represents a child
22	* process. Subprocess has two constructors: one that takes a vector<string>
23	* and executes the given executable without using the shell, and one
24	* that takes a string and executes the given command using the shell.
25	* Subprocess allows you to redirect the child's standard input, standard
26	* output, and standard error to/from child descriptors in the parent,
27	* or to create communication pipes between the child and the parent.
28	*
29	* The simplest example is a thread-safe [1] version of the system() library
30	* function:
31	* Subprocess(cmd).wait();
32	* which executes the command using the default shell and waits for it
33	* to complete, returning the exit status.
34	*
35	* A thread-safe [1] version of popen() (type="r", to read from the child):
36	* Subprocess proc(cmd, Subprocess::Options().pipeStdout());
37	* // read from proc.stdoutFd()
38	* proc.wait();
39	*
40	* A thread-safe [1] version of popen() (type="w", to write to the child):
41	* Subprocess proc(cmd, Subprocess::Options().pipeStdin());
42	* // write to proc.stdinFd()
43	* proc.wait();
44	*
45	* If you want to redirect both stdin and stdout to pipes, you can, but note
46	* that you're subject to a variety of deadlocks. You'll want to use
47	* nonblocking I/O, like the callback version of communicate().
48	*
49	* The string or IOBuf-based variants of communicate() are the simplest way
50	* to communicate with a child via its standard input, standard output, and
51	* standard error. They buffer everything in memory, so they are not great
52	* for large amounts of data (or long-running processes), but they are much
53	* simpler than the callback version.
54	*
55	* == A note on thread-safety ==
56	*
57	* [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
58	* to fork in a way that does not cause grief in multithreaded programs.
59	*
60	* Caveat: If your system does not have the atomic pipe2 system call, it is
61	* not safe to concurrently call Subprocess from different threads.
62	* Therefore, it is best to have a single thread be responsible for spawning
63	* subprocesses.
64	*
65	* A particular instance of Subprocess is emphatically not thread-safe.
66	* If you need to simultaneously communicate via the pipes, and interact
67	* with the Subprocess state, your best bet is to:
68	* - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
69	* - Only interact with the Subprocess from one thread at a time.
70	*
71	* The current implementation of communicate() cannot be safely interrupted.
72	* To do so correctly, one would need to use EventFD, or open a dedicated
73	* pipe to be messaged from a different thread -- in particular, kill() will
74	* not do, since a descendant may keep the pipes open indefinitely.
75	*
76	* So, once you call communicate(), you must wait for it to return, and not
77	* touch the pipes from other threads. closeParentFd() is emphatically
78	* unsafe to call concurrently, and even sendSignal() is not a good idea.
79	* You can perhaps give the Subprocess's PID to a different thread before
80	* starting communicate(), and use that PID to send a signal without
81	* accessing the Subprocess object. In that case, you will need a mutex
82	* that ensures you don't wait() before you have sent said signal. In a
83	* nutshell, don't do this.
84	*
85	* In fact, signals are inherently concurrency-unsafe on Unix: if you signal
86	* a PID, while another thread is in waitpid(), the signal may fire either
87	* before or after the process is reaped. This means that your signal can,
88	* in pathological circumstances, be delivered to the wrong process (ouch!).
89	* To avoid this, you should only use non-blocking waits (i.e. poll()), and
90	* make sure to serialize your signals (i.e. kill()) with the waits --
91	* either wait & signal from the same thread, or use a mutex.
92	*/
93
94	#pragma once
95
96	#include <signal.h>
97	#include <sys/types.h>
98	#include <sys/wait.h>
99
100	#include <chrono>
101	#include <exception>
102	#include <string>
103	#include <vector>
104
105	#include <boost/container/flat_map.hpp>
106	#include <boost/operators.hpp>
107
108	#include <folly/Exception.h>
109	#include <folly/File.h>
110	#include <folly/FileUtil.h>
111	#include <folly/Function.h>
112	#include <folly/MapUtil.h>
113	#include <folly/Optional.h>
114	#include <folly/Portability.h>
115	#include <folly/Range.h>
116	#include <folly/gen/String.h>
117	#include <folly/io/IOBufQueue.h>
118	#include <folly/portability/SysResource.h>
119
120	namespace folly {
121
122	/**
123	* Class to wrap a process return code.
124	*/
125	class Subprocess;
class ProcessReturnCode {
 public:
  /**
   * Coarse lifecycle state of the process this return code describes.
   */
  enum State {
    // Subprocess starts in the constructor, so this state designates only
    // default-initialized or moved-out ProcessReturnCodes.
    NOT_STARTED,
    RUNNING,
    EXITED,
    KILLED,
  };

  /**
   * Factory wrapping the RV_NOT_STARTED sentinel.
   */
  static ProcessReturnCode makeNotStarted() {
    return ProcessReturnCode(RV_NOT_STARTED);
  }

  /**
   * Factory wrapping the RV_RUNNING sentinel.
   */
  static ProcessReturnCode makeRunning() {
    return ProcessReturnCode(RV_RUNNING);
  }

  /**
   * Factory taking a raw integer status (defined out of line).
   */
  static ProcessReturnCode make(int status);

  // Default-initialized for convenience. Subprocess::returnCode() will
  // never produce this value.
  ProcessReturnCode() : rawStatus_(RV_NOT_STARTED) {}

  // Trivially copyable
  ProcessReturnCode(const ProcessReturnCode& p) = default;
  ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
  // Non-default move: In order for Subprocess to be movable, the "moved
  // out" state must not be "running", or ~Subprocess() will abort.
  ProcessReturnCode(ProcessReturnCode&& p) noexcept;
  ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;

  /**
   * Process state. One of:
   *   NOT_STARTED: process hasn't been started successfully
   *   RUNNING: process is currently running
   *   EXITED: process exited (successfully or not)
   *   KILLED: process was killed by a signal.
   */
  State state() const;

  /**
   * Helper wrappers around state().
   */
  bool notStarted() const {
    return state() == NOT_STARTED;
  }
  bool running() const {
    return state() == RUNNING;
  }
  bool exited() const {
    return state() == EXITED;
  }
  bool killed() const {
    return state() == KILLED;
  }

  /**
   * Exit status. Only valid if state() == EXITED; throws otherwise.
   */
  int exitStatus() const;

  /**
   * Signal that caused the process's termination. Only valid if
   * state() == KILLED; throws otherwise.
   */
  int killSignal() const;

  /**
   * Was a core file generated? Only valid if state() == KILLED; throws
   * otherwise.
   */
  bool coreDumped() const;

  /**
   * String representation; one of
   *   "not started"
   *   "running"
   *   "exited with status <status>"
   *   "killed by signal <signal>"
   *   "killed by signal <signal> (core dumped)"
   */
  std::string str() const;

  /**
   * Helper function to enforce a precondition based on this.
   * Throws std::logic_error if in an unexpected state.
   */
  void enforce(State expected) const;

 private:
  // Only the factories above construct from a raw value.
  explicit ProcessReturnCode(int rv) : rawStatus_(rv) {}

  // Sentinel raw values used by the default constructor and the
  // makeNotStarted() / makeRunning() factories.
  static constexpr int RV_NOT_STARTED = -2;
  static constexpr int RV_RUNNING = -1;

  // Either one of the sentinels above or the value handed to make().
  int rawStatus_;
};
224
225	/**
226	* Base exception thrown by the Subprocess methods.
227	*/
228	class FOLLY_EXPORT SubprocessError : public std::runtime_error {
229	public:
230	using std::runtime_error::runtime_error;
231	};
232
233	/**
234	* Exception thrown by *Checked methods of Subprocess.
235	*/
class FOLLY_EXPORT CalledProcessError : public SubprocessError {
 public:
  /**
   * @param rc  Return code of the process that triggered the error; a copy
   *            is available afterwards through returnCode().
   */
  explicit CalledProcessError(ProcessReturnCode rc);

  // `noexcept` rather than the dynamic exception specification `throw()`,
  // which is deprecated in C++17 and removed in C++20.
  ~CalledProcessError() noexcept override = default;

  /** Return code carried by this exception. */
  ProcessReturnCode returnCode() const {
    return returnCode_;
  }

 private:
  ProcessReturnCode returnCode_;
};
247
248	/**
249	* Exception thrown if the subprocess cannot be started.
250	*/
251	class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError {
252	public:
253	SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
254	~SubprocessSpawnError() throw() override = default;
255	int errnoValue() const {
256	return errnoValue_;
257	}
258
259	private:
260	int errnoValue_;
261	};
262
263	/**
264	* Subprocess.
265	*/
266	class Subprocess {
267	public:
// Special "action" values understood by Options::fd(). They are negative so
// they cannot collide with a real file descriptor number passed as action.
static const int CLOSE = -1;
static const int PIPE = -2;
static const int PIPE_IN = -3;
static const int PIPE_OUT = -4;
272
273	/**
274	* See Subprocess::Options::dangerousPostForkPreExecCallback() for usage.
275	* Every derived class should include the following warning:
276	*
277	* DANGER: This class runs after fork in the child process. Be fast, the
278	* parent thread is waiting, but remember that other parent threads are
279	* running and may mutate your state. Avoid mutating any data belonging to
280	* the parent. Avoid interacting with non-POD data that originated in the
281	* parent. Avoid any libraries that may internally reference non-POD data.
282	* Especially beware parent mutexes -- for example, glog's LOG() uses one.
283	*/
struct DangerousPostForkPreExecCallback {
  // Virtual so that implementations can be destroyed through a base pointer.
  virtual ~DangerousPostForkPreExecCallback() = default;

  // This must return 0 on success, or an `errno` error code.
  virtual int operator()() = 0;
};
289
290	/**
291	* Class representing various options: file descriptor behavior, and
292	* whether to use $PATH for searching for the executable,
293	*
294	* By default, we don't use $PATH, file descriptors are closed if
295	* the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
296	* otherwise.
297	*/
298	class Options {
299	friend class Subprocess;
300
301	public:
302	Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
303
304	/**
305	* Change action for file descriptor fd.
306	*
307	* "action" may be another file descriptor number (dup2()ed before the
308	* child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
309	*
310	* CLOSE: close the file descriptor in the child
311	* PIPE_IN: open a pipe from the child
312	* PIPE_OUT: open a pipe to the child
313	*
314	* PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
315	* PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
316	* other file descriptors.
317	*/
318	Options& fd(int fd, int action);
319
320	/**
321	* Shortcut to change the action for standard input.
322	*/
323	Options& stdinFd(int action) {
324	return fd(STDIN_FILENO, action);
325	}
326
327	/**
328	* Shortcut to change the action for standard output.
329	*/
330	Options& stdoutFd(int action) {
331	return fd(STDOUT_FILENO, action);
332	}
333
334	/**
335	* Shortcut to change the action for standard error.
336	* Note that stderr(1) will redirect the standard error to the same
337	* file descriptor as standard output; the equivalent of bash's "2>&1"
338	*/
339	Options& stderrFd(int action) {
340	return fd(STDERR_FILENO, action);
341	}
342
343	Options& pipeStdin() {
344	return fd(STDIN_FILENO, PIPE_IN);
345	}
346	Options& pipeStdout() {
347	return fd(STDOUT_FILENO, PIPE_OUT);
348	}
349	Options& pipeStderr() {
350	return fd(STDERR_FILENO, PIPE_OUT);
351	}
352
353	/**
354	* Close all other fds (other than standard input, output, error,
355	* and file descriptors explicitly specified with fd()).
356	*
357	* This is potentially slow; it's generally a better idea to
358	* set the close-on-exec flag on all file descriptors that shouldn't
359	* be inherited by the child.
360	*
361	* Even with this option set, standard input, output, and error are
362	* not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
363	* desire this.
364	*/
365	Options& closeOtherFds() {
366	closeOtherFds_ = true;
367	return *this;
368	}
369
370	/**
371	* Use the search path ($PATH) when searching for the executable.
372	*/
373	Options& usePath() {
374	usePath_ = true;
375	return *this;
376	}
377
378	/**
379	* Change the child's working directory, after the vfork.
380	*/
381	Options& chdir(const std::string& dir) {
382	childDir_ = dir;
383	return *this;
384	}
385
386	#if defined(__linux__)
387	/**
388	* Child will receive a signal when the parent thread exits.
389	*
390	* This is especially important when this option is used but the calling
391	* thread does not block for the duration of the subprocess. If the original
392	* thread that created the subprocess ends then the subprocess will
393	* terminate. For example, thread pool executors which can reap unused
394	* threads may trigger this behavior.
395	*/
396	Options& parentDeathSignal(int sig) {
397	parentDeathSignal_ = sig;
398	return *this;
399	}
400	#endif
401
402	/**
403	* Child will be made a process group leader when it starts. Upside: one
404	* can reliably kill all its non-daemonizing descendants. Downside: the
405	* child will not receive Ctrl-C etc during interactive use.
406	*/
407	Options& processGroupLeader() {
408	processGroupLeader_ = true;
409	return *this;
410	}
411
412	/**
413	* Detach the spawned process, to allow destroying the Subprocess object
414	* without waiting for the child process to finish.
415	*
416	* This causes the code to fork twice before executing the command.
417	* The intermediate child process will exit immediately, causing the process
418	* running the executable to be reparented to init (pid 1).
419	*
420	* Subprocess objects created with detach() enabled will already be in an
421	* "EXITED" state when the constructor returns. The caller should not call
422	* wait() or poll() on the Subprocess, and pid() will return -1.
423	*/
424	Options& detach() {
425	detach_ = true;
426	return *this;
427	}
428
429	/**
430	* * READ THIS WHOLE DOCBLOCK BEFORE USING *
431	*
432	* Run this callback in the child after the fork, just before the
433	* exec(), and after the child's state has been completely set up:
434	* - signal handlers have been reset to default handling and unblocked
435	* - the working directory was set
436	* - closed any file descriptors specified via Options()
437	* - set child process flags (see code)
438	*
439	* This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
440	* can cause a fraction of your Subprocess launches to hang forever:
441	*
442	* LOG(INFO) << "Hello from the child";
443	*
444	* The reason is that glog has an internal mutex. If your fork() happens
445	* when the parent has the mutex locked, the child will wait forever.
446	*
447	* == GUIDELINES ==
448	*
449	* - Be quick -- the parent thread is blocked until you exit.
450	* - Remember that other parent threads are running, and may mutate your
451	* state.
452	* - Avoid mutating any data belonging to the parent.
453	* - Avoid interacting with non-POD data that came from the parent.
454	* - Avoid any libraries that may internally reference non-POD state.
455	* - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
456	* - Avoid invoking the parent's destructors (you can accidentally
457	* delete files, terminate network connections, etc).
458	* - Read http://ewontfix.com/7/
459	*/
460	Options& dangerousPostForkPreExecCallback(
461	DangerousPostForkPreExecCallback* cob) {
462	dangerousPostForkPreExecCallback_ = cob;
463	return *this;
464	}
465
466	#if defined(__linux__)
467	/**
468	* This is an experimental feature, it is best you don't use it at this
469	* point of time.
470	* Although folly would support cloning with custom flags in some form, this
471	* API might change in the near future. So use the following assuming it is
472	* experimental. (Apr 11, 2017)
473	*
474	* This unlocks Subprocess to support clone flags, many of them need
475	* CAP_SYS_ADMIN permissions. It might also require you to go through the
476	* implementation to understand what happens before, between and after the
477	* fork-and-exec.
478	*
479	* `man 2 clone` would be a starting point for knowing about the available
480	* flags.
481	*/
482	using clone_flags_t = uint64_t;
483	Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
484	cloneFlags_ = cloneFlags;
485	return *this;
486	}
487	#endif
488
489	private:
490	typedef boost::container::flat_map<int, int> FdMap;
491	FdMap fdActions_;
492	bool closeOtherFds_{false};
493	bool usePath_{false};
494	bool processGroupLeader_{false};
495	bool detach_{false};
496	std::string childDir_; // "" keeps the parent's working directory
497	#if defined(__linux__)
498	int parentDeathSignal_{`0`};
499	#endif
500	DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{
501	nullptr};
502	#if defined(__linux__)
503	// none means `vfork()` instead of a custom `clone()`
504	// Optional<> is used because value of '0' means do clone without any flags.
505	Optional<clone_flags_t> cloneFlags_;
506	#endif
507	};
508
509	// Non-copiable, but movable
510	Subprocess(const Subprocess&) = delete;
511	Subprocess& operator=(const Subprocess&) = delete;
512	Subprocess(Subprocess&&) = default;
513	Subprocess& operator=(Subprocess&&) = default;
514
515	/**
516	* Create an uninitialized subprocess.
517	*
518	* In this state it can only be destroyed, or assigned to using the move
519	* assignment operator.
520	*/
521	Subprocess();
522
523	/**
524	* Create a subprocess from the given arguments. argv[0] must be listed.
525	* If not-null, executable must be the actual executable
526	* being used (otherwise it's the same as argv[0]).
527	*
528	* If env is not-null, it must contain name=value strings to be used
529	* as the child's environment; otherwise, we inherit the environment
530	* from the parent. env must be null if options.usePath is set.
531	*/
532	explicit Subprocess(
533	const std::vector<std::string>& argv,
534	const Options& options = Options (),
535	const char* executable = nullptr,
536	const std::vector<std::string>* env = nullptr);
537	~Subprocess();
538
539	/**
540	* Create a subprocess run as a shell command (as shell -c 'command')
541	*
542	* The shell to use is taken from the environment variable $SHELL,
543	* or /bin/sh if $SHELL is unset.
544	*/
545	// clang-format off
546	[[deprecated(
547	"Prefer not running in a shell or use `shellify`.")]]
548	explicit Subprocess(
549	const std::string& cmd,
550	const Options& options = Options (),
551	const std::vector<std::string>* env = nullptr);
552	// clang-format on
553
554	////
555	//// The methods below only manipulate the process state, and do not
556	//// affect its communication pipes.
557	////
558
559	/**
560	* Return the child's pid, or -1 if the child wasn't successfully spawned
561	* or has already been wait()ed upon.
562	*/
563	pid_t pid() const;
564
565	/**
566	* Return the child's status as a ProcessReturnCode: the wait() status if
567	* the process has already been waited on, a RUNNING code if the process
568	* is still running, or a NOT_STARTED code if the process hasn't been
569	* successfully started. NOTE that this does not call waitpid() or
570	* Subprocess::poll(), but simply returns the status stored in the
570	* Subprocess object.
571	*/
572	ProcessReturnCode returnCode() const {
573	return returnCode_;
574	}
575
576	/**
577	* Poll the child's status and return it. Return the exit status if the
578	* subprocess had quit, or RUNNING otherwise. Throws an std::logic_error
579	* if called on a Subprocess whose status is no longer RUNNING. No other
580	* exceptions are possible. Aborts on egregious violations of contract,
581	* e.g. if you wait for the underlying process without going through this
582	* Subprocess instance.
583	*/
584	ProcessReturnCode poll(struct rusage* ru = nullptr);
585
586	/**
587	* Poll the child's status. If the process is still running, return false.
588	* Otherwise, return true if the process exited with status 0 (success),
589	* or throw CalledProcessError if the process exited with a non-zero status.
590	*/
591	bool pollChecked();
592
593	/**
594	* Wait for the process to terminate and return its status. Like poll(),
595	* the only exception this can throw is std::logic_error if you call this
596	* on a Subprocess whose status is not RUNNING. Aborts on egregious
597	* violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
598	*/
599	ProcessReturnCode wait();
600
601	/**
602	* Wait for the process to terminate, throw if unsuccessful.
603	*/
604	void waitChecked();
605
606	using TimeoutDuration = std::chrono::milliseconds;
607
608	/**
609	* Call `waitpid` non-blockingly up to `timeout`. Throws std::logic_error if
610	* called on a Subprocess whose status is not RUNNING.
611	*
612	* The return code will be running() if waiting timed out.
613	*/
614	ProcessReturnCode waitTimeout(TimeoutDuration timeout);
615
616	/**
617	* Send a signal to the child. Shortcuts for the commonly used Unix
618	* signals are below.
619	*/
620	void sendSignal(int signal);
621	void terminate() {
622	sendSignal(SIGTERM);
623	}
624	void kill() {
625	sendSignal(SIGKILL);
626	}
627
628	/**
629	* Call `waitpid` non-blockingly up to `waitTimeout`. If the process hasn't
630	* terminated after that, fall back on `terminateOrKill` with
631	* `sigtermTimeout`.
632	*/
633	ProcessReturnCode waitOrTerminateOrKill(
634	TimeoutDuration waitTimeout,
635	TimeoutDuration sigtermTimeout);
636
637	/**
638	* Send the SIGTERM to terminate the process, poll `waitpid` non-blockingly
639	* several times up to `sigtermTimeout`. If the process hasn't terminated
640	* after that, send SIGKILL to kill the process and call `waitpid` blockingly.
641	* Return the exit code of process.
642	*/
643	ProcessReturnCode terminateOrKill(TimeoutDuration sigtermTimeout);
644
645	////
646	//// The methods below only affect the process's communication pipes, but
647	//// not its return code or state (they do not poll() or wait()).
648	////
649
650	/**
651	* Communicate with the child until all pipes to/from the child are closed.
652	*
653	* The input buffer is written to the process' stdin pipe, and data is read
654	* from the stdout and stderr pipes. Non-blocking I/O is performed on all
655	* pipes simultaneously to avoid deadlocks.
656	*
657	* The stdin pipe will be closed after the full input buffer has been written.
658	* An error will be thrown if a non-empty input buffer is supplied but stdin
659	* was not configured as a pipe.
660	*
661	* Returns a pair of buffers containing the data read from stdout and stderr.
662	* If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
663	* for the respective buffer.
664	*
665	* Note that communicate() and communicateIOBuf() both return when all
666	* pipes to/from the child are closed; the child might stay alive after
667	* that, so you must still wait().
668	*
669	* communicateIOBuf() uses IOBufQueue for buffering (which has the
670	* advantage that it won't try to allocate all data at once), but it does
671	* store the subprocess's entire output in memory before returning.
672	*
673	* communicate() uses strings for simplicity.
674	*/
675	std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
676	IOBufQueue input = IOBufQueue ());
677
678	std::pair<std::string, std::string> communicate(
679	StringPiece input = StringPiece ());
680
681	/**
682	* Communicate with the child until all pipes to/from the child are closed.
683	*
684	* == Semantics ==
685	*
686	* readCallback(pfd, cfd) will be called whenever there's data available
687	* on any pipe from the child (PIPE_OUT). pfd is the file descriptor
688	* in the parent (that you use to read from); cfd is the file descriptor
689	* in the child (used for identifying the stream; 1 = child's standard
690	* output, 2 = child's standard error, etc)
691	*
692	* writeCallback(pfd, cfd) will be called whenever a pipe to the child is
693	* writable (PIPE_IN). pfd is the file descriptor in the parent (that you
694	* use to write to); cfd is the file descriptor in the child (used for
695	* identifying the stream; 0 = child's standard input, etc)
696	*
697	* The read and write callbacks must read from / write to pfd and return
698	* false during normal operation. Return true to tell communicate() to
699	* close the pipe. For readCallback, this might send SIGPIPE to the
700	* child, or make its writes fail with EPIPE, so you should generally
701	* avoid returning true unless you've reached end-of-file.
702	*
703	* communicate() returns when all pipes to/from the child are closed; the
704	* child might stay alive after that, so you must still wait().
705	* Conversely, the child may quit long before its pipes are closed, since
706	* its descendants can keep them alive forever.
707	*
708	* Most users won't need to use this callback version; the simpler version
709	* of communicate (which buffers data in memory) will probably work fine.
710	*
711	* == Things you must get correct ==
712	*
713	* 1) You MUST consume all data passed to readCallback (or return true to
714	* close the pipe). Similarly, you MUST write to a writable pipe (or
715	* return true to close the pipe). To do otherwise is an error that can
716	* result in a deadlock. You must do this even for pipes you are not
717	* interested in.
718	*
719	* 2) pfd is nonblocking, so be prepared for read() / write() to return -1
720	* and set errno to EAGAIN (in which case you should return false). Use
721	* readNoInt() from FileUtil.h to handle interrupted reads for you.
722	*
723	* 3) Your callbacks MUST NOT call any of the Subprocess methods that
724	* manipulate the pipe FDs. Check the docblocks, but, for example,
725	* neither closeParentFd (return true instead) nor takeOwnershipOfPipes
726	* are safe. Stick to reading/writing from pfd, as appropriate.
727	*
728	* == Good to know ==
729	*
730	* 1) See ReadLinesCallback for an easy way to consume the child's output
731	* streams line-by-line (or tokenized by another delimiter).
732	*
733	* 2) "Wait until the descendants close the pipes" is usually the behavior
734	* you want, since the descendants may have something to say even if the
735	* immediate child is dead. If you need to be able to force-close all
736	* parent FDs, communicate() will NOT work for you. Do it your own way by
737	* using takeOwnershipOfPipes().
738	*
739	* Why not? You can return "true" from your callbacks to sever active
740	* pipes, but inactive ones can remain open indefinitely. It is
741	* impossible to safely close inactive pipes while another thread is
742	* blocked in communicate(). This is BY DESIGN. Racing communicate()'s
743	* read/write callbacks can result in wrong I/O and data corruption. This
744	* class would need internal synchronization and timeouts, a poor and
745	* expensive implementation choice, in order to make closeParentFd()
746	* thread-safe.
747	*/
748	using FdCallback = folly::Function<bool(int, int)>;
749	void communicate(FdCallback readCallback, FdCallback writeCallback);
750
751	/**
752	* A readCallback for Subprocess::communicate() that helps you consume
753	* lines (or other delimited pieces) from your subprocess's file
754	* descriptors. Use the readLinesCallback() helper to get template
755	* deduction. For example:
756	*
757	* subprocess.communicate(
758	* Subprocess::readLinesCallback(
759	* [](int fd, folly::StringPiece s) {
760	* std::cout << fd << " said: " << s;
761	* return false; // Keep reading from the child
762	* }
763	* ),
764	* [](int pfd, int cfd){ return true; } // Don't write to the child
765	* );
766	*
767	* If a file line exceeds maxLineLength, your callback will get some
768	* initial chunks of maxLineLength with no trailing delimiters. The final
769	* chunk of a line is delimiter-terminated iff the delimiter was present
770	* in the input. In particular, the last line in a file always lacks a
771	* delimiter -- so if a file ends on a delimiter, the final line is empty.
772	*
773	* Like a regular communicate() callback, your fdLineCb() normally returns
774	* false. It may return true to tell Subprocess to close the underlying
775	* file descriptor. The child process may then receive SIGPIPE or get
776	* EPIPE errors on writes.
777	*/
778	template <class Callback>
779	class ReadLinesCallback {
780	private:
781	// Binds an FD to the client-provided FD+line callback
782	struct StreamSplitterCallback {
783	StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {}
784	// The return value semantics are inverted vs StreamSplitter
785	bool operator()(StringPiece s) {
786	return !cb_(fd_, s);
787	}
788	Callback& cb_;
789	int fd_;
790	};
791	typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
792
793	public:
794	explicit ReadLinesCallback(
795	Callback&& fdLineCb,
796	uint64_t maxLineLength = `0`, // No line length limit by default
797	char delimiter = `'\n'`,
798	uint64_t bufSize = `1024`)
799	: fdLineCb_(std::forward<Callback>(fdLineCb)),
800	maxLineLength_(maxLineLength),
801	delimiter_(delimiter),
802	bufSize_(bufSize) {}
803
804	bool operator()(int pfd, int cfd) {
805	// Make a splitter for this cfd if it doesn't already exist
806	auto it = fdToSplitter_.find(cfd);
807	auto& splitter = (it != fdToSplitter_.end())
808	? it->second
809	: fdToSplitter_
810	.emplace(
811	cfd,
812	LineSplitter(
813	delimiter_,
814	StreamSplitterCallback(fdLineCb_, cfd),
815	maxLineLength_))
816	.first->second;
817	// Read as much as we can from this FD
818	char buf[bufSize_];
819	while (true) {
820	ssize_t ret = readNoInt(pfd, buf, bufSize_);
821	if (ret == -`1` && errno == EAGAIN) { // No more data for now
822	return false;
823	}
824	checkUnixError(ret, "read");
825	if (ret == `0`) { // Reached end-of-file
826	splitter.flush(); // Ignore return since the file is over anyway
827	return true;
828	}
829	if (!splitter(StringPiece(buf, ret))) {
830	return true; // The callback told us to stop
831	}
832	}
833	}
834
835	private:
836	Callback fdLineCb_;
837	const uint64_t maxLineLength_;
838	const char delimiter_;
839	const uint64_t bufSize_;
840	// We lazily make splitters for all cfds that get used.
841	std::unordered_map<int, LineSplitter> fdToSplitter_;
842	};
843
844	// Helper to enable template deduction
845	template <class Callback>
846	static auto readLinesCallback(
847	Callback&& fdLineCb,
848	uint64_t maxLineLength = `0`, // No line length limit by default
849	char delimiter = `'\n'`,
850	uint64_t bufSize = `1024`)
851	-> ReadLinesCallback<typename std::decay<Callback>::type> {
852	return ReadLinesCallback<typename std::decay<Callback>::type>(
853	std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
854	}
855
856	/**
857	* communicate() callbacks can use this to temporarily enable/disable
858	* notifications (callbacks) for a pipe to/from the child. By default,
859	* all are enabled. Useful for "chatty" communication -- you want to
860	* disable write callbacks until you receive the expected message.
861	*
862	* Disabling a pipe does not free you from the requirement to consume all
863	* incoming data. Failing to do so will easily create deadlock bugs.
864	*
865	* Throws if the childFd is not known.
866	*/
867	void enableNotifications(int childFd, bool enabled);
868
869	/**
870	* Are notifications for one pipe to/from child enabled? Throws if the
871	* childFd is not known.
872	*/
873	bool notificationsEnabled(int childFd) const;
874
875	////
876	//// The following methods are meant for the cases when communicate() is
877	//// not suitable. You should not need them when you call communicate(),
878	//// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
879	//// takeOwnershipOfPipes() from a communicate() callback.
880	////
881
/**
 * Closes the parent-side file descriptor of the pipe identified by
 * childFd (the descriptor number in the child).
 *
 * DO NOT USE from communicate() callbacks; have the callback return true
 * instead.
 */
void closeParentFd(int childFd);
887
/**
 * Puts every pipe from / to the child into non-blocking mode.  There is
 * no need to call this before communicate(), which does it for you.
 */
void setAllNonBlocking();
893
894	/**
895	* Get parent file descriptor corresponding to the given file descriptor
896	* in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
897	* Do not close() the returned file descriptor; use closeParentFd, above.
898	*/
899	int parentFd(int childFd) const {
900	return pipes_[findByChildFd(childFd)].pipe.fd();
901	}
902	int stdinFd() const {
903	return parentFd(`0`);
904	}
905	int stdoutFd() const {
906	return parentFd(`1`);
907	}
908	int stderrFd() const {
909	return parentFd(`2`);
910	}
911
912	/**
913	* The child's pipes are logically separate from the process metadata
914	* (they may even be kept alive by the child's descendants). This call
915	* lets you manage the pipes' lifetime separetely from the lifetime of the
916	* child process.
917	*
918	* After this call, the Subprocess instance will have no knowledge of
919	* these pipes, and the caller assumes responsibility for managing their
920	* lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
921	* folly::File would otherwise silently suppress I/O errors.
922	*
923	* No, you may NOT call this from a communicate() callback.
924	*/
925	struct ChildPipe {
926	ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe (std::move(ppe)) {}
927	int childFd;
928	folly::File pipe; // Owns the parent FD
929	};
930	std::vector<ChildPipe> takeOwnershipOfPipes();
931
932	private:
933	// spawn() sets up a pipe to read errors from the child,
934	// then calls spawnInternal() to do the bulk of the work. Once
935	// spawnInternal() returns it reads the error pipe to see if the child
936	// encountered any errors.
937	void spawn(
938	std::unique_ptr<const char*[]> argv,
939	const char* executable,
940	const Options& options,
941	const std::vector<std::string>* env);
942	void spawnInternal(
943	std::unique_ptr<const char*[]> argv,
944	const char* executable,
945	Options& options,
946	const std::vector<std::string>* env,
947	int errFd);
948
949	// Actions to run in child.
950	// Note that this runs after vfork(), so tread lightly.
951	// Returns 0 on success, or an errno value on failure.
952	int prepareChild(
953	const Options& options,
954	const sigset_t* sigmask,
955	const char* childDir) const;
956	int runChild(
957	const char* executable,
958	char** argv,
959	char** env,
960	const Options& options) const;
961
/**
 * Reads from the error pipe (pfd) and throws SubprocessSpawnError if the
 * child failed before calling exec().
 */
void readChildErrorPipe(int pfd, const char* executable);
967
968	// Returns an index into pipes_. Throws std::invalid_argument if not found.
969	size_t findByChildFd(const int childFd) const;
970
971	pid_t pid_{-`1`};
972	ProcessReturnCode returnCode_;
973
974	/**
975	* Represents a pipe between this process, and the child process (or its
976	* descendant). To interact with these pipes, you can use communicate(),
977	* or use parentFd() and related methods, or separate them from the
978	* Subprocess instance entirely via takeOwnershipOfPipes().
979	*/
980	struct Pipe : private boost::totally_ordered<Pipe> {
981	folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
982	int childFd = -`1`; // Identifies the pipe: what FD is this in the child?
983	int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
984	bool enabled = true; // Are notifications enabled in communicate()?
985
986	bool operator<(const Pipe& other) const {
987	return childFd < other.childFd;
988	}
989	bool operator==(const Pipe& other) const {
990	return childFd == other.childFd;
991	}
992	};
993
994	// Populated at process start according to fdActions, empty after
995	// takeOwnershipOfPipes(). Sorted by childFd. Can only have elements
996	// erased, but not inserted, after being populated.
997	//
998	// The number of pipes between parent and child is assumed to be small,
999	// so we're happy with a vector here, even if it means linear erase.
1000	std::vector<Pipe> pipes_;
1001	};
1002
1003	} // namespace folly
1004

Browse the source code of glow/thirdparty/folly/folly/Subprocess.h