1 | #include <c10/util/Backtrace.h> |
2 | #include <c10/util/signal_handler.h> |
3 | |
4 | #if defined(C10_SUPPORTS_SIGNAL_HANDLER) |
5 | |
6 | // Normal signal handler implementation. |
7 | #include <cxxabi.h> |
8 | #include <dirent.h> |
9 | #include <dlfcn.h> |
10 | #include <fmt/format.h> |
11 | #include <sys/syscall.h> |
12 | #include <sys/types.h> |
13 | #include <unistd.h> |
14 | #include <unwind.h> |
15 | |
16 | #include <cstdio> |
17 | #include <cstdlib> |
18 | #include <iostream> |
19 | |
20 | #ifdef C10_ANDROID |
21 | #ifndef SYS_gettid |
22 | #define SYS_gettid __NR_gettid |
23 | #endif |
24 | #ifndef SYS_tgkill |
25 | #define SYS_tgkill __NR_tgkill |
26 | #endif |
27 | #endif |
28 | |
29 | namespace { |
30 | |
// Dispositions that were in effect for SIGHUP / SIGINT before hookupHandler()
// replaced them. handleSignal() chains to them and unhookHandler() restores
// them.
struct sigaction previousSighup;
struct sigaction previousSigint;
// Running totals of the SIGINT / SIGHUP signals seen by handleSignal().
std::atomic<int> sigintCount(0);
std::atomic<int> sighupCount(0);
// Number of hookupHandler() calls not yet balanced by unhookHandler().
std::atomic<int> hookedUpCount(0);

// Forwards `signal` to the handler stored in `previous`, but only when that
// handler is a real function. SIG_DFL and SIG_IGN are sentinel values, not
// callable addresses, so invoking them would crash the process.
void chainToPreviousHandler(const struct sigaction& previous, int signal) {
  const auto handler = previous.sa_handler;
  if (handler == nullptr || handler == SIG_DFL || handler == SIG_IGN) {
    return;
  }
  handler(signal);
}

// Signal handler installed for SIGHUP and SIGINT: bumps the matching counter
// and then chains to whatever handler was registered before ours. Any other
// signal number is ignored.
void handleSignal(int signal) {
  switch (signal) {
    // TODO: what if the previous handler uses sa_sigaction?
    case SIGHUP:
      sighupCount += 1;
      chainToPreviousHandler(previousSighup, signal);
      break;
    case SIGINT:
      sigintCount += 1;
      chainToPreviousHandler(previousSigint, signal);
      break;
    default:
      break;
  }
}
54 | |
55 | void hookupHandler() { |
56 | if (hookedUpCount++) { |
57 | return; |
58 | } |
59 | struct sigaction sa {}; |
60 | // Setup the handler |
61 | sa.sa_handler = &handleSignal; |
62 | // Restart the system call, if at all possible |
63 | sa.sa_flags = SA_RESTART; |
64 | // Block every signal during the handler |
65 | sigfillset(&sa.sa_mask); |
66 | // Intercept SIGHUP and SIGINT |
67 | if (sigaction(SIGHUP, &sa, &previousSighup) == -1) { |
68 | LOG(FATAL) << "Cannot install SIGHUP handler." ; |
69 | } |
70 | if (sigaction(SIGINT, &sa, &previousSigint) == -1) { |
71 | LOG(FATAL) << "Cannot install SIGINT handler." ; |
72 | } |
73 | } |
74 | |
75 | // Set the signal handlers to the default. |
76 | void unhookHandler() { |
77 | if (--hookedUpCount > 0) { |
78 | return; |
79 | } |
80 | struct sigaction sa {}; |
81 | // Setup the sighub handler |
82 | sa.sa_handler = SIG_DFL; |
83 | // Restart the system call, if at all possible |
84 | sa.sa_flags = SA_RESTART; |
85 | // Block every signal during the handler |
86 | sigfillset(&sa.sa_mask); |
87 | // Intercept SIGHUP and SIGINT |
88 | if (sigaction(SIGHUP, &previousSighup, nullptr) == -1) { |
89 | LOG(FATAL) << "Cannot uninstall SIGHUP handler." ; |
90 | } |
91 | if (sigaction(SIGINT, &previousSigint, nullptr) == -1) { |
92 | LOG(FATAL) << "Cannot uninstall SIGINT handler." ; |
93 | } |
94 | } |
95 | |
96 | } // namespace |
97 | |
98 | namespace c10 { |
99 | |
100 | #if defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) |
101 | |
102 | FatalSignalHandler& FatalSignalHandler::getInstance() { |
103 | // Leaky singleton to avoid module destructor race. |
104 | static FatalSignalHandler* handler = new FatalSignalHandler(); |
105 | return *handler; |
106 | } |
107 | |
108 | FatalSignalHandler::~FatalSignalHandler() = default; |
109 | |
110 | FatalSignalHandler::FatalSignalHandler() |
111 | : fatalSignalHandlersInstalled(false), |
112 | fatalSignalReceived(false), |
113 | fatalSignalName("<UNKNOWN>" ), |
114 | writingCond(PTHREAD_COND_INITIALIZER), |
115 | writingMutex(PTHREAD_MUTEX_INITIALIZER) {} |
116 | |
117 | FatalSignalHandler::signal_handler FatalSignalHandler::kSignalHandlers[] = { |
118 | {"SIGABRT" , SIGABRT, {}}, |
119 | {"SIGINT" , SIGINT, {}}, |
120 | {"SIGILL" , SIGILL, {}}, |
121 | {"SIGFPE" , SIGFPE, {}}, |
122 | {"SIGBUS" , SIGBUS, {}}, |
123 | {"SIGSEGV" , SIGSEGV, {}}, |
124 | {nullptr, 0, {}}}; |
125 | |
126 | struct sigaction* FatalSignalHandler::getPreviousSigaction(int signum) { |
127 | for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) { |
128 | if (handler->signum == signum) { |
129 | return &handler->previous; |
130 | } |
131 | } |
132 | return nullptr; |
133 | } |
134 | |
135 | const char* FatalSignalHandler::getSignalName(int signum) { |
136 | for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) { |
137 | if (handler->signum == signum) { |
138 | return handler->name; |
139 | } |
140 | } |
141 | return nullptr; |
142 | } |
143 | |
144 | void FatalSignalHandler::callPreviousSignalHandler( |
145 | struct sigaction* action, |
146 | int signum, |
147 | siginfo_t* info, |
148 | void* ctx) { |
149 | if (!action->sa_handler) { |
150 | return; |
151 | } |
152 | if ((action->sa_flags & SA_SIGINFO) == SA_SIGINFO) { |
153 | action->sa_sigaction(signum, info, ctx); |
154 | } else { |
155 | action->sa_handler(signum); |
156 | } |
157 | } |
158 | |
159 | // needsLock signals whether we need to lock our writing mutex. |
160 | void FatalSignalHandler::stacktraceSignalHandler(bool needsLock) { |
161 | if (needsLock) { |
162 | pthread_mutex_lock(&writingMutex); |
163 | } |
164 | pid_t tid = syscall(SYS_gettid); |
165 | std::string backtrace = fmt::format( |
166 | "{}({}), PID: {}, Thread {}: \n {}" , |
167 | fatalSignalName, |
168 | fatalSignum, |
169 | ::getpid(), |
170 | tid, |
171 | c10::get_backtrace()); |
172 | std::cerr << backtrace << std::endl; |
173 | if (needsLock) { |
174 | pthread_mutex_unlock(&writingMutex); |
175 | pthread_cond_signal(&writingCond); |
176 | } |
177 | } |
178 | |
179 | void FatalSignalHandler::fatalSignalHandlerPostProcess() {} |
180 | |
181 | void FatalSignalHandler::fatalSignalHandlerStatic(int signum) { |
182 | getInstance().fatalSignalHandler(signum); |
183 | } |
184 | |
185 | // Our fatal signal entry point |
186 | void FatalSignalHandler::fatalSignalHandler(int signum) { |
187 | // Check if this is a proper signal that we declared above. |
188 | const char* name = getSignalName(signum); |
189 | if (!name) { |
190 | return; |
191 | } |
192 | if (fatalSignalReceived) { |
193 | return; |
194 | } |
195 | // Set the flag so that our SIGUSR2 handler knows that we're aborting and |
196 | // that it should intercept any SIGUSR2 signal. |
197 | fatalSignalReceived = true; |
198 | // Set state for other threads. |
199 | fatalSignum = signum; |
200 | fatalSignalName = name; |
201 | // Linux doesn't have a nice userland API for enumerating threads so we |
202 | // need to use the proc pseudo-filesystem. |
203 | DIR* procDir = opendir("/proc/self/task" ); |
204 | if (procDir) { |
205 | pid_t pid = getpid(); |
206 | pid_t currentTid = syscall(SYS_gettid); |
207 | struct dirent* entry = nullptr; |
208 | pthread_mutex_lock(&writingMutex); |
209 | while ((entry = readdir(procDir)) != nullptr) { |
210 | if (entry->d_name[0] == '.') { |
211 | continue; |
212 | } |
213 | pid_t tid = atoi(entry->d_name); |
214 | // If we've found the current thread then we'll jump into the SIGUSR2 |
215 | // handler before calling pthread_cond_wait thus deadlocking, so branch |
216 | // our directly to the backtrace handler instead of signaling it. |
217 | if (tid != currentTid) { |
218 | syscall(SYS_tgkill, pid, tid, SIGUSR2); |
219 | pthread_cond_wait(&writingCond, &writingMutex); |
220 | } else { |
221 | stacktraceSignalHandler(false); |
222 | } |
223 | } |
224 | pthread_mutex_unlock(&writingMutex); |
225 | } else { |
226 | perror("Failed to open /proc/self/task" ); |
227 | } |
228 | fatalSignalHandlerPostProcess(); |
229 | sigaction(signum, getPreviousSigaction(signum), nullptr); |
230 | raise(signum); |
231 | } |
232 | |
233 | // Our SIGUSR2 entry point |
234 | void FatalSignalHandler::stacktraceSignalHandlerStatic( |
235 | int signum, |
236 | siginfo_t* info, |
237 | void* ctx) { |
238 | getInstance().stacktraceSignalHandler(signum, info, ctx); |
239 | } |
240 | |
241 | void FatalSignalHandler::stacktraceSignalHandler( |
242 | int signum, |
243 | siginfo_t* info, |
244 | void* ctx) { |
245 | if (fatalSignalReceived) { |
246 | stacktraceSignalHandler(true); |
247 | } else { |
248 | // We don't want to actually change the signal handler as we want to |
249 | // remain the signal handler so that we may get the usr2 signal later. |
250 | callPreviousSignalHandler(&previousSigusr2, signum, info, ctx); |
251 | } |
252 | } |
253 | |
254 | // Installs SIGABRT signal handler so that we get stack traces |
255 | // from every thread on SIGABRT caused exit. Also installs SIGUSR2 handler |
256 | // so that threads can communicate with each other (be sure if you use SIGUSR2) |
257 | // to install your handler before initing caffe2 (we properly fall back to |
258 | // the previous handler if we didn't initiate the SIGUSR2). |
259 | void FatalSignalHandler::installFatalSignalHandlers() { |
260 | std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex); |
261 | if (fatalSignalHandlersInstalled) { |
262 | return; |
263 | } |
264 | fatalSignalHandlersInstalled = true; |
265 | struct sigaction sa {}; |
266 | sigemptyset(&sa.sa_mask); |
267 | // Since we'll be in an exiting situation it's possible there's memory |
268 | // corruption, so make our own stack just in case. |
269 | sa.sa_flags = SA_ONSTACK | SA_SIGINFO; |
270 | sa.sa_handler = FatalSignalHandler::fatalSignalHandlerStatic; |
271 | for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) { |
272 | if (sigaction(handler->signum, &sa, &handler->previous)) { |
273 | std::string str("Failed to add " ); |
274 | str += handler->name; |
275 | str += " handler!" ; |
276 | perror(str.c_str()); |
277 | } |
278 | } |
279 | sa.sa_sigaction = FatalSignalHandler::stacktraceSignalHandlerStatic; |
280 | if (sigaction(SIGUSR2, &sa, &previousSigusr2)) { |
281 | perror("Failed to add SIGUSR2 handler!" ); |
282 | } |
283 | } |
284 | |
285 | void FatalSignalHandler::uninstallFatalSignalHandlers() { |
286 | std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex); |
287 | if (!fatalSignalHandlersInstalled) { |
288 | return; |
289 | } |
290 | fatalSignalHandlersInstalled = false; |
291 | for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) { |
292 | if (sigaction(handler->signum, &handler->previous, nullptr)) { |
293 | std::string str("Failed to remove " ); |
294 | str += handler->name; |
295 | str += " handler!" ; |
296 | perror(str.c_str()); |
297 | } else { |
298 | handler->previous = {}; |
299 | } |
300 | } |
301 | if (sigaction(SIGUSR2, &previousSigusr2, nullptr)) { |
302 | perror("Failed to add SIGUSR2 handler!" ); |
303 | } else { |
304 | previousSigusr2 = {}; |
305 | } |
306 | } |
307 | #endif // defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) |
308 | |
309 | SignalHandler::SignalHandler( |
310 | SignalHandler::Action SIGINT_action, |
311 | SignalHandler::Action SIGHUP_action) |
312 | : SIGINT_action_(SIGINT_action), |
313 | SIGHUP_action_(SIGHUP_action), |
314 | my_sigint_count_(sigintCount), |
315 | my_sighup_count_(sighupCount) { |
316 | hookupHandler(); |
317 | } |
318 | |
319 | SignalHandler::~SignalHandler() { |
320 | unhookHandler(); |
321 | } |
322 | |
323 | // Return true iff a SIGINT has been received since the last time this |
324 | // function was called. |
325 | bool SignalHandler::GotSIGINT() { |
326 | uint64_t count = sigintCount; |
327 | bool result = (count != my_sigint_count_); |
328 | my_sigint_count_ = count; |
329 | return result; |
330 | } |
331 | |
332 | // Return true iff a SIGHUP has been received since the last time this |
333 | // function was called. |
334 | bool SignalHandler::GotSIGHUP() { |
335 | uint64_t count = sighupCount; |
336 | bool result = (count != my_sighup_count_); |
337 | my_sighup_count_ = count; |
338 | return result; |
339 | } |
340 | |
341 | SignalHandler::Action SignalHandler::CheckForSignals() { |
342 | if (GotSIGHUP()) { |
343 | return SIGHUP_action_; |
344 | } |
345 | if (GotSIGINT()) { |
346 | return SIGINT_action_; |
347 | } |
348 | return SignalHandler::Action::NONE; |
349 | } |
350 | |
351 | #if defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) |
352 | void FatalSignalHandler::setPrintStackTracesOnFatalSignal(bool print) { |
353 | if (print) { |
354 | installFatalSignalHandlers(); |
355 | } else { |
356 | uninstallFatalSignalHandlers(); |
357 | } |
358 | } |
359 | bool FatalSignalHandler::printStackTracesOnFatalSignal() { |
360 | std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex); |
361 | return fatalSignalHandlersInstalled; |
362 | } |
363 | |
364 | #endif // defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) |
365 | } // namespace c10 |
366 | |
367 | #else // defined(C10_SUPPORTS_SIGNAL_HANDLER) |
368 | |
// TODO: Signal handling is not yet supported on non-Linux platforms; below is
// a minimal implementation that makes things compile.
371 | namespace c10 { |
372 | SignalHandler::SignalHandler( |
373 | SignalHandler::Action SIGINT_action, |
374 | SignalHandler::Action SIGHUP_action) { |
375 | SIGINT_action_ = SIGINT_action; |
376 | SIGHUP_action_ = SIGHUP_action; |
377 | my_sigint_count_ = 0; |
378 | my_sighup_count_ = 0; |
379 | } |
380 | SignalHandler::~SignalHandler() {} |
381 | bool SignalHandler::GotSIGINT() { |
382 | return false; |
383 | } |
384 | bool SignalHandler::GotSIGHUP() { |
385 | return false; |
386 | } |
387 | SignalHandler::Action SignalHandler::CheckForSignals() { |
388 | return SignalHandler::Action::NONE; |
389 | } |
390 | } // namespace c10 |
391 | |
392 | #endif // defined(C10_SUPPORTS_SIGNAL_HANDLER) |
393 | |