xref: /petsc/src/sys/error/signal.c (revision 660278c0612eddb50605b63d23f7384c6f3231af)
1 
2 /*
3       Routines to handle signals the program will receive.
4     Usually this will call the error handlers.
5 */
6 #include <petsc/private/petscimpl.h>             /*I   "petscsys.h"   I*/
7 #include <signal.h>
8 #include <stdlib.h> /* for _Exit() */
9 
10 static PetscClassId SIGNAL_CLASSID = 0;
11 
12 struct SH {
13   PetscClassId   classid;
14   PetscErrorCode (*handler)(int,void*);
15   void           *ctx;
16   struct SH      *previous;
17 };
18 static struct SH *sh       = NULL;
19 static PetscBool SignalSet = PETSC_FALSE;
20 
21 /* Called by MPI_Abort() to suppress user-registered atexit()/on_exit() functions.
22    See discussion at https://gitlab.com/petsc/petsc/-/merge_requests/2745.
23 */
24 static void MyExit(void)
25 {
26   _Exit(MPI_ERR_OTHER);
27 }
28 
29 /*
30     PetscSignalHandler_Private - This is the signal handler called by the system. This calls
31              any signal handler set by PETSc or the application code.
32 
33    Input Parameters: (depends on system)
34 .    sig - integer code indicating the type of signal
35 .    code - ??
36 .    sigcontext - ??
37 .    addr - ??
38 
39 */
40 #if defined(PETSC_HAVE_4ARG_SIGNAL_HANDLER)
41 static void PetscSignalHandler_Private(int sig,int code,struct sigcontext * scp,char *addr)
42 #else
43 static void PetscSignalHandler_Private(int sig)
44 #endif
45 {
46   PetscErrorCode ierr;
47 
48   if (!sh || !sh->handler) ierr = PetscSignalHandlerDefault(sig,(void*)0);
49   else {
50     if (sh->classid != SIGNAL_CLASSID) SETERRABORT(PETSC_COMM_WORLD,PETSC_ERR_COR,"Signal object has been corrupted");
51     ierr = (*sh->handler)(sig,sh->ctx);
52   }
53   if (ierr) PETSCABORT(PETSC_COMM_WORLD,PETSC_ERR_COR);
54 }
55 
56 /*@
57    PetscSignalHandlerDefault - Default signal handler.
58 
59    Not Collective
60 
61    Input Parameters:
62 +  sig - signal value
63 -  ptr - unused pointer
64 
65    Developer Note:
66    This does not call PetscError(), handles the entire error process directly
67 
68    Level: advanced
69 
70 @*/
71 PetscErrorCode  PetscSignalHandlerDefault(int sig,void *ptr)
72 {
73   const char *SIGNAME[64];
74 
75   if (sig == SIGSEGV) PetscSignalSegvCheckPointerOrMpi();
76   SIGNAME[0]       = "Unknown signal";
77 #if !defined(PETSC_MISSING_SIGABRT)
78   SIGNAME[SIGABRT] = "Abort";
79 #endif
80 #if !defined(PETSC_MISSING_SIGALRM)
81   SIGNAME[SIGALRM] = "Alarm";
82 #endif
83 #if !defined(PETSC_MISSING_SIGBUS)
84   SIGNAME[SIGBUS]  = "BUS: Bus Error, possibly illegal memory access";
85 #endif
86 #if !defined(PETSC_MISSING_SIGCHLD)
87   SIGNAME[SIGCHLD] = "CHLD";
88 #endif
89 #if !defined(PETSC_MISSING_SIGCONT)
90   SIGNAME[SIGCONT] = "CONT";
91 #endif
92 #if !defined(PETSC_MISSING_SIGFPE)
93   SIGNAME[SIGFPE]  = "FPE: Floating Point Exception,probably divide by zero";
94 #endif
95 #if !defined(PETSC_MISSING_SIGHUP)
96   SIGNAME[SIGHUP]  = "Hang up: Some other process (or the batch system) has told this process to end";
97 #endif
98 #if !defined(PETSC_MISSING_SIGILL)
99   SIGNAME[SIGILL]  = "Illegal instruction: Likely due to memory corruption";
100 #endif
101 #if !defined(PETSC_MISSING_SIGINT)
102   SIGNAME[SIGINT]  = "Interrupt";
103 #endif
104 #if !defined(PETSC_MISSING_SIGKILL)
105   SIGNAME[SIGKILL] = "Kill: Some other process (or the batch system) has told this process to end";
106 #endif
107 #if !defined(PETSC_MISSING_SIGPIPE)
108   SIGNAME[SIGPIPE] = "Broken Pipe: Likely while reading or writing to a socket";
109 #endif
110 #if !defined(PETSC_MISSING_SIGQUIT)
111   SIGNAME[SIGQUIT] = "Quit: Some other process (or the batch system) has told this process to end";
112 #endif
113 #if !defined(PETSC_MISSING_SIGSEGV)
114   SIGNAME[SIGSEGV] = "SEGV: Segmentation Violation, probably memory access out of range";
115 #endif
116 #if !defined(PETSC_MISSING_SIGSYS)
117   SIGNAME[SIGSYS]  = "SYS";
118 #endif
119 #if !defined(PETSC_MISSING_SIGTERM)
120   SIGNAME[SIGTERM] = "Terminate: Some process (or the batch system) has told this process to end";
121 #endif
122 #if !defined(PETSC_MISSING_SIGTRAP)
123   SIGNAME[SIGTRAP] = "TRAP";
124 #endif
125 #if !defined(PETSC_MISSING_SIGTSTP)
126   SIGNAME[SIGTSTP] = "TSTP";
127 #endif
128 #if !defined(PETSC_MISSING_SIGURG)
129   SIGNAME[SIGURG]  = "URG";
130 #endif
131 #if !defined(PETSC_MISSING_SIGUSR1)
132   SIGNAME[SIGUSR1] = "User 1";
133 #endif
134 #if !defined(PETSC_MISSING_SIGUSR2)
135   SIGNAME[SIGUSR2] = "User 2";
136 #endif
137 
138   signal(sig,SIG_DFL);
139   PetscSleep(PetscGlobalRank % 4); /* prevent some jumbling of error messages from different ranks */
140   (*PetscErrorPrintf)("------------------------------------------------------------------------\n");
141   if (sig >= 0 && sig <= 20) (*PetscErrorPrintf)("Caught signal number %d %s\n",sig,SIGNAME[sig]);
142   else (*PetscErrorPrintf)("Caught signal\n");
143 
144   (*PetscErrorPrintf)("Try option -start_in_debugger or -on_error_attach_debugger\n");
145   (*PetscErrorPrintf)("or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/\n");
146 #if defined(PETSC_HAVE_CUDA)
147   (*PetscErrorPrintf)("or try https://docs.nvidia.com/cuda/cuda-memcheck/index.html on NVIDIA CUDA systems to find memory corruption errors\n");
148 #endif
149 #if PetscDefined(USE_DEBUG)
150   PetscStackPop;  /* remove stack frames for error handlers */
151   PetscStackPop;
152   (*PetscErrorPrintf)("---------------------  Stack Frames ------------------------------------\n");
153   PetscStackView(PETSC_STDOUT);
154 #else
155   (*PetscErrorPrintf)("configure using --with-debugging=yes, recompile, link, and run \n");
156   (*PetscErrorPrintf)("to get more information on the crash.\n");
157 #endif
158 #if !defined(PETSC_MISSING_SIGBUS)
159   if (sig == SIGSEGV || sig == SIGBUS) {
160 #else
161   if (sig == SIGSEGV) {
162 #endif
163     PetscBool debug;
164 
165     PetscMallocGetDebug(&debug,NULL,NULL);
166     if (debug) PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
167     else (*PetscErrorPrintf)("Run with -malloc_debug to check if memory corruption is causing the crash.\n");
168   }
169   atexit(MyExit);
170   PETSCABORT(PETSC_COMM_WORLD,(int)PETSC_ERR_SIG);
171   return 0;
172 }
173 
174 #if !defined(PETSC_SIGNAL_CAST)
175 #define PETSC_SIGNAL_CAST
176 #endif
177 
178 /*@C
179    PetscPushSignalHandler - Catches the usual fatal errors and
180    calls a user-provided routine.
181 
182    Not Collective
183 
184    Input Parameters:
185 +  routine - routine to call when a signal is received
186 -  ctx - optional context needed by the routine
187 
188   Level: developer
189 
190 .seealso: `PetscPopSignalHandler()`, `PetscSignalHandlerDefault()`, `PetscPushErrorHandler()`
191 
192 @*/
193 PetscErrorCode  PetscPushSignalHandler(PetscErrorCode (*routine)(int,void*),void *ctx)
194 {
195   struct  SH     *newsh;
196 
197   PetscFunctionBegin;
198   if (!SIGNAL_CLASSID) {
199     /* PetscCall(PetscClassIdRegister("Signal",&SIGNAL_CLASSID)); */
200     SIGNAL_CLASSID = 19;
201   }
202   if (!SignalSet && routine) {
203     /* Do not catch ABRT, CHLD, KILL */
204 #if !defined(PETSC_MISSING_SIGALRM)
205     /* signal(SIGALRM, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
206 #endif
207 #if !defined(PETSC_MISSING_SIGBUS)
208     signal(SIGBUS, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
209 #endif
210 #if !defined(PETSC_MISSING_SIGCONT)
211     /*signal(SIGCONT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);*/
212 #endif
213 #if !defined(PETSC_MISSING_SIGFPE)
214     signal(SIGFPE,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
215 #endif
216 #if !defined(PETSC_MISSING_SIGHUP) && defined(PETSC_HAVE_STRUCT_SIGACTION)
217     {
218       struct  sigaction action;
219       sigaction(SIGHUP,NULL,&action);
220       if (action.sa_handler == SIG_IGN) {
221         PetscCall(PetscInfo(NULL,"SIGHUP previously set to ignore, therefor not changing its signal handler\n"));
222       } else {
223         signal(SIGHUP, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
224       }
225     }
226 #endif
227 #if !defined(PETSC_MISSING_SIGILL)
228     signal(SIGILL,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
229 #endif
230 #if !defined(PETSC_MISSING_SIGINT)
231     /* signal(SIGINT, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
232 #endif
233 #if !defined(PETSC_MISSING_SIGPIPE)
234     signal(SIGPIPE, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
235 #endif
236 #if !defined(PETSC_MISSING_SIGQUIT)
237     signal(SIGQUIT, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
238 #endif
239 #if !defined(PETSC_MISSING_SIGSEGV)
240     signal(SIGSEGV, PETSC_SIGNAL_CAST PetscSignalHandler_Private);
241 #endif
242 #if !defined(PETSC_MISSING_SIGSYS)
243     signal(SIGSYS,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
244 #endif
245 #if !defined(PETSC_MISSING_SIGTERM)
246 #if !defined(OMPI_MAJOR_VERSION)
247     /* OpenMPI may use SIGTERM to close down all its ranks; we don't want to generate many confusing PETSc error messages in that case */
248     signal(SIGTERM,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
249 #endif
250 #endif
251 #if !defined(PETSC_MISSING_SIGTRAP)
252     signal(SIGTRAP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
253 #endif
254 #if !defined(PETSC_MISSING_SIGTSTP)
255     /* signal(SIGTSTP,  PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
256 #endif
257 #if !defined(PETSC_MISSING_SIGURG)
258     signal(SIGURG,  PETSC_SIGNAL_CAST PetscSignalHandler_Private);
259 #endif
260 #if !defined(PETSC_MISSING_SIGUSR1)
261     /* signal(SIGUSR1, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
262 #endif
263 #if !defined(PETSC_MISSING_SIGUSR2)
264     /* signal(SIGUSR2, PETSC_SIGNAL_CAST PetscSignalHandler_Private); */
265 #endif
266     SignalSet = PETSC_TRUE;
267   }
268   if (!routine) {
269 #if !defined(PETSC_MISSING_SIGALRM)
270     /* signal(SIGALRM, SIG_DFL); */
271 #endif
272 #if !defined(PETSC_MISSING_SIGBUS)
273     signal(SIGBUS,  SIG_DFL);
274 #endif
275 #if !defined(PETSC_MISSING_SIGCONT)
276     /* signal(SIGCONT, SIG_DFL); */
277 #endif
278 #if !defined(PETSC_MISSING_SIGFPE)
279     signal(SIGFPE,  SIG_DFL);
280 #endif
281 #if !defined(PETSC_MISSING_SIGHUP)
282     signal(SIGHUP,  SIG_DFL);
283 #endif
284 #if !defined(PETSC_MISSING_SIGILL)
285     signal(SIGILL,  SIG_DFL);
286 #endif
287 #if !defined(PETSC_MISSING_SIGINT)
288     /* signal(SIGINT,  SIG_DFL); */
289 #endif
290 #if !defined(PETSC_MISSING_SIGPIPE)
291     signal(SIGPIPE, SIG_DFL);
292 #endif
293 #if !defined(PETSC_MISSING_SIGQUIT)
294     signal(SIGQUIT, SIG_DFL);
295 #endif
296 #if !defined(PETSC_MISSING_SIGSEGV)
297     signal(SIGSEGV, SIG_DFL);
298 #endif
299 #if !defined(PETSC_MISSING_SIGSYS)
300     signal(SIGSYS,  SIG_DFL);
301 #endif
302 #if !defined(PETSC_MISSING_SIGTERM)
303     signal(SIGTERM, SIG_DFL);
304 #endif
305 #if !defined(PETSC_MISSING_SIGTRAP)
306     signal(SIGTRAP, SIG_DFL);
307 #endif
308 #if !defined(PETSC_MISSING_SIGTSTP)
309     /* signal(SIGTSTP, SIG_DFL); */
310 #endif
311 #if !defined(PETSC_MISSING_SIGURG)
312     signal(SIGURG,  SIG_DFL);
313 #endif
314 #if !defined(PETSC_MISSING_SIGUSR1)
315     /* signal(SIGUSR1, SIG_DFL); */
316 #endif
317 #if !defined(PETSC_MISSING_SIGUSR2)
318     /* signal(SIGUSR2, SIG_DFL); */
319 #endif
320     SignalSet = PETSC_FALSE;
321   }
322   PetscCall(PetscNew(&newsh));
323   if (sh) {
324     PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
325     newsh->previous = sh;
326   }  else newsh->previous = NULL;
327   newsh->handler = routine;
328   newsh->ctx     = ctx;
329   newsh->classid = SIGNAL_CLASSID;
330   sh             = newsh;
331   PetscFunctionReturn(0);
332 }
333 
334 /*@
335    PetscPopSignalHandler - Removes the most last signal handler that was pushed.
336        If no signal handlers are left on the stack it will remove the PETSc signal handler.
337        (That is PETSc will no longer catch signals).
338 
339    Not Collective
340 
341   Level: developer
342 
343 .seealso: `PetscPushSignalHandler()`
344 
345 @*/
346 PetscErrorCode  PetscPopSignalHandler(void)
347 {
348   struct SH      *tmp;
349 
350   PetscFunctionBegin;
351   if (!sh) PetscFunctionReturn(0);
352   PetscCheck(sh->classid == SIGNAL_CLASSID,PETSC_COMM_SELF,PETSC_ERR_COR,"Signal object has been corrupted");
353 
354   tmp = sh;
355   sh  = sh->previous;
356   PetscCall(PetscFree(tmp));
357   if (!sh || !sh->handler) {
358 #if !defined(PETSC_MISSING_SIGALRM)
359     /* signal(SIGALRM, SIG_DFL); */
360 #endif
361 #if !defined(PETSC_MISSING_SIGBUS)
362     signal(SIGBUS,  SIG_DFL);
363 #endif
364 #if !defined(PETSC_MISSING_SIGCONT)
365     /* signal(SIGCONT, SIG_DFL); */
366 #endif
367 #if !defined(PETSC_MISSING_SIGFPE)
368     signal(SIGFPE,  SIG_DFL);
369 #endif
370 #if !defined(PETSC_MISSING_SIGHUP)
371     signal(SIGHUP,  SIG_DFL);
372 #endif
373 #if !defined(PETSC_MISSING_SIGILL)
374     signal(SIGILL,  SIG_DFL);
375 #endif
376 #if !defined(PETSC_MISSING_SIGINT)
377     /* signal(SIGINT,  SIG_DFL); */
378 #endif
379 #if !defined(PETSC_MISSING_SIGPIPE)
380     signal(SIGPIPE, SIG_DFL);
381 #endif
382 #if !defined(PETSC_MISSING_SIGQUIT)
383     signal(SIGQUIT, SIG_DFL);
384 #endif
385 #if !defined(PETSC_MISSING_SIGSEGV)
386     signal(SIGSEGV, SIG_DFL);
387 #endif
388 #if !defined(PETSC_MISSING_SIGSYS)
389     signal(SIGSYS,  SIG_DFL);
390 #endif
391 #if !defined(PETSC_MISSING_SIGTERM)
392     signal(SIGTERM, SIG_DFL);
393 #endif
394 #if !defined(PETSC_MISSING_SIGTRAP)
395     signal(SIGTRAP, SIG_DFL);
396 #endif
397 #if !defined(PETSC_MISSING_SIGTSTP)
398     /* signal(SIGTSTP, SIG_DFL); */
399 #endif
400 #if !defined(PETSC_MISSING_SIGURG)
401     signal(SIGURG,  SIG_DFL);
402 #endif
403 #if !defined(PETSC_MISSING_SIGUSR1)
404     /* signal(SIGUSR1, SIG_DFL); */
405 #endif
406 #if !defined(PETSC_MISSING_SIGUSR2)
407     /* signal(SIGUSR2, SIG_DFL); */
408 #endif
409     SignalSet = PETSC_FALSE;
410   } else {
411     SignalSet = PETSC_TRUE;
412   }
413   PetscFunctionReturn(0);
414 }
415