xref: /petsc/src/sys/objects/init.c (revision 36d20dc55d5999f7e3262f63e358d6f8a5a9893c)
1 /*
2 
3    This file defines part of the initialization of PETSc
4 
5   This file uses regular malloc and free because it cannot know
6   what malloc is being used until it has already processed the input.
7 */
8 
9 #define _GNU_SOURCE
10 #include <sched.h>
11 #include <petscsys.h>        /*I  "petscsys.h"   I*/
12 #if defined(PETSC_USE_PTHREAD)
13 #include <pthread.h>
14 #endif
15 #if defined(PETSC_HAVE_SYS_SYSINFO_H)
16 #include <sys/sysinfo.h>
17 #endif
18 #include <unistd.h>
19 #if defined(PETSC_HAVE_STDLIB_H)
20 #include <stdlib.h>
21 #endif
22 #if defined(PETSC_HAVE_MALLOC_H)
23 #include <malloc.h>
24 #endif
25 #if defined(PETSC_HAVE_VALGRIND)
26 #include <valgrind/valgrind.h>
27 #endif
28 
29 /* ------------------------Nasty global variables -------------------------------*/
30 /*
31      Indicates if PETSc started up MPI, or it was
32    already started before PETSc was initialized.
33 */
34 PetscBool    PetscBeganMPI         = PETSC_FALSE;
35 PetscBool    PetscInitializeCalled = PETSC_FALSE;
36 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
37 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
38 PetscBool    PetscThreadGo         = PETSC_TRUE;
39 PetscMPIInt  PetscGlobalRank = -1;
40 PetscMPIInt  PetscGlobalSize = -1;
41 
42 #if defined(PETSC_USE_PTHREAD_CLASSES)
43 PetscMPIInt  PetscMaxThreads = 2;
44 pthread_t*   PetscThreadPoint;
45 #define PETSC_HAVE_PTHREAD_BARRIER
46 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
47 pthread_barrier_t* BarrPoint;   /* used by 'true' thread pool */
48 #endif
49 PetscErrorCode ithreaderr = 0;
50 int*         pVal;
51 
52 #define CACHE_LINE_SIZE 64  /* used by 'chain', 'main','tree' thread pools */
53 int* ThreadCoreAffinity;
54 
/* Job status values; used by 'chain','tree' thread pool */
typedef enum {
  JobInitiated,
  ThreadsWorking,
  JobCompleted
} estat;
56 
57 typedef struct {
58   pthread_mutex_t** mutexarray;
59   pthread_cond_t**  cond1array;
60   pthread_cond_t** cond2array;
61   void* (*pfunc)(void*);
62   void** pdata;
63   PetscBool startJob;
64   estat eJobStat;
65   PetscBool** arrThreadStarted;
66   PetscBool** arrThreadReady;
67 } sjob_tree;
68 sjob_tree job_tree;
69 typedef struct {
70   pthread_mutex_t** mutexarray;
71   pthread_cond_t**  cond1array;
72   pthread_cond_t** cond2array;
73   void* (*pfunc)(void*);
74   void** pdata;
75   PetscBool** arrThreadReady;
76 } sjob_main;
77 sjob_main job_main;
78 typedef struct {
79   pthread_mutex_t** mutexarray;
80   pthread_cond_t**  cond1array;
81   pthread_cond_t** cond2array;
82   void* (*pfunc)(void*);
83   void** pdata;
84   PetscBool startJob;
85   estat eJobStat;
86   PetscBool** arrThreadStarted;
87   PetscBool** arrThreadReady;
88 } sjob_chain;
89 sjob_chain job_chain;
#ifdef PETSC_HAVE_PTHREAD_BARRIER
/* Job descriptor for the 'true' thread pool; only built when pthread barriers are available */
typedef struct {
  pthread_mutex_t    mutex;
  pthread_cond_t     cond;
  void              *(*pfunc)(void *);
  void             **pdata;
  pthread_barrier_t *pbarr;
  int                iNumJobThreads;
  int                iNumReadyThreads;
  PetscBool          startJob;
} sjob_true;
/* Positional initializer: entries follow the field order of sjob_true above */
sjob_true job_true = {
  PTHREAD_MUTEX_INITIALIZER,  /* mutex */
  PTHREAD_COND_INITIALIZER,   /* cond */
  NULL,                       /* pfunc */
  NULL,                       /* pdata */
  NULL,                       /* pbarr */
  0,                          /* iNumJobThreads */
  0,                          /* iNumReadyThreads */
  PETSC_FALSE                 /* startJob */
};
#endif /* PETSC_HAVE_PTHREAD_BARRIER */
103 
/* used by 'true', 'chain','tree' thread pools */
pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER;

/* Raw byte buffers; each is used by 'chain','main','tree' thread pools */
char *arrmutex;
char *arrcond1;
char *arrcond2;
char *arrstart;
char *arrready;
110 
111 /* Function Pointers */
112 void*          (*PetscThreadFunc)(void*) = NULL;
113 void*          (*PetscThreadInitialize)(PetscInt) = NULL;
114 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
115 void           (*MainWait)(void) = NULL;
116 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
117 /**** Tree Thread Pool Functions ****/
118 void*          PetscThreadFunc_Tree(void*);
119 void*          PetscThreadInitialize_Tree(PetscInt);
120 PetscErrorCode PetscThreadFinalize_Tree(void);
121 void           MainWait_Tree(void);
122 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
123 /**** Main Thread Pool Functions ****/
124 void*          PetscThreadFunc_Main(void*);
125 void*          PetscThreadInitialize_Main(PetscInt);
126 PetscErrorCode PetscThreadFinalize_Main(void);
127 void           MainWait_Main(void);
128 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
129 /**** Chain Thread Pool Functions ****/
130 void*          PetscThreadFunc_Chain(void*);
131 void*          PetscThreadInitialize_Chain(PetscInt);
132 PetscErrorCode PetscThreadFinalize_Chain(void);
133 void           MainWait_Chain(void);
134 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
135 /**** True Thread Pool Functions ****/
136 void*          PetscThreadFunc_True(void*);
137 void*          PetscThreadInitialize_True(PetscInt);
138 PetscErrorCode PetscThreadFinalize_True(void);
139 void           MainWait_True(void);
140 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
141 /**** NO Thread Pool Function  ****/
142 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void**,PetscInt);
143 /****  ****/
144 void* FuncFinish(void*);
145 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
146 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
147 #endif
148 
149 #if defined(PETSC_USE_COMPLEX)
150 #if defined(PETSC_COMPLEX_INSTANTIATE)
151 template <> class std::complex<double>; /* instantiate complex template class */
152 #endif
153 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
154 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
155 MPI_Datatype   MPI_C_COMPLEX;
156 #endif
157 PetscScalar    PETSC_i;
158 #else
159 PetscScalar    PETSC_i = 0.0;
160 #endif
161 #if defined(PETSC_USE_REAL___FLOAT128)
162 MPI_Datatype   MPIU___FLOAT128 = 0;
163 #endif
164 MPI_Datatype   MPIU_2SCALAR = 0;
165 MPI_Datatype   MPIU_2INT = 0;
166 
167 /*
168      These are needed by petscbt.h
169 */
170 #include <petscbt.h>
171 char      _BT_mask = ' ';
172 char      _BT_c = ' ';
173 PetscInt  _BT_idx  = 0;
174 
175 /*
176        Function that is called to display all error messages
177 */
178 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
179 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
180 #if defined(PETSC_HAVE_MATLAB_ENGINE)
181 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
182 #else
183 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
184 #endif
185 /*
186   This is needed to turn on/off cusp synchronization */
187 PetscBool   synchronizeCUSP = PETSC_FALSE;
188 
189 /* ------------------------------------------------------------------------------*/
190 /*
191    Optional file where all PETSc output from various prints is saved
192 */
193 FILE *petsc_history = PETSC_NULL;
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "PetscOpenHistoryFile"
197 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
198 {
199   PetscErrorCode ierr;
200   PetscMPIInt    rank,size;
201   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
202   char           version[256];
203 
204   PetscFunctionBegin;
205   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
206   if (!rank) {
207     char        arch[10];
208     int         err;
209     PetscViewer viewer;
210 
211     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
212     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
213     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
214     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
215     if (filename) {
216       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
217     } else {
218       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
219       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
220       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
221     }
222 
223     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
224     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
225     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
226     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
227     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
228     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
229     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
230     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
231     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
232     err = fflush(*fd);
233     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PetscCloseHistoryFile"
240 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
241 {
242   PetscErrorCode ierr;
243   PetscMPIInt    rank;
244   char           date[64];
245   int            err;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
249   if (!rank) {
250     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
251     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
252     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
253     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
254     err = fflush(*fd);
255     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
256     err = fclose(*fd);
257     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /* ------------------------------------------------------------------------------*/
263 
/*
   This is ugly and probably belongs somewhere else, but I want to
  be able to put a true MPI abort error handler with command line args.

    This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits, thus providing no useful information
  in the debugger; hence we call abort() instead of MPI_Abort().
*/
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "Petsc_MPI_AbortOnError"
275 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
276 {
277   PetscFunctionBegin;
278   (*PetscErrorPrintf)("MPI error %d\n",*flag);
279   abort();
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
284 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
285 {
286   PetscErrorCode ierr;
287 
288   PetscFunctionBegin;
289   (*PetscErrorPrintf)("MPI error %d\n",*flag);
290   ierr = PetscAttachDebugger();
291   if (ierr) { /* hopeless so get out */
292     MPI_Abort(*comm,*flag);
293   }
294 }
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "PetscEnd"
298 /*@C
299    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
300      wishes a clean exit somewhere deep in the program.
301 
302    Collective on PETSC_COMM_WORLD
303 
304    Options Database Keys are the same as for PetscFinalize()
305 
306    Level: advanced
307 
308    Note:
309    See PetscInitialize() for more general runtime options.
310 
311 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
312 @*/
313 PetscErrorCode  PetscEnd(void)
314 {
315   PetscFunctionBegin;
316   PetscFinalize();
317   exit(0);
318   return 0;
319 }
320 
321 PetscBool    PetscOptionsPublish = PETSC_FALSE;
322 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
323 extern PetscBool  petscsetmallocvisited;
324 static char       emacsmachinename[256];
325 
326 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
327 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "PetscSetHelpVersionFunctions"
331 /*@C
332    PetscSetHelpVersionFunctions - Sets functions that print help and version information
333    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
334    This routine enables a "higher-level" package that uses PETSc to print its messages first.
335 
336    Input Parameter:
337 +  help - the help function (may be PETSC_NULL)
338 -  version - the version function (may be PETSC_NULL)
339 
340    Level: developer
341 
342    Concepts: package help message
343 
344 @*/
345 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
346 {
347   PetscFunctionBegin;
348   PetscExternalHelpFunction    = help;
349   PetscExternalVersionFunction = version;
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
355 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
356 {
357   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
358   MPI_Comm       comm = PETSC_COMM_WORLD;
359   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
360   PetscErrorCode ierr;
361   PetscReal      si;
362   int            i;
363   PetscMPIInt    rank;
364   char           version[256];
365 
366   PetscFunctionBegin;
367   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
368 
369   /*
370       Setup the memory management; support for tracing malloc() usage
371   */
372   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
373 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
375   if ((!flg2 || flg1) && !petscsetmallocvisited) {
376 #if defined(PETSC_HAVE_VALGRIND)
377     if (flg2 || !(RUNNING_ON_VALGRIND)) {
378       /* turn off default -malloc if valgrind is being used */
379 #endif
380       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
381 #if defined(PETSC_HAVE_VALGRIND)
382     }
383 #endif
384   }
385 #else
386   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
387   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
388   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
389 #endif
390   if (flg3) {
391     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
392   }
393   flg1 = PETSC_FALSE;
394   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
395   if (flg1) {
396     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
397     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
398   }
399 
400   flg1 = PETSC_FALSE;
401   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
402   if (!flg1) {
403     flg1 = PETSC_FALSE;
404     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
405   }
406   if (flg1) {
407     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
408   }
409 
410   /*
411       Set the display variable for graphics
412   */
413   ierr = PetscSetDisplay();CHKERRQ(ierr);
414 
415 
416   /*
417       Print the PETSc version information
418   */
419   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
420   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
421   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
422   if (flg1 || flg2 || flg3){
423 
424     /*
425        Print "higher-level" package version message
426     */
427     if (PetscExternalVersionFunction) {
428       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
429     }
430 
431     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
432     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
433 ------------------------------\n");CHKERRQ(ierr);
434     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
435     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
436     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
437     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
438     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
439     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
440     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
441 ------------------------------\n");CHKERRQ(ierr);
442   }
443 
444   /*
445        Print "higher-level" package help message
446   */
447   if (flg3){
448     if (PetscExternalHelpFunction) {
449       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
450     }
451   }
452 
453   /*
454       Setup the error handling
455   */
456   flg1 = PETSC_FALSE;
457   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
458   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
459   flg1 = PETSC_FALSE;
460   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
461   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
462   flg1 = PETSC_FALSE;
463   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
464   if (flg1) {
465     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
466   }
467   flg1 = PETSC_FALSE;
468   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
469   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
470   flg1 = PETSC_FALSE;
471   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
472   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
473 
474   /*
475       Setup debugger information
476   */
477   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
478   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
479   if (flg1) {
480     MPI_Errhandler err_handler;
481 
482     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
483     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
484     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
485     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
486   }
487   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
488   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
489   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
490   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
491   if (flg1 || flg2) {
492     PetscMPIInt    size;
493     PetscInt       lsize,*nodes;
494     MPI_Errhandler err_handler;
495     /*
496        we have to make sure that all processors have opened
497        connections to all other processors, otherwise once the
498        debugger has stated it is likely to receive a SIGUSR1
499        and kill the program.
500     */
501     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
502     if (size > 2) {
503       PetscMPIInt dummy = 0;
504       MPI_Status  status;
505       for (i=0; i<size; i++) {
506         if (rank != i) {
507           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
508         }
509       }
510       for (i=0; i<size; i++) {
511         if (rank != i) {
512           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
513         }
514       }
515     }
516     /* check if this processor node should be in debugger */
517     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
518     lsize = size;
519     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
520     if (flag) {
521       for (i=0; i<lsize; i++) {
522         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
523       }
524     }
525     if (!flag) {
526       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
527       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
528       if (flg1) {
529         ierr = PetscAttachDebugger();CHKERRQ(ierr);
530       } else {
531         ierr = PetscStopForDebugger();CHKERRQ(ierr);
532       }
533       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
534       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
535     }
536     ierr = PetscFree(nodes);CHKERRQ(ierr);
537   }
538 
539   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
540   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
541 
542 #if defined(PETSC_USE_SOCKET_VIEWER)
543   /*
544     Activates new sockets for zope if needed
545   */
546   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
547   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
548   if (flgz){
549     int  sockfd;
550     char hostname[256];
551     char username[256];
552     int  remoteport = 9999;
553 
554     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
555     if (!hostname[0]){
556       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
557     }
558     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
559     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
560     PETSC_ZOPEFD = fdopen(sockfd, "w");
561     if (flgzout){
562       PETSC_STDOUT = PETSC_ZOPEFD;
563       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
564       fprintf(PETSC_STDOUT, "<<<start>>>");
565     } else {
566       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
567       fprintf(PETSC_ZOPEFD, "<<<start>>>");
568     }
569   }
570 #endif
571 #if defined(PETSC_USE_SERVER)
572   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
573   if (flgz){
574     PetscInt port = PETSC_DECIDE;
575     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
576     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
577   }
578 #endif
579 
580   /*
581         Setup profiling and logging
582   */
583 #if defined (PETSC_USE_INFO)
584   {
585     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
586     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
587     if (flg1 && logname[0]) {
588       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
589     } else if (flg1) {
590       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
591     }
592   }
593 #endif
594 #if defined(PETSC_USE_LOG)
595   mname[0] = 0;
596   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
597   if (flg1) {
598     if (mname[0]) {
599       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
600     } else {
601       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
602     }
603   }
604 #if defined(PETSC_HAVE_MPE)
605   flg1 = PETSC_FALSE;
606   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
607   if (flg1) PetscLogMPEBegin();
608 #endif
609   flg1 = PETSC_FALSE;
610   flg2 = PETSC_FALSE;
611   flg3 = PETSC_FALSE;
612   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
613   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
614   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
615   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
616   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
617   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
618 
619   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
620   if (flg1) {
621     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
622     FILE *file;
623     if (mname[0]) {
624       sprintf(name,"%s.%d",mname,rank);
625       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
626       file = fopen(fname,"w");
627       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
628     } else {
629       file = PETSC_STDOUT;
630     }
631     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
632   }
633 #endif
634 
635   /*
636       Setup building of stack frames for all function calls
637   */
638 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
639   ierr = PetscStackCreate();CHKERRQ(ierr);
640 #endif
641 
642   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
643 
644 #if defined(PETSC_USE_PTHREAD_CLASSES)
645   /*
646       Determine whether user specified maximum number of threads
647    */
648   ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
649 
650   ierr = PetscOptionsHasName(PETSC_NULL,"-main",&flg1);CHKERRQ(ierr);
651   if(flg1) {
652     cpu_set_t mset;
653     int icorr,ncorr = get_nprocs();
654     ierr = PetscOptionsGetInt(PETSC_NULL,"-main",&icorr,PETSC_NULL);CHKERRQ(ierr);
655     CPU_ZERO(&mset);
656     CPU_SET(icorr%ncorr,&mset);
657     sched_setaffinity(0,sizeof(cpu_set_t),&mset);
658   }
659 
660   /*
661       Determine whether to use thread pool
662    */
663   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
664   if (flg1) {
665     PetscUseThreadPool = PETSC_TRUE;
666     PetscInt N_CORES = get_nprocs();
667     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
668     char tstr[9];
669     char tbuf[2];
670     strcpy(tstr,"-thread");
671     for(i=0;i<PetscMaxThreads;i++) {
672       ThreadCoreAffinity[i] = i;
673       sprintf(tbuf,"%d",i);
674       strcat(tstr,tbuf);
675       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
676       if(flg1) {
677         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
678         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; /* check on the user */
679       }
680       tstr[7] = '\0';
681     }
682     /* get the thread pool type */
683     PetscInt ipool = 0;
684     const char *choices[4] = {"true","tree","main","chain"};
685 
686     ierr = PetscOptionsGetEList(PETSC_NULL,"-use_thread_pool",choices,4,&ipool,PETSC_NULL);CHKERRQ(ierr);
687     switch(ipool) {
688     case 1:
689       PetscThreadFunc       = &PetscThreadFunc_Tree;
690       PetscThreadInitialize = &PetscThreadInitialize_Tree;
691       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
692       MainWait              = &MainWait_Tree;
693       MainJob               = &MainJob_Tree;
694       PetscInfo(PETSC_NULL,"Using tree thread pool\n");
695       break;
696     case 2:
697       PetscThreadFunc       = &PetscThreadFunc_Main;
698       PetscThreadInitialize = &PetscThreadInitialize_Main;
699       PetscThreadFinalize   = &PetscThreadFinalize_Main;
700       MainWait              = &MainWait_Main;
701       MainJob               = &MainJob_Main;
702       PetscInfo(PETSC_NULL,"Using main thread pool\n");
703       break;
704 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
705     case 3:
706 #else
707     default:
708 #endif
709       PetscThreadFunc       = &PetscThreadFunc_Chain;
710       PetscThreadInitialize = &PetscThreadInitialize_Chain;
711       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
712       MainWait              = &MainWait_Chain;
713       MainJob               = &MainJob_Chain;
714       PetscInfo(PETSC_NULL,"Using chain thread pool\n");
715       break;
716 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
717     default:
718       PetscThreadFunc       = &PetscThreadFunc_True;
719       PetscThreadInitialize = &PetscThreadInitialize_True;
720       PetscThreadFinalize   = &PetscThreadFinalize_True;
721       MainWait              = &MainWait_True;
722       MainJob               = &MainJob_True;
723       PetscInfo(PETSC_NULL,"Using true thread pool\n");
724       break;
725 #endif
726     }
727     PetscThreadInitialize(PetscMaxThreads);
728   } else {
729     //need to define these in the case on 'no threads' or 'thread create/destroy'
730     //could take any of the above versions
731     MainJob               = &MainJob_Spawn;
732   }
733 #endif
734   /*
735        Print basic help message
736   */
737   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
738   if (flg1) {
739     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
742     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
743     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
746     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
747     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
748     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
749     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
750     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
751     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
755     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
756     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
757     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
758     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
759     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
760     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
761     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
762     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
763     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
764     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
765     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
766     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
767     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
768     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
769     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
770     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
771     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
772     ierr = (*PetscHelpPrintf)(comm," -server <port>: Run PETSc webserver (default port is 8080) see PetscWebServe()\n");CHKERRQ(ierr);
773 #if defined(PETSC_USE_LOG)
774     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
775     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary _summary_python]: logging objects and events\n");CHKERRQ(ierr);
776     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
777 #if defined(PETSC_HAVE_MPE)
778     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
779 #endif
780     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
781 #endif
782     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
783     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
784     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
785     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
786   }
787 
788   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
789   if (flg1) {
790     ierr = PetscSleep(si);CHKERRQ(ierr);
791   }
792 
793   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
794   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
795   if (f) {
796     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
797   }
798 
799 #if defined(PETSC_HAVE_CUSP)
800   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
801   if (flg3) flg1 = PETSC_TRUE;
802   else flg1 = PETSC_FALSE;
803   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
804   if (flg1) synchronizeCUSP = PETSC_TRUE;
805 #endif
806 
807   PetscFunctionReturn(0);
808 }
809 
810 #if defined(PETSC_USE_PTHREAD_CLASSES)
811 
812 /**** 'Tree' Thread Pool Functions ****/
/*
   PetscThreadFunc_Tree - worker routine for the 'tree' thread pool.

   arg points to an int holding this worker's id (ThreadId).  The worker pins
   itself to core ThreadCoreAffinity[ThreadId], then treats the pool as a tree
   with branching factor Mary (2): the subordinates of worker t are workers
   Mary*t+1 .. Mary*t+Mary (those below PetscMaxThreads).

   Handshake, as implemented below:
     - a worker reports "ready" only after all of its subordinates are ready;
       worker 0 reports to 'main' through main_cond, every other worker
       reports to its boss through its own cond1array entry
     - a worker sleeps on its own cond2array entry until it is told to start,
       then wakes its subordinates and runs job_tree.pfunc
     - a nonzero job result sets the global ithreaderr to 1

   The loop runs until PetscThreadGo becomes false.  Always returns NULL.
*/
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;   /* PETSC_TRUE when this worker has no subordinates (leaf of the tree) */
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* bind the calling thread to the single core assigned to this worker id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* a worker is a leaf when even its first subordinate index falls outside the pool */
  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinates, wait for them to be ready */
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
      }
    }
    /* your subordinates are now ready */
  }
  /* this lock is still held on entry to the main loop below */
  ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_tree.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
  }
  /* the while loop needs to have an exit
  the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): every return from pthread_cond_wait below clears the ready
       flag, so the loop body runs at most once per job; a spurious wakeup
       would therefore also start the job - confirm this is acceptable */
    while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* the root records that the job handed over by 'main' is now in progress */
      job_tree.startJob = PETSC_FALSE;
      job_tree.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subordinates it's time to get to work */
      for(i=1; i<=Mary; i++) {
	SubWorker = Mary*ThreadId+i;
        if(SubWorker<PetscMaxThreads) {
          ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
        }
      }
    }
    /* do your job: with no data array the job receives NULL, otherwise this worker's own slot */
    if(job_tree.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
    }
    if(iterr!=0) {
      /* any failing worker raises the shared error flag */
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinates, waiting for them to be ready
         how do you know for a fact that a given subordinate has actually started?
         answer used here: also require its arrThreadStarted flag to be set */
	for(i=1;i<=Mary;i++) {
	  SubWorker = Mary*ThreadId+i;
          if(SubWorker<PetscMaxThreads) {
            ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
            while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
              /* upon entry, atomically releases the lock and blocks
               upon return, has the lock */
              ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
            }
            ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
          }
	}
        /* your subordinates are now ready */
      }
      /* re-acquire own lock; it stays held into the next loop iteration */
      ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
      *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_tree.eJobStat = JobCompleted; /* root thread: last thread to complete, guaranteed! */
        /* root thread signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "PetscThreadInitialize_Tree"
935 void* PetscThreadInitialize_Tree(PetscInt N) {
936   PetscInt i,ierr;
937   int status;
938 
939   if(PetscUseThreadPool) {
940     size_t Val1 = (size_t)CACHE_LINE_SIZE;
941     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
942     arrmutex = (char*)memalign(Val1,Val2);
943     arrcond1 = (char*)memalign(Val1,Val2);
944     arrcond2 = (char*)memalign(Val1,Val2);
945     arrstart = (char*)memalign(Val1,Val2);
946     arrready = (char*)memalign(Val1,Val2);
947     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
948     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
949     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
950     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
951     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
952     /* initialize job structure */
953     for(i=0; i<PetscMaxThreads; i++) {
954       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
955       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
956       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
957       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
958       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
959     }
960     for(i=0; i<PetscMaxThreads; i++) {
961       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
962       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
963       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
964       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
965       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
966     }
967     job_tree.pfunc = NULL;
968     job_tree.pdata = (void**)malloc(N*sizeof(void*));
969     job_tree.startJob = PETSC_FALSE;
970     job_tree.eJobStat = JobInitiated;
971     pVal = (int*)malloc(N*sizeof(int));
972     /* allocate memory in the heap for the thread structure */
973     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
974     /* create threads */
975     for(i=0; i<N; i++) {
976       pVal[i] = i;
977       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
978       /* should check status */
979     }
980   }
981   return NULL;
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "PetscThreadFinalize_Tree"
986 PetscErrorCode PetscThreadFinalize_Tree() {
987   int i,ierr;
988   void* jstatus;
989 
990   PetscFunctionBegin;
991 
992   if(PetscUseThreadPool) {
993     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
994     /* join the threads */
995     for(i=0; i<PetscMaxThreads; i++) {
996       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
997       /* do error checking*/
998     }
999     free(PetscThreadPoint);
1000     free(arrmutex);
1001     free(arrcond1);
1002     free(arrcond2);
1003     free(arrstart);
1004     free(arrready);
1005     free(job_tree.pdata);
1006     free(pVal);
1007   }
1008   else {
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MainWait_Tree"
1015 void MainWait_Tree() {
1016   int ierr;
1017   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
1018   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
1019     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
1020   }
1021   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MainJob_Tree"
1026 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
1027   int i,ierr;
1028   PetscErrorCode ijoberr = 0;
1029 
1030   MainWait();
1031   job_tree.pfunc = pFunc;
1032   job_tree.pdata = data;
1033   job_tree.startJob = PETSC_TRUE;
1034   for(i=0; i<PetscMaxThreads; i++) {
1035     *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1036   }
1037   job_tree.eJobStat = JobInitiated;
1038   ierr = pthread_cond_signal(job_tree.cond2array[0]);
1039   if(pFunc!=FuncFinish) {
1040     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1041   }
1042 
1043   if(ithreaderr) {
1044     ijoberr = ithreaderr;
1045   }
1046   return ijoberr;
1047 }
1048 /****  ****/
1049 
1050 /**** 'Main' Thread Pool Functions ****/
/*
   PetscThreadFunc_Main - worker routine for the 'main' thread pool.

   arg points to an int holding this worker's id (ThreadId).  The worker pins
   itself to core ThreadCoreAffinity[ThreadId], marks itself ready and signals
   its cond1array entry, then repeatedly sleeps on its cond2array entry while
   its ready flag is set, runs job_main.pfunc, and marks itself ready again.
   The ready flag is not cleared in this routine (the clearing statement in the
   wait loop is commented out).  A nonzero job result sets the global
   ithreaderr to 1.

   The loop runs until PetscThreadGo becomes false.  Always returns NULL.
*/
void* PetscThreadFunc_Main(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  cpu_set_t mset;
  //printf("Thread %d In Main Thread Function\n",ThreadId);
  /* bind the calling thread to the single core assigned to this worker id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* this lock is still held on entry to the main loop below */
  ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
  /* tell the BOSS that you're ready to work before you go to sleep */
  ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);

  /* the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
     and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
	/* (job_main.arrThreadReady[ThreadId])   = PETSC_FALSE; */
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    /* with no data array the job receives NULL, otherwise this worker's own slot */
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      /* any failing worker raises the shared error flag */
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more; the lock stays held into the next iteration */
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      /* tell the BOSS that you're ready to work before you go to sleep */
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "PetscThreadInitialize_Main"
1105 void* PetscThreadInitialize_Main(PetscInt N) {
1106   PetscInt i,ierr;
1107   int status;
1108 
1109   if(PetscUseThreadPool) {
1110     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1111     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1112     arrmutex = (char*)memalign(Val1,Val2);
1113     arrcond1 = (char*)memalign(Val1,Val2);
1114     arrcond2 = (char*)memalign(Val1,Val2);
1115     arrstart = (char*)memalign(Val1,Val2);
1116     arrready = (char*)memalign(Val1,Val2);
1117     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1118     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1119     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1120     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1121     /* initialize job structure */
1122     for(i=0; i<PetscMaxThreads; i++) {
1123       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1124       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1125       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1126       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1127     }
1128     for(i=0; i<PetscMaxThreads; i++) {
1129       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1130       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1131       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1132       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1133     }
1134     job_main.pfunc = NULL;
1135     job_main.pdata = (void**)malloc(N*sizeof(void*));
1136     pVal = (int*)malloc(N*sizeof(int));
1137     /* allocate memory in the heap for the thread structure */
1138     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1139     /* create threads */
1140     for(i=0; i<N; i++) {
1141       pVal[i] = i;
1142       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1143       /* error check */
1144     }
1145   }
1146   else {
1147   }
1148   return NULL;
1149 }
1150 
1151 #undef __FUNCT__
1152 #define __FUNCT__ "PetscThreadFinalize_Main"
1153 PetscErrorCode PetscThreadFinalize_Main() {
1154   int i,ierr;
1155   void* jstatus;
1156 
1157   PetscFunctionBegin;
1158 
1159   if(PetscUseThreadPool) {
1160     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1161     /* join the threads */
1162     for(i=0; i<PetscMaxThreads; i++) {
1163       ierr = pthread_join(PetscThreadPoint[i],&jstatus);CHKERRQ(ierr);
1164     }
1165     free(PetscThreadPoint);
1166     free(arrmutex);
1167     free(arrcond1);
1168     free(arrcond2);
1169     free(arrstart);
1170     free(arrready);
1171     free(job_main.pdata);
1172     free(pVal);
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MainWait_Main"
1179 void MainWait_Main() {
1180   int i,ierr;
1181   for(i=0; i<PetscMaxThreads; i++) {
1182     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1183     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1184       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1185     }
1186     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1187   }
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MainJob_Main"
1192 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1193   int i,ierr;
1194   PetscErrorCode ijoberr = 0;
1195 
1196   MainWait(); /* you know everyone is waiting to be signalled! */
1197   job_main.pfunc = pFunc;
1198   job_main.pdata = data;
1199   for(i=0; i<PetscMaxThreads; i++) {
1200     *(job_main.arrThreadReady[i]) = PETSC_FALSE; /* why do this?  suppose you get into MainWait first */
1201   }
1202   /* tell the threads to go to work */
1203   for(i=0; i<PetscMaxThreads; i++) {
1204     ierr = pthread_cond_signal(job_main.cond2array[i]);
1205   }
1206   if(pFunc!=FuncFinish) {
1207     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1208   }
1209 
1210   if(ithreaderr) {
1211     ijoberr = ithreaderr;
1212   }
1213   return ijoberr;
1214 }
1215 /****  ****/
1216 
1217 /**** Chain Thread Functions ****/
/*
   PetscThreadFunc_Chain - worker routine for the 'chain' thread pool.

   arg points to an int holding this worker's id (ThreadId).  The worker pins
   itself to core ThreadCoreAffinity[ThreadId].  Workers form a chain: the
   subordinate of worker t is worker t+1, and the worker with id
   PetscMaxThreads-1 has none.

   Handshake, as implemented below:
     - a worker reports "ready" only after its subordinate is ready;
       worker 0 reports to 'main' through main_cond, every other worker
       reports to its boss through its own cond1array entry
     - a worker sleeps on its own cond2array entry until it is told to start,
       then wakes its subordinate and runs job_chain.pfunc
     - a nonzero job result sets the global ithreaderr to 1

   The loop runs until PetscThreadGo becomes false.  Always returns NULL.
*/
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;   /* next link in the chain */
  PetscBool PeeOn;                /* PETSC_TRUE for the last worker, which has no subordinate */
  cpu_set_t mset;
  //printf("Thread %d In Chain Thread Function\n",ThreadId);
  /* bind the calling thread to the single core assigned to this worker id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    /* check your subordinate, wait for him to be ready */
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    /* your subordinate is now ready*/
  }
  /* this lock is still held on entry to the main loop below */
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  /* update your ready status */
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    /* signal main */
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    /* tell your boss that you're ready to work */
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  /*  the while loop needs to have an exit
     the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
       waiting when you don't have to causes problems
     also need to check the condition to ensure proper handling of spurious wakeups */
    /* NOTE(review): every return from pthread_cond_wait below clears the ready
       flag, so the loop body runs at most once per job; a spurious wakeup
       would therefore also start the job - confirm this is acceptable */
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
        ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
    }
    if(ThreadId==0) {
      /* the head of the chain records that the job handed over by 'main' is now in progress */
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      /* tell your subworker it's time to get to work */
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    /* do your job: with no data array the job receives NULL, otherwise this worker's own slot */
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
      /* any failing worker raises the shared error flag */
      ithreaderr = 1;
    }
    if(PetscThreadGo) {
      /* reset job, get ready for more */
      if(PeeOn==PETSC_FALSE) {
        /* check your subordinate, wait for him to be ready
         how do you know for a fact that your subordinate has actually started?
         answer used here: also require its arrThreadStarted flag to be set */
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          /* upon entry, atomically releases the lock and blocks
           upon return, has the lock */
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        /* your subordinate is now ready */
      }
      /* re-acquire own lock; it stays held into the next loop iteration */
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
	job_chain.eJobStat = JobCompleted; /* foreman: last thread to complete, guaranteed! */
        /* root thread (foreman) signals 'main' */
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        /* signal your boss before you go to sleep */
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "PetscThreadInitialize_Chain"
1326 void* PetscThreadInitialize_Chain(PetscInt N) {
1327   PetscInt i,ierr;
1328   int status;
1329 
1330   if(PetscUseThreadPool) {
1331     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1332     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1333     arrmutex = (char*)memalign(Val1,Val2);
1334     arrcond1 = (char*)memalign(Val1,Val2);
1335     arrcond2 = (char*)memalign(Val1,Val2);
1336     arrstart = (char*)memalign(Val1,Val2);
1337     arrready = (char*)memalign(Val1,Val2);
1338     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1339     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1340     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1341     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1342     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1343     /* initialize job structure */
1344     for(i=0; i<PetscMaxThreads; i++) {
1345       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1346       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1347       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1348       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1349       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1350     }
1351     for(i=0; i<PetscMaxThreads; i++) {
1352       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1353       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1354       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1355       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1356       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1357     }
1358     job_chain.pfunc = NULL;
1359     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1360     job_chain.startJob = PETSC_FALSE;
1361     job_chain.eJobStat = JobInitiated;
1362     pVal = (int*)malloc(N*sizeof(int));
1363     /* allocate memory in the heap for the thread structure */
1364     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1365     /* create threads */
1366     for(i=0; i<N; i++) {
1367       pVal[i] = i;
1368       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1369       /* should check error */
1370     }
1371   }
1372   else {
1373   }
1374   return NULL;
1375 }
1376 
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "PetscThreadFinalize_Chain"
1380 PetscErrorCode PetscThreadFinalize_Chain() {
1381   int i,ierr;
1382   void* jstatus;
1383 
1384   PetscFunctionBegin;
1385 
1386   if(PetscUseThreadPool) {
1387     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1388     /* join the threads */
1389     for(i=0; i<PetscMaxThreads; i++) {
1390       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1391       /* should check error */
1392     }
1393     free(PetscThreadPoint);
1394     free(arrmutex);
1395     free(arrcond1);
1396     free(arrcond2);
1397     free(arrstart);
1398     free(arrready);
1399     free(job_chain.pdata);
1400     free(pVal);
1401   }
1402   else {
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MainWait_Chain"
1409 void MainWait_Chain() {
1410   int ierr;
1411   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1412   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1413     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1414   }
1415   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MainJob_Chain"
1420 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1421   int i,ierr;
1422   PetscErrorCode ijoberr = 0;
1423 
1424   MainWait();
1425   job_chain.pfunc = pFunc;
1426   job_chain.pdata = data;
1427   job_chain.startJob = PETSC_TRUE;
1428   for(i=0; i<PetscMaxThreads; i++) {
1429     *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1430   }
1431   job_chain.eJobStat = JobInitiated;
1432   ierr = pthread_cond_signal(job_chain.cond2array[0]);
1433   if(pFunc!=FuncFinish) {
1434     MainWait(); /* why wait after? guarantees that job gets done before proceeding with result collection (if any) */
1435   }
1436 
1437   if(ithreaderr) {
1438     ijoberr = ithreaderr;
1439   }
1440   return ijoberr;
1441 }
1442 /****  ****/
1443 
1444 #if defined(PETSC_HAVE_PTHREAD_BARRIER)
1445 /**** True Thread Functions ****/
/*
   PetscThreadFunc_True - worker routine for the 'true' thread pool.

   arg points to an int holding this worker's id (ThreadId), which is used
   only to pick the core from ThreadCoreAffinity.  All workers share one mutex
   (job_true.mutex) and one condition variable (job_true.cond).  Each worker
   counts itself into job_true.iNumReadyThreads, sleeps until a job is posted,
   claims one unit of it, runs job_true.pfunc, meets the other workers at the
   barrier job_true.pbarr, and counts itself ready again.  The worker that
   brings the ready count up to PetscMaxThreads signals main_cond.  A nonzero
   job result sets the global ithreaderr to 1.

   The loop runs until PetscThreadGo becomes false.  Always returns NULL.
*/
void* PetscThreadFunc_True(void* arg) {
  int icorr,ierr,iVal;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  PetscErrorCode iterr;
  cpu_set_t mset;
  //printf("Thread %d In True Pool Thread Function\n",ThreadId);
  /* bind the calling thread to the single core assigned to this worker id */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  /* this lock is still held on entry to the main loop below */
  ierr = pthread_mutex_lock(&job_true.mutex);
  job_true.iNumReadyThreads++;
  if(job_true.iNumReadyThreads==PetscMaxThreads) {
    /* last worker to become ready wakes 'main' */
    ierr = pthread_cond_signal(&main_cond);
  }
  /* the while loop needs to have an exit
    the 'main' thread can terminate all the threads by performing a broadcast
   and calling FuncFinish */
  while(PetscThreadGo) {
    /* need to check the condition to ensure we don't have to wait
      waiting when you don't have to causes problems
     also need to wait if another thread sneaks in and messes with the predicate */
    while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
      /* upon entry, atomically releases the lock and blocks
       upon return, has the lock */
      ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
    }
    /* claim one unit of the posted job while still holding the lock */
    job_true.startJob = PETSC_FALSE;
    job_true.iNumJobThreads--;
    job_true.iNumReadyThreads--;
    /* index into pdata is derived from how many workers have left the ready state so far */
    iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
    pthread_mutex_unlock(&job_true.mutex);
    /* with no data array the job receives NULL, otherwise the slot selected by iVal */
    if(job_true.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
    }
    if(iterr!=0) {
      /* any failing worker raises the shared error flag */
      ithreaderr = 1;
    }
    /* the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
      what happens if a thread finishes before they all start? BAD!
     what happens if a thread finishes before any else start? BAD! */
    pthread_barrier_wait(job_true.pbarr); /* ensures all threads are finished */
    /* reset job */
    if(PetscThreadGo) {
      /* re-acquire the shared lock; it stays held into the next loop iteration */
      pthread_mutex_lock(&job_true.mutex);
      job_true.iNumReadyThreads++;
      if(job_true.iNumReadyThreads==PetscMaxThreads) {
	/* signal the 'main' thread that the job is done! (only done once) */
	ierr = pthread_cond_signal(&main_cond);
      }
    }
  }
  return NULL;
}
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "PetscThreadInitialize_True"
1508 void* PetscThreadInitialize_True(PetscInt N) {
1509   PetscInt i;
1510   int status;
1511 
1512   if(PetscUseThreadPool) {
1513     pVal = (int*)malloc(N*sizeof(int));
1514     /* allocate memory in the heap for the thread structure */
1515     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1516     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); /* BarrPoint[0] makes no sense, don't use it! */
1517     job_true.pdata = (void**)malloc(N*sizeof(void*));
1518     for(i=0; i<N; i++) {
1519       pVal[i] = i;
1520       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1521       /* error check to ensure proper thread creation */
1522       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1523       /* should check error */
1524     }
1525   }
1526   else {
1527   }
1528   return NULL;
1529 }
1530 
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "PetscThreadFinalize_True"
1534 PetscErrorCode PetscThreadFinalize_True() {
1535   int i,ierr;
1536   void* jstatus;
1537 
1538   PetscFunctionBegin;
1539 
1540   if(PetscUseThreadPool) {
1541     MainJob(FuncFinish,NULL,PetscMaxThreads);  /* set up job and broadcast work */
1542     /* join the threads */
1543     for(i=0; i<PetscMaxThreads; i++) {
1544       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1545       /* should check error */
1546     }
1547     free(BarrPoint);
1548     free(PetscThreadPoint);
1549   }
1550   else {
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MainWait_True"
1557 void MainWait_True() {
1558   int ierr;
1559   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1560     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1561   }
1562   ierr = pthread_mutex_unlock(&job_true.mutex);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MainJob_True"
1567 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1568   int ierr;
1569   PetscErrorCode ijoberr = 0;
1570 
1571   MainWait();
1572   job_true.pfunc = pFunc;
1573   job_true.pdata = data;
1574   job_true.pbarr = &BarrPoint[n];
1575   job_true.iNumJobThreads = n;
1576   job_true.startJob = PETSC_TRUE;
1577   ierr = pthread_cond_broadcast(&job_true.cond);
1578   if(pFunc!=FuncFinish) {
1579     MainWait(); /* why wait after? guarantees that job gets done */
1580   }
1581 
1582   if(ithreaderr) {
1583     ijoberr = ithreaderr;
1584   }
1585   return ijoberr;
1586 }
1587 /**** NO THREAD POOL FUNCTION ****/
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MainJob_Spawn"
1590 PetscErrorCode MainJob_Spawn(void* (*pFunc)(void*),void** data,PetscInt n) {
1591   PetscErrorCode ijoberr = 0;
1592 
1593   pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1594   PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1595   PetscThreadStop(MPI_COMM_WORLD,n,apThread); /* ensures that all threads are finished with the job */
1596   free(apThread);
1597 
1598   return ijoberr;
1599 }
1600 /****  ****/
1601 #endif
1602 
1603 void* FuncFinish(void* arg) {
1604   PetscThreadGo = PETSC_FALSE;
1605   return(0);
1606 }
1607 
1608 #endif
1609