xref: /petsc/src/sys/objects/init.c (revision 9e800a489d4dd7df6e219efb49e6e50c80139b3f)
1 //new kds file - implements all thread pool versions
2 /*
3 
4    This file defines part of the initialization of PETSc
5 
6   This file uses regular malloc and free because it cannot know
7   what malloc is being used until it has already processed the input.
8 */
9 
10 #define _GNU_SOURCE
11 #include <sched.h>
12 #include <petscsys.h>        /*I  "petscsys.h"   I*/
13 #include <pthread.h>
14 #include <sys/sysinfo.h>
15 #include <unistd.h>
16 #if defined(PETSC_HAVE_STDLIB_H)
17 #include <stdlib.h>
18 #endif
19 #if defined(PETSC_HAVE_MALLOC_H)
20 #include <malloc.h>
21 #endif
22 #if defined(PETSC_HAVE_VALGRIND)
23 #include <valgrind/valgrind.h>
24 #endif
25 
26 /* ------------------------Nasty global variables -------------------------------*/
27 /*
28      Indicates if PETSc started up MPI, or it was
29    already started before PETSc was initialized.
30 */
31 PetscBool    PetscBeganMPI         = PETSC_FALSE;
32 PetscBool    PetscInitializeCalled = PETSC_FALSE;
33 PetscBool    PetscFinalizeCalled   = PETSC_FALSE;
34 PetscBool    PetscUseThreadPool    = PETSC_FALSE;
35 PetscBool    PetscThreadGo         = PETSC_TRUE;
36 PetscMPIInt  PetscGlobalRank = -1;
37 PetscMPIInt  PetscGlobalSize = -1;
38 PetscMPIInt  PetscMaxThreads = 2;
39 pthread_t*   PetscThreadPoint;
40 pthread_barrier_t* BarrPoint;   //used by 'true' thread pool
41 PetscErrorCode ithreaderr = 0;
42 int*         pVal;
43 
44 #define CACHE_LINE_SIZE 64  //used by 'chain', 'main','tree' thread pools
45 int* ThreadCoreAffinity;
46 
47 typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat;  //used by 'chain','tree' thread pool
48 
49 typedef struct {
50   pthread_mutex_t** mutexarray;
51   pthread_cond_t**  cond1array;
52   pthread_cond_t** cond2array;
53   void* (*pfunc)(void*);
54   void** pdata;
55   PetscBool startJob;
56   estat eJobStat;
57   PetscBool** arrThreadStarted;
58   PetscBool** arrThreadReady;
59 } sjob_tree;
60 sjob_tree job_tree;
61 typedef struct {
62   pthread_mutex_t** mutexarray;
63   pthread_cond_t**  cond1array;
64   pthread_cond_t** cond2array;
65   void* (*pfunc)(void*);
66   void** pdata;
67   PetscBool** arrThreadReady;
68 } sjob_main;
69 sjob_main job_main;
70 typedef struct {
71   pthread_mutex_t** mutexarray;
72   pthread_cond_t**  cond1array;
73   pthread_cond_t** cond2array;
74   void* (*pfunc)(void*);
75   void** pdata;
76   PetscBool startJob;
77   estat eJobStat;
78   PetscBool** arrThreadStarted;
79   PetscBool** arrThreadReady;
80 } sjob_chain;
81 sjob_chain job_chain;
82 typedef struct {
83   pthread_mutex_t mutex;
84   pthread_cond_t cond;
85   void* (*pfunc)(void*);
86   void** pdata;
87   pthread_barrier_t* pbarr;
88   int iNumJobThreads;
89   int iNumReadyThreads;
90   PetscBool startJob;
91 } sjob_true;
92 sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};
93 
94 pthread_cond_t  main_cond  = PTHREAD_COND_INITIALIZER;  //used by 'true', 'chain','tree' thread pools
95 char* arrmutex; //used by 'chain','main','tree' thread pools
96 char* arrcond1; //used by 'chain','main','tree' thread pools
97 char* arrcond2; //used by 'chain','main','tree' thread pools
98 char* arrstart; //used by 'chain','main','tree' thread pools
99 char* arrready; //used by 'chain','main','tree' thread pools
100 
101 /* Function Pointers */
102 void*          (*PetscThreadFunc)(void*) = NULL;
103 void*          (*PetscThreadInitialize)(PetscInt) = NULL;
104 PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
105 void           (*MainWait)(void) = NULL;
106 PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
107 /**** Tree Functions ****/
108 void*          PetscThreadFunc_Tree(void*);
109 void*          PetscThreadInitialize_Tree(PetscInt);
110 PetscErrorCode PetscThreadFinalize_Tree(void);
111 void           MainWait_Tree(void);
112 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
113 /**** Main Functions ****/
114 void*          PetscThreadFunc_Main(void*);
115 void*          PetscThreadInitialize_Main(PetscInt);
116 PetscErrorCode PetscThreadFinalize_Main(void);
117 void           MainWait_Main(void);
118 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
119 /**** Chain Functions ****/
120 void*          PetscThreadFunc_Chain(void*);
121 void*          PetscThreadInitialize_Chain(PetscInt);
122 PetscErrorCode PetscThreadFinalize_Chain(void);
123 void           MainWait_Chain(void);
124 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
125 /**** True Functions ****/
126 void*          PetscThreadFunc_True(void*);
127 void*          PetscThreadInitialize_True(PetscInt);
128 PetscErrorCode PetscThreadFinalize_True(void);
129 void           MainWait_True(void);
130 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
131 /****  ****/
132 
133 void* FuncFinish(void*);
134 void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
135 void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);
136 
137 #if defined(PETSC_USE_COMPLEX)
138 #if defined(PETSC_COMPLEX_INSTANTIATE)
139 template <> class std::complex<double>; /* instantiate complex template class */
140 #endif
141 #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
142 MPI_Datatype   MPI_C_DOUBLE_COMPLEX;
143 MPI_Datatype   MPI_C_COMPLEX;
144 #endif
145 PetscScalar    PETSC_i;
146 #else
147 PetscScalar    PETSC_i = 0.0;
148 #endif
149 #if defined(PETSC_USE_REAL___FLOAT128)
150 MPI_Datatype   MPIU___FLOAT128 = 0;
151 #endif
152 MPI_Datatype   MPIU_2SCALAR = 0;
153 MPI_Datatype   MPIU_2INT = 0;
154 
155 /*
156      These are needed by petscbt.h
157 */
158 #include <petscbt.h>
159 char      _BT_mask = ' ';
160 char      _BT_c = ' ';
161 PetscInt  _BT_idx  = 0;
162 
163 /*
164        Function that is called to display all error messages
165 */
166 PetscErrorCode  (*PetscErrorPrintf)(const char [],...)          = PetscErrorPrintfDefault;
167 PetscErrorCode  (*PetscHelpPrintf)(MPI_Comm,const char [],...)  = PetscHelpPrintfDefault;
168 #if defined(PETSC_HAVE_MATLAB_ENGINE)
169 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintf_Matlab;
170 #else
171 PetscErrorCode  (*PetscVFPrintf)(FILE*,const char[],va_list)    = PetscVFPrintfDefault;
172 #endif
173 /*
174   This is needed to turn on/off cusp synchronization */
175 PetscBool   synchronizeCUSP = PETSC_FALSE;
176 
177 /* ------------------------------------------------------------------------------*/
178 /*
179    Optional file where all PETSc output from various prints is saved
180 */
181 FILE *petsc_history = PETSC_NULL;
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "PetscOpenHistoryFile"
185 PetscErrorCode  PetscOpenHistoryFile(const char filename[],FILE **fd)
186 {
187   PetscErrorCode ierr;
188   PetscMPIInt    rank,size;
189   char           pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64];
190   char           version[256];
191 
192   PetscFunctionBegin;
193   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
194   if (!rank) {
195     char        arch[10];
196     int         err;
197     PetscViewer viewer;
198 
199     ierr = PetscGetArchType(arch,10);CHKERRQ(ierr);
200     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
201     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
202     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
203     if (filename) {
204       ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr);
205     } else {
206       ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr);
207       ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr);
208       ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr);
209     }
210 
211     *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname);
212     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
213     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr);
214     ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
215     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr);
216     ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr);
217     ierr = PetscOptionsView(viewer);CHKERRQ(ierr);
218     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
219     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
220     err = fflush(*fd);
221     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
222   }
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "PetscCloseHistoryFile"
228 PetscErrorCode  PetscCloseHistoryFile(FILE **fd)
229 {
230   PetscErrorCode ierr;
231   PetscMPIInt    rank;
232   char           date[64];
233   int            err;
234 
235   PetscFunctionBegin;
236   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
237   if (!rank) {
238     ierr = PetscGetDate(date,64);CHKERRQ(ierr);
239     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
240     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr);
241     ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr);
242     err = fflush(*fd);
243     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
244     err = fclose(*fd);
245     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
246   }
247   PetscFunctionReturn(0);
248 }
249 
250 /* ------------------------------------------------------------------------------*/
251 
252 /*
253    This is ugly and probably belongs somewhere else, but I want to
254   be able to put a true MPI abort error handler with command line args.
255 
256     This is so MPI errors in the debugger will leave all the stack
  frames. The default MPI_Abort() cleans up and exits thus providing no useful information
258   in the debugger hence we call abort() instead of MPI_Abort().
259 */
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "Petsc_MPI_AbortOnError"
263 void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
264 {
265   PetscFunctionBegin;
266   (*PetscErrorPrintf)("MPI error %d\n",*flag);
267   abort();
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "Petsc_MPI_DebuggerOnError"
272 void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
273 {
274   PetscErrorCode ierr;
275 
276   PetscFunctionBegin;
277   (*PetscErrorPrintf)("MPI error %d\n",*flag);
278   ierr = PetscAttachDebugger();
279   if (ierr) { /* hopeless so get out */
280     MPI_Abort(*comm,*flag);
281   }
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "PetscEnd"
286 /*@C
287    PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
288      wishes a clean exit somewhere deep in the program.
289 
290    Collective on PETSC_COMM_WORLD
291 
292    Options Database Keys are the same as for PetscFinalize()
293 
294    Level: advanced
295 
296    Note:
297    See PetscInitialize() for more general runtime options.
298 
299 .seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
300 @*/
301 PetscErrorCode  PetscEnd(void)
302 {
303   PetscFunctionBegin;
304   PetscFinalize();
305   exit(0);
306   return 0;
307 }
308 
309 PetscBool    PetscOptionsPublish = PETSC_FALSE;
310 extern PetscErrorCode        PetscSetUseTrMalloc_Private(void);
311 extern PetscBool  petscsetmallocvisited;
312 static char       emacsmachinename[256];
313 
314 PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
315 PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm)    = 0;
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "PetscSetHelpVersionFunctions"
319 /*@C
320    PetscSetHelpVersionFunctions - Sets functions that print help and version information
321    before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
322    This routine enables a "higher-level" package that uses PETSc to print its messages first.
323 
324    Input Parameter:
325 +  help - the help function (may be PETSC_NULL)
326 -  version - the version function (may be PETSC_NULL)
327 
328    Level: developer
329 
330    Concepts: package help message
331 
332 @*/
333 PetscErrorCode  PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm))
334 {
335   PetscFunctionBegin;
336   PetscExternalHelpFunction    = help;
337   PetscExternalVersionFunction = version;
338   PetscFunctionReturn(0);
339 }
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "PetscOptionsCheckInitial_Private"
343 PetscErrorCode  PetscOptionsCheckInitial_Private(void)
344 {
345   char           string[64],mname[PETSC_MAX_PATH_LEN],*f;
346   MPI_Comm       comm = PETSC_COMM_WORLD;
347   PetscBool      flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout;
348   PetscErrorCode ierr;
349   PetscReal      si;
350   int            i;
351   PetscMPIInt    rank;
352   char           version[256];
353 
354   PetscFunctionBegin;
355   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
356 
357   /*
358       Setup the memory management; support for tracing malloc() usage
359   */
360   ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr);
361 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
362   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr);
363   if ((!flg2 || flg1) && !petscsetmallocvisited) {
364 #if defined(PETSC_HAVE_VALGRIND)
365     if (flg2 || !(RUNNING_ON_VALGRIND)) {
366       /* turn off default -malloc if valgrind is being used */
367 #endif
368       ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
369 #if defined(PETSC_HAVE_VALGRIND)
370     }
371 #endif
372   }
373 #else
374   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr);
375   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr);
376   if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);}
377 #endif
378   if (flg3) {
379     ierr = PetscMallocSetDumpLog();CHKERRQ(ierr);
380   }
381   flg1 = PETSC_FALSE;
382   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr);
383   if (flg1) {
384     ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);
385     ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr);
386   }
387 
388   flg1 = PETSC_FALSE;
389   ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
390   if (!flg1) {
391     flg1 = PETSC_FALSE;
392     ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr);
393   }
394   if (flg1) {
395     ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr);
396   }
397 
398   /*
399       Set the display variable for graphics
400   */
401   ierr = PetscSetDisplay();CHKERRQ(ierr);
402 
403   /*
404       Determine whether user specified maximum number of threads
405    */
406   ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr);
407   if(flg1) {
408     ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr);
409   }
410 
411   /*
412       Determine whether to use thread pool
413    */
414   ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr);
415   if(flg1) {
416     PetscUseThreadPool = PETSC_TRUE;
417     PetscInt N_CORES = get_nprocs();
418     ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int));
419     char tstr[9];
420     char tbuf[2];
421     strcpy(tstr,"-thread");
422     for(i=0;i<PetscMaxThreads;i++) {
423       ThreadCoreAffinity[i] = i;  //default
424       sprintf(tbuf,"%d",i);
425       strcat(tstr,tbuf);
426       ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr);
427       if(flg1) {
428         ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr);
429         ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user
430       }
431       tstr[7] = '\0';
432     }
433     //get the thread pool type
434     PetscInt ipool = 0;
435     ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr);
436     switch(ipool) {
437     case 1:
438       PetscThreadFunc       = &PetscThreadFunc_Tree;
439       PetscThreadInitialize = &PetscThreadInitialize_Tree;
440       PetscThreadFinalize   = &PetscThreadFinalize_Tree;
441       MainWait              = &MainWait_Tree;
442       MainJob               = &MainJob_Tree;
443       break;
444     case 2:
445       PetscThreadFunc       = &PetscThreadFunc_Main;
446       PetscThreadInitialize = &PetscThreadInitialize_Main;
447       PetscThreadFinalize   = &PetscThreadFinalize_Main;
448       MainWait              = &MainWait_Main;
449       MainJob               = &MainJob_Main;
450       break;
451     case 3:
452       PetscThreadFunc       = &PetscThreadFunc_Chain;
453       PetscThreadInitialize = &PetscThreadInitialize_Chain;
454       PetscThreadFinalize   = &PetscThreadFinalize_Chain;
455       MainWait              = &MainWait_Chain;
456       MainJob               = &MainJob_Chain;
457       break;
458     default:
459       PetscThreadFunc       = &PetscThreadFunc_True;
460       PetscThreadInitialize = &PetscThreadInitialize_True;
461       PetscThreadFinalize   = &PetscThreadFinalize_True;
462       MainWait              = &MainWait_True;
463       MainJob               = &MainJob_True;
464       break;
465     }
466   }
467   else {
468     //need to define these in the case on 'no threads' or 'thread create/destroy'
469     //could take any of the above versions
470     PetscThreadInitialize = &PetscThreadInitialize_True;
471     PetscThreadFinalize   = &PetscThreadFinalize_True;
472     MainWait              = &MainWait_True;
473     MainJob               = &MainJob_True;
474   }
475   PetscThreadInitialize(PetscMaxThreads);
476   /*
477       Print the PETSc version information
478   */
479   ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
480   ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
481   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
482   if (flg1 || flg2 || flg3){
483 
484     /*
485        Print "higher-level" package version message
486     */
487     if (PetscExternalVersionFunction) {
488       ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
489     }
490 
491     ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
492     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
493 ------------------------------\n");CHKERRQ(ierr);
494     ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
495     ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
496     ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
497     ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
498     ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
499     ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
500     ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
501 ------------------------------\n");CHKERRQ(ierr);
502   }
503 
504   /*
505        Print "higher-level" package help message
506   */
507   if (flg3){
508     if (PetscExternalHelpFunction) {
509       ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
510     }
511   }
512 
513   /*
514       Setup the error handling
515   */
516   flg1 = PETSC_FALSE;
517   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
518   if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
519   flg1 = PETSC_FALSE;
520   ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
521   if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
522   flg1 = PETSC_FALSE;
523   ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
524   if (flg1) {
525     ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
526   }
527   flg1 = PETSC_FALSE;
528   ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
529   if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
530   flg1 = PETSC_FALSE;
531   ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
532   if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}
533 
534   /*
535       Setup debugger information
536   */
537   ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
538   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
539   if (flg1) {
540     MPI_Errhandler err_handler;
541 
542     ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
543     ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
544     ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
545     ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
546   }
547   ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
548   if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
549   ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
550   ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
551   if (flg1 || flg2) {
552     PetscMPIInt    size;
553     PetscInt       lsize,*nodes;
554     MPI_Errhandler err_handler;
555     /*
556        we have to make sure that all processors have opened
557        connections to all other processors, otherwise once the
558        debugger has stated it is likely to receive a SIGUSR1
559        and kill the program.
560     */
561     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
562     if (size > 2) {
563       PetscMPIInt dummy = 0;
564       MPI_Status  status;
565       for (i=0; i<size; i++) {
566         if (rank != i) {
567           ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
568         }
569       }
570       for (i=0; i<size; i++) {
571         if (rank != i) {
572           ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
573         }
574       }
575     }
576     /* check if this processor node should be in debugger */
577     ierr  = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
578     lsize = size;
579     ierr  = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
580     if (flag) {
581       for (i=0; i<lsize; i++) {
582         if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
583       }
584     }
585     if (!flag) {
586       ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
587       ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
588       if (flg1) {
589         ierr = PetscAttachDebugger();CHKERRQ(ierr);
590       } else {
591         ierr = PetscStopForDebugger();CHKERRQ(ierr);
592       }
593       ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
594       ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
595     }
596     ierr = PetscFree(nodes);CHKERRQ(ierr);
597   }
598 
599   ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
600   if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}
601 
602 #if defined(PETSC_USE_SOCKET_VIEWER)
603   /*
604     Activates new sockets for zope if needed
605   */
606   ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
607   ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
608   if (flgz){
609     int  sockfd;
610     char hostname[256];
611     char username[256];
612     int  remoteport = 9999;
613 
614     ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
615     if (!hostname[0]){
616       ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
617     }
618     ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
619     ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
620     PETSC_ZOPEFD = fdopen(sockfd, "w");
621     if (flgzout){
622       PETSC_STDOUT = PETSC_ZOPEFD;
623       fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
624       fprintf(PETSC_STDOUT, "<<<start>>>");
625     } else {
626       fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
627       fprintf(PETSC_ZOPEFD, "<<<start>>>");
628     }
629   }
630 #endif
631 #if defined(PETSC_USE_SERVER)
632   ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
633   if (flgz){
634     PetscInt port = PETSC_DECIDE;
635     ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
636     ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
637   }
638 #endif
639 
640   /*
641         Setup profiling and logging
642   */
643 #if defined (PETSC_USE_INFO)
644   {
645     char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
646     ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
647     if (flg1 && logname[0]) {
648       ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
649     } else if (flg1) {
650       ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
651     }
652   }
653 #endif
654 #if defined(PETSC_USE_LOG)
655   mname[0] = 0;
656   ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
657   if (flg1) {
658     if (mname[0]) {
659       ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
660     } else {
661       ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
662     }
663   }
664 #if defined(PETSC_HAVE_MPE)
665   flg1 = PETSC_FALSE;
666   ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
667   if (flg1) PetscLogMPEBegin();
668 #endif
669   flg1 = PETSC_FALSE;
670   flg2 = PETSC_FALSE;
671   flg3 = PETSC_FALSE;
672   ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
673   ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
674   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
675   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
676   if (flg1)                      {  ierr = PetscLogAllBegin();CHKERRQ(ierr); }
677   else if (flg2 || flg3 || flg4) {  ierr = PetscLogBegin();CHKERRQ(ierr);}
678 
679   ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
680   if (flg1) {
681     char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
682     FILE *file;
683     if (mname[0]) {
684       sprintf(name,"%s.%d",mname,rank);
685       ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
686       file = fopen(fname,"w");
687       if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
688     } else {
689       file = PETSC_STDOUT;
690     }
691     ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
692   }
693 #endif
694 
695   /*
696       Setup building of stack frames for all function calls
697   */
698 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
699   ierr = PetscStackCreate();CHKERRQ(ierr);
700 #endif
701 
702   ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);
703 
704   /*
705        Print basic help message
706   */
707   ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
708   if (flg1) {
709     ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
710     ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
711     ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
712     ierr = (*PetscHelpPrintf)(comm,"       only when run in the debugger\n");CHKERRQ(ierr);
713     ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
714     ierr = (*PetscHelpPrintf)(comm,"       start the debugger in new xterm\n");CHKERRQ(ierr);
715     ierr = (*PetscHelpPrintf)(comm,"       unless noxterm is given\n");CHKERRQ(ierr);
716     ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
717     ierr = (*PetscHelpPrintf)(comm,"       start all processes in the debugger\n");CHKERRQ(ierr);
718     ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
719     ierr = (*PetscHelpPrintf)(comm,"    emacs jumps to error file\n");CHKERRQ(ierr);
720     ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
721     ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
722     ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
723     ierr = (*PetscHelpPrintf)(comm,"                      waits the delay for you to attach\n");CHKERRQ(ierr);
724     ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
725     ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
726     ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
727     ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
728     ierr = (*PetscHelpPrintf)(comm,"           note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
729     ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
730     ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
731     ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
732     ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
733     ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
734     ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
735     ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
736     ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
737     ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
738     ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
739     ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
740     ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
741     ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
742 #if defined(PETSC_USE_LOG)
743     ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
744     ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
745     ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
746 #if defined(PETSC_HAVE_MPE)
747     ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
748 #endif
749     ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
750 #endif
751     ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
752     ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
753     ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
754     ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
755   }
756 
757   ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
758   if (flg1) {
759     ierr = PetscSleep(si);CHKERRQ(ierr);
760   }
761 
762   ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
763   ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
764   if (f) {
765     ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
766   }
767 
768 #if defined(PETSC_HAVE_CUSP)
769   ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
770   if (flg3) flg1 = PETSC_TRUE;
771   else flg1 = PETSC_FALSE;
772   ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
773   if (flg1) synchronizeCUSP = PETSC_TRUE;
774 #endif
775 
776   PetscFunctionReturn(0);
777 }
778 
779 /**** 'Tree' Thread Pool Functions ****/
780 void* PetscThreadFunc_Tree(void* arg) {
781   PetscErrorCode iterr;
782   int icorr,ierr;
783   int* pId = (int*)arg;
784   int ThreadId = *pId,Mary = 2,i,SubWorker;
785   PetscBool PeeOn;
786   cpu_set_t mset;
787   //printf("Thread %d In Tree Thread Function\n",ThreadId);
788   icorr = ThreadCoreAffinity[ThreadId];
789   CPU_ZERO(&mset);
790   CPU_SET(icorr,&mset);
791   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
792 
793   if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
794     PeeOn = PETSC_TRUE;
795   }
796   else {
797     PeeOn = PETSC_FALSE;
798   }
799   if(PeeOn==PETSC_FALSE) {
800     //check your subordinates, wait for them to be ready
801     for(i=1;i<=Mary;i++) {
802       SubWorker = Mary*ThreadId+i;
803       if(SubWorker<PetscMaxThreads) {
804         ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
805         while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
806           //upon entry, automically releases the lock and blocks
807           //upon return, has the lock
808           ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
809         }
810         ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
811       }
812     }
813     //your subordinates are now ready
814   }
815   ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
816   //update your ready status
817   *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
818   if(ThreadId==0) {
819     job_tree.eJobStat = JobCompleted;
820     //signal main
821     ierr = pthread_cond_signal(&main_cond);
822   }
823   else {
824     //tell your boss that you're ready to work
825     ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
826   }
827   //the while loop needs to have an exit
828   //the 'main' thread can terminate all the threads by performing a broadcast
829   //and calling FuncFinish
830   while(PetscThreadGo) {
831     //need to check the condition to ensure we don't have to wait
832     //waiting when you don't have to causes problems
833     //also need to check the condition to ensure proper handling of spurious wakeups
834     while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) {
835         //upon entry, automically releases the lock and blocks
836         //upon return, has the lock
837         ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]);
838 	*(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE;
839 	*(job_tree.arrThreadReady[ThreadId])   = PETSC_FALSE;
840     }
841     if(ThreadId==0) {
842       job_tree.startJob = PETSC_FALSE;
843       job_tree.eJobStat = ThreadsWorking;
844     }
845     ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]);
846     if(PeeOn==PETSC_FALSE) {
847       //tell your subordinates it's time to get to work
848       for(i=1; i<=Mary; i++) {
849 	SubWorker = Mary*ThreadId+i;
850         if(SubWorker<PetscMaxThreads) {
851           ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]);
852         }
853       }
854     }
855     //do your job
856     if(job_tree.pdata==NULL) {
857       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata);
858     }
859     else {
860       iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata[ThreadId]);
861     }
862     if(iterr!=0) {
863       ithreaderr = 1;
864     }
865     if(PetscThreadGo) {
866       //reset job, get ready for more
867       if(PeeOn==PETSC_FALSE) {
868         //check your subordinates, waiting for them to be ready
869 	//how do you know for a fact that a given subordinate has actually started?
870 	for(i=1;i<=Mary;i++) {
871 	  SubWorker = Mary*ThreadId+i;
872           if(SubWorker<PetscMaxThreads) {
873             ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
874             while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) {
875               //upon entry, automically releases the lock and blocks
876               //upon return, has the lock
877               ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]);
878             }
879             ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]);
880           }
881 	}
882         //your subordinates are now ready
883       }
884       ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]);
885       *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE;
886       if(ThreadId==0) {
887 	job_tree.eJobStat = JobCompleted; //root thread: last thread to complete, guaranteed!
888         //root thread signals 'main'
889         ierr = pthread_cond_signal(&main_cond);
890       }
891       else {
892         //signal your boss before you go to sleep
893         ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]);
894       }
895     }
896   }
897   return NULL;
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "PetscThreadInitialize_Tree"
902 void* PetscThreadInitialize_Tree(PetscInt N) {
903   PetscInt i,ierr;
904   int status;
905 
906   if(PetscUseThreadPool) {
907     size_t Val1 = (size_t)CACHE_LINE_SIZE;
908     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
909     arrmutex = (char*)memalign(Val1,Val2);
910     arrcond1 = (char*)memalign(Val1,Val2);
911     arrcond2 = (char*)memalign(Val1,Val2);
912     arrstart = (char*)memalign(Val1,Val2);
913     arrready = (char*)memalign(Val1,Val2);
914     job_tree.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
915     job_tree.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
916     job_tree.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
917     job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
918     job_tree.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
919     //initialize job structure
920     for(i=0; i<PetscMaxThreads; i++) {
921       job_tree.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
922       job_tree.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
923       job_tree.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
924       job_tree.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
925       job_tree.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
926     }
927     for(i=0; i<PetscMaxThreads; i++) {
928       ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL);
929       ierr = pthread_cond_init(job_tree.cond1array[i],NULL);
930       ierr = pthread_cond_init(job_tree.cond2array[i],NULL);
931       *(job_tree.arrThreadStarted[i])  = PETSC_FALSE;
932       *(job_tree.arrThreadReady[i])    = PETSC_FALSE;
933     }
934     job_tree.pfunc = NULL;
935     job_tree.pdata = (void**)malloc(N*sizeof(void*));
936     job_tree.startJob = PETSC_FALSE;
937     job_tree.eJobStat = JobInitiated;
938     pVal = (int*)malloc(N*sizeof(int));
939     //allocate memory in the heap for the thread structure
940     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
941     //create threads
942     for(i=0; i<N; i++) {
943       pVal[i] = i;
944       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
945       //error check
946     }
947   }
948   else {
949     //do nothing
950   }
951   return NULL;
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "PetscThreadFinalize_Tree"
956 PetscErrorCode PetscThreadFinalize_Tree() {
957   int i,ierr;
958   void* jstatus;
959 
960   PetscFunctionBegin;
961 
962   if(PetscUseThreadPool) {
963     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
964     //join the threads
965     for(i=0; i<PetscMaxThreads; i++) {
966       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
967       //do error checking
968     }
969     free(PetscThreadPoint);
970     free(arrmutex);
971     free(arrcond1);
972     free(arrcond2);
973     free(arrstart);
974     free(arrready);
975     free(job_tree.pdata);
976     free(pVal);
977   }
978   else {
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MainWait_Tree"
985 void MainWait_Tree() {
986   int ierr;
987   ierr = pthread_mutex_lock(job_tree.mutexarray[0]);
988   while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) {
989     ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]);
990   }
991   ierr = pthread_mutex_unlock(job_tree.mutexarray[0]);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MainJob_Tree"
996 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) {
997   int i,ierr;
998   PetscErrorCode ijoberr = 0;
999   if(PetscUseThreadPool) {
1000     MainWait();
1001     job_tree.pfunc = pFunc;
1002     job_tree.pdata = data;
1003     job_tree.startJob = PETSC_TRUE;
1004     for(i=0; i<PetscMaxThreads; i++) {
1005       *(job_tree.arrThreadStarted[i]) = PETSC_FALSE;
1006     }
1007     job_tree.eJobStat = JobInitiated;
1008     ierr = pthread_cond_signal(job_tree.cond2array[0]);
1009     if(pFunc!=FuncFinish) {
1010       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1011     }
1012   }
1013   else {
1014     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1015     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1016     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1017     free(apThread);
1018   }
1019   if(ithreaderr) {
1020     ijoberr = ithreaderr;
1021   }
1022   return ijoberr;
1023 }
1024 /****  ****/
1025 
1026 /**** 'Main' Thread Pool Functions ****/
1027 void* PetscThreadFunc_Main(void* arg) {
1028   PetscErrorCode iterr;
1029   int icorr,ierr;
1030   int* pId = (int*)arg;
1031   int ThreadId = *pId;
1032   cpu_set_t mset;
1033   //printf("Thread %d In Main Thread Function\n",ThreadId);
1034   icorr = ThreadCoreAffinity[ThreadId];
1035   CPU_ZERO(&mset);
1036   CPU_SET(icorr,&mset);
1037   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1038 
1039   ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1040   //update your ready status
1041   *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1042   //tell the BOSS that you're ready to work before you go to sleep
1043   ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1044 
1045   //the while loop needs to have an exit
1046   //the 'main' thread can terminate all the threads by performing a broadcast
1047   //and calling FuncFinish
1048   while(PetscThreadGo) {
1049     //need to check the condition to ensure we don't have to wait
1050     //waiting when you don't have to causes problems
1051     //also need to check the condition to ensure proper handling of spurious wakeups
1052     while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) {
1053         //upon entry, atomically releases the lock and blocks
1054         //upon return, has the lock
1055         ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]);
1056 	//*(job_main.arrThreadReady[ThreadId])   = PETSC_FALSE;
1057     }
1058     ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
1059     //do your job
1060     if(job_main.pdata==NULL) {
1061       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
1062     }
1063     else {
1064       iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
1065     }
1066     if(iterr!=0) {
1067       ithreaderr = 1;
1068     }
1069     if(PetscThreadGo) {
1070       //reset job, get ready for more
1071       ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
1072       *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
1073       //tell the BOSS that you're ready to work before you go to sleep
1074       ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
1075     }
1076   }
1077   return NULL;
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "PetscThreadInitialize_Main"
1082 void* PetscThreadInitialize_Main(PetscInt N) {
1083   PetscInt i,ierr;
1084   int status;
1085 
1086   if(PetscUseThreadPool) {
1087     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1088     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1089     arrmutex = (char*)memalign(Val1,Val2);
1090     arrcond1 = (char*)memalign(Val1,Val2);
1091     arrcond2 = (char*)memalign(Val1,Val2);
1092     arrstart = (char*)memalign(Val1,Val2);
1093     arrready = (char*)memalign(Val1,Val2);
1094     job_main.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1095     job_main.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1096     job_main.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1097     job_main.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1098     //initialize job structure
1099     for(i=0; i<PetscMaxThreads; i++) {
1100       job_main.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1101       job_main.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1102       job_main.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1103       job_main.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1104     }
1105     for(i=0; i<PetscMaxThreads; i++) {
1106       ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
1107       ierr = pthread_cond_init(job_main.cond1array[i],NULL);
1108       ierr = pthread_cond_init(job_main.cond2array[i],NULL);
1109       *(job_main.arrThreadReady[i])    = PETSC_FALSE;
1110     }
1111     job_main.pfunc = NULL;
1112     job_main.pdata = (void**)malloc(N*sizeof(void*));
1113     pVal = (int*)malloc(N*sizeof(int));
1114     //allocate memory in the heap for the thread structure
1115     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1116     //create threads
1117     for(i=0; i<N; i++) {
1118       pVal[i] = i;
1119       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1120       //error check
1121     }
1122   }
1123   else {
1124   }
1125   return NULL;
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "PetscThreadFinalize_Main"
1130 PetscErrorCode PetscThreadFinalize_Main() {
1131   int i,ierr;
1132   void* jstatus;
1133 
1134   PetscFunctionBegin;
1135 
1136   if(PetscUseThreadPool) {
1137     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1138     //join the threads
1139     for(i=0; i<PetscMaxThreads; i++) {
1140       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1141       //do error checking
1142     }
1143     free(PetscThreadPoint);
1144     free(arrmutex);
1145     free(arrcond1);
1146     free(arrcond2);
1147     free(arrstart);
1148     free(arrready);
1149     free(job_main.pdata);
1150     free(pVal);
1151   }
1152   else {
1153   }
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MainWait_Main"
1159 void MainWait_Main() {
1160   int i,ierr;
1161   for(i=0; i<PetscMaxThreads; i++) {
1162     ierr = pthread_mutex_lock(job_main.mutexarray[i]);
1163     while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
1164       ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
1165     }
1166     ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
1167   }
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MainJob_Main"
1172 PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
1173   int i,ierr;
1174   PetscErrorCode ijoberr = 0;
1175   if(PetscUseThreadPool) {
1176     MainWait(); //you know everyone is waiting to be signalled!
1177     job_main.pfunc = pFunc;
1178     job_main.pdata = data;
1179     for(i=0; i<PetscMaxThreads; i++) {
1180       *(job_main.arrThreadReady[i]) = PETSC_FALSE; //why do this?  suppose you get into MainWait first
1181     }
1182     //tell the threads to go to work
1183     for(i=0; i<PetscMaxThreads; i++) {
1184       ierr = pthread_cond_signal(job_main.cond2array[i]);
1185     }
1186     if(pFunc!=FuncFinish) {
1187       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1188     }
1189   }
1190   else {
1191     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1192     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1193     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1194     free(apThread);
1195   }
1196   if(ithreaderr) {
1197     ijoberr = ithreaderr;
1198   }
1199   return ijoberr;
1200 }
1201 /****  ****/
1202 
1203 /**** Chain Thread Functions ****/
1204 void* PetscThreadFunc_Chain(void* arg) {
1205   PetscErrorCode iterr;
1206   int icorr,ierr;
1207   int* pId = (int*)arg;
1208   int ThreadId = *pId;
1209   int SubWorker = ThreadId + 1;
1210   PetscBool PeeOn;
1211   cpu_set_t mset;
1212   //printf("Thread %d In Chain Thread Function\n",ThreadId);
1213   icorr = ThreadCoreAffinity[ThreadId];
1214   CPU_ZERO(&mset);
1215   CPU_SET(icorr,&mset);
1216   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1217 
1218   if(ThreadId==(PetscMaxThreads-1)) {
1219     PeeOn = PETSC_TRUE;
1220   }
1221   else {
1222     PeeOn = PETSC_FALSE;
1223   }
1224   if(PeeOn==PETSC_FALSE) {
1225     //check your subordinate, wait for him to be ready
1226     ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1227     while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
1228       //upon entry, automically releases the lock and blocks
1229       //upon return, has the lock
1230       ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1231     }
1232     ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1233     //your subordinate is now ready
1234   }
1235   ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1236   //update your ready status
1237   *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1238   if(ThreadId==0) {
1239     job_chain.eJobStat = JobCompleted;
1240     //signal main
1241     ierr = pthread_cond_signal(&main_cond);
1242   }
1243   else {
1244     //tell your boss that you're ready to work
1245     ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1246   }
1247   //the while loop needs to have an exit
1248   //the 'main' thread can terminate all the threads by performing a broadcast
1249   //and calling FuncFinish
1250   while(PetscThreadGo) {
1251     //need to check the condition to ensure we don't have to wait
1252     //waiting when you don't have to causes problems
1253     //also need to check the condition to ensure proper handling of spurious wakeups
1254     while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
1255         //upon entry, automically releases the lock and blocks
1256         //upon return, has the lock
1257         ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
1258 	*(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
1259 	*(job_chain.arrThreadReady[ThreadId])   = PETSC_FALSE;
1260     }
1261     if(ThreadId==0) {
1262       job_chain.startJob = PETSC_FALSE;
1263       job_chain.eJobStat = ThreadsWorking;
1264     }
1265     ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
1266     if(PeeOn==PETSC_FALSE) {
1267       //tell your subworker it's time to get to work
1268       ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
1269     }
1270     //do your job
1271     if(job_chain.pdata==NULL) {
1272       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
1273     }
1274     else {
1275       iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
1276     }
1277     if(iterr!=0) {
1278       ithreaderr = 1;
1279     }
1280     if(PetscThreadGo) {
1281       //reset job, get ready for more
1282       if(PeeOn==PETSC_FALSE) {
1283         //check your subordinate, wait for him to be ready
1284 	//how do you know for a fact that your subordinate has actually started?
1285         ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
1286         while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
1287           //upon entry, automically releases the lock and blocks
1288           //upon return, has the lock
1289           ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
1290         }
1291         ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
1292         //your subordinate is now ready
1293       }
1294       ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
1295       *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
1296       if(ThreadId==0) {
1297 	job_chain.eJobStat = JobCompleted; //foreman: last thread to complete, guaranteed!
1298         //root thread (foreman) signals 'main'
1299         ierr = pthread_cond_signal(&main_cond);
1300       }
1301       else {
1302         //signal your boss before you go to sleep
1303         ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
1304       }
1305     }
1306   }
1307   return NULL;
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "PetscThreadInitialize_Chain"
1312 void* PetscThreadInitialize_Chain(PetscInt N) {
1313   PetscInt i,ierr;
1314   int status;
1315 
1316   if(PetscUseThreadPool) {
1317     size_t Val1 = (size_t)CACHE_LINE_SIZE;
1318     size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
1319     arrmutex = (char*)memalign(Val1,Val2);
1320     arrcond1 = (char*)memalign(Val1,Val2);
1321     arrcond2 = (char*)memalign(Val1,Val2);
1322     arrstart = (char*)memalign(Val1,Val2);
1323     arrready = (char*)memalign(Val1,Val2);
1324     job_chain.mutexarray       = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
1325     job_chain.cond1array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1326     job_chain.cond2array       = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
1327     job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1328     job_chain.arrThreadReady   = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
1329     //initialize job structure
1330     for(i=0; i<PetscMaxThreads; i++) {
1331       job_chain.mutexarray[i]        = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
1332       job_chain.cond1array[i]        = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
1333       job_chain.cond2array[i]        = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
1334       job_chain.arrThreadStarted[i]  = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
1335       job_chain.arrThreadReady[i]    = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
1336     }
1337     for(i=0; i<PetscMaxThreads; i++) {
1338       ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
1339       ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
1340       ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
1341       *(job_chain.arrThreadStarted[i])  = PETSC_FALSE;
1342       *(job_chain.arrThreadReady[i])    = PETSC_FALSE;
1343     }
1344     job_chain.pfunc = NULL;
1345     job_chain.pdata = (void**)malloc(N*sizeof(void*));
1346     job_chain.startJob = PETSC_FALSE;
1347     job_chain.eJobStat = JobInitiated;
1348     pVal = (int*)malloc(N*sizeof(int));
1349     //allocate memory in the heap for the thread structure
1350     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1351     //create threads
1352     for(i=0; i<N; i++) {
1353       pVal[i] = i;
1354       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1355       //error check
1356     }
1357   }
1358   else {
1359   }
1360   return NULL;
1361 }
1362 
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "PetscThreadFinalize_Chain"
1366 PetscErrorCode PetscThreadFinalize_Chain() {
1367   int i,ierr;
1368   void* jstatus;
1369 
1370   PetscFunctionBegin;
1371 
1372   if(PetscUseThreadPool) {
1373     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1374     //join the threads
1375     for(i=0; i<PetscMaxThreads; i++) {
1376       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1377       //do error checking
1378     }
1379     free(PetscThreadPoint);
1380     free(arrmutex);
1381     free(arrcond1);
1382     free(arrcond2);
1383     free(arrstart);
1384     free(arrready);
1385     free(job_chain.pdata);
1386     free(pVal);
1387   }
1388   else {
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MainWait_Chain"
1395 void MainWait_Chain() {
1396   int ierr;
1397   ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
1398   while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
1399     ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
1400   }
1401   ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MainJob_Chain"
1406 PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
1407   int i,ierr;
1408   PetscErrorCode ijoberr = 0;
1409   if(PetscUseThreadPool) {
1410     MainWait();
1411     job_chain.pfunc = pFunc;
1412     job_chain.pdata = data;
1413     job_chain.startJob = PETSC_TRUE;
1414     for(i=0; i<PetscMaxThreads; i++) {
1415       *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
1416     }
1417     job_chain.eJobStat = JobInitiated;
1418     ierr = pthread_cond_signal(job_chain.cond2array[0]);
1419     if(pFunc!=FuncFinish) {
1420       MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
1421     }
1422   }
1423   else {
1424     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1425     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1426     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1427     free(apThread);
1428   }
1429   if(ithreaderr) {
1430     ijoberr = ithreaderr;
1431   }
1432   return ijoberr;
1433 }
1434 /****  ****/
1435 
1436 /**** True Thread Functions ****/
1437 void* PetscThreadFunc_True(void* arg) {
1438   int icorr,ierr,iVal;
1439   int* pId = (int*)arg;
1440   int ThreadId = *pId;
1441   PetscErrorCode iterr;
1442   cpu_set_t mset;
1443   //printf("Thread %d In True Pool Thread Function\n",ThreadId);
1444   icorr = ThreadCoreAffinity[ThreadId];
1445   CPU_ZERO(&mset);
1446   CPU_SET(icorr,&mset);
1447   sched_setaffinity(0,sizeof(cpu_set_t),&mset);
1448 
1449   ierr = pthread_mutex_lock(&job_true.mutex);
1450   job_true.iNumReadyThreads++;
1451   if(job_true.iNumReadyThreads==PetscMaxThreads) {
1452     ierr = pthread_cond_signal(&main_cond);
1453   }
1454   //the while loop needs to have an exit
1455   //the 'main' thread can terminate all the threads by performing a broadcast
1456   //and calling FuncFinish
1457   while(PetscThreadGo) {
1458     //need to check the condition to ensure we don't have to wait
1459     //waiting when you don't have to causes problems
1460     //also need to wait if another thread sneaks in and messes with the predicate
1461     while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) {
1462       //upon entry, automically releases the lock and blocks
1463       //upon return, has the lock
1464       ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex);
1465     }
1466     job_true.startJob = PETSC_FALSE;
1467     job_true.iNumJobThreads--;
1468     job_true.iNumReadyThreads--;
1469     iVal = PetscMaxThreads-job_true.iNumReadyThreads-1;
1470     pthread_mutex_unlock(&job_true.mutex);
1471     if(job_true.pdata==NULL) {
1472       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata);
1473     }
1474     else {
1475       iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]);
1476     }
1477     if(iterr!=0) {
1478       ithreaderr = 1;
1479     }
1480     //the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads
1481     //what happens if a thread finishes before they all start? BAD!
1482     //what happens if a thread finishes before any else start? BAD!
1483     pthread_barrier_wait(job_true.pbarr); //ensures all threads are finished
1484     //reset job
1485     if(PetscThreadGo) {
1486       pthread_mutex_lock(&job_true.mutex);
1487       job_true.iNumReadyThreads++;
1488       if(job_true.iNumReadyThreads==PetscMaxThreads) {
1489 	//signal the 'main' thread that the job is done! (only done once)
1490 	ierr = pthread_cond_signal(&main_cond);
1491       }
1492     }
1493   }
1494   return NULL;
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "PetscThreadInitialize_True"
1499 void* PetscThreadInitialize_True(PetscInt N) {
1500   PetscInt i;
1501   int status;
1502 
1503   if(PetscUseThreadPool) {
1504     pVal = (int*)malloc(N*sizeof(int));
1505     //allocate memory in the heap for the thread structure
1506     PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
1507     BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it!
1508     job_true.pdata = (void**)malloc(N*sizeof(void*));
1509     for(i=0; i<N; i++) {
1510       pVal[i] = i;
1511       status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
1512       //error check to ensure proper thread creation
1513       status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1);
1514       //error check
1515     }
1516   }
1517   else {
1518   }
1519   return NULL;
1520 }
1521 
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "PetscThreadFinalize_True"
1525 PetscErrorCode PetscThreadFinalize_True() {
1526   int i,ierr;
1527   void* jstatus;
1528 
1529   PetscFunctionBegin;
1530 
1531   if(PetscUseThreadPool) {
1532     MainJob(FuncFinish,NULL,PetscMaxThreads);  //set up job and broadcast work
1533     //join the threads
1534     for(i=0; i<PetscMaxThreads; i++) {
1535       ierr = pthread_join(PetscThreadPoint[i],&jstatus);
1536       //do error checking
1537     }
1538     free(BarrPoint);
1539     free(PetscThreadPoint);
1540   }
1541   else {
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MainWait_True"
1548 void MainWait_True() {
1549   int ierr;
1550   while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) {
1551     ierr = pthread_cond_wait(&main_cond,&job_true.mutex);
1552   }
1553   ierr = pthread_mutex_unlock(&job_true.mutex);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MainJob_True"
1558 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) {
1559   int ierr;
1560   PetscErrorCode ijoberr = 0;
1561   if(PetscUseThreadPool) {
1562     MainWait();
1563     job_true.pfunc = pFunc;
1564     job_true.pdata = data;
1565     job_true.pbarr = &BarrPoint[n];
1566     job_true.iNumJobThreads = n;
1567     job_true.startJob = PETSC_TRUE;
1568     ierr = pthread_cond_broadcast(&job_true.cond);
1569     if(pFunc!=FuncFinish) {
1570       MainWait(); //why wait after? guarantees that job gets done
1571     }
1572   }
1573   else {
1574     pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
1575     PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
1576     PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
1577     free(apThread);
1578   }
1579   if(ithreaderr) {
1580     ijoberr = ithreaderr;
1581   }
1582   return ijoberr;
1583 }
1584 /****  ****/
1585 
1586 void* FuncFinish(void* arg) {
1587   PetscThreadGo = PETSC_FALSE;
1588   return(0);
1589 }
1590