//new kds file - implements all thread pool versions
/*
   This file defines part of the initialization of PETSc

   This file uses regular malloc and free because it cannot know
   what malloc is being used until it has already processed the input.
*/

#define _GNU_SOURCE              /* required for sched_setaffinity()/CPU_SET in <sched.h> */
#include <sched.h>
#include <petscsys.h>        /*I  "petscsys.h"   I*/
#include <pthread.h>
#include <sys/sysinfo.h>         /* get_nprocs() */
#include <unistd.h>
#if defined(PETSC_HAVE_STDLIB_H)
#include <stdlib.h>
#endif
#if defined(PETSC_HAVE_MALLOC_H)
#include <malloc.h>
#endif
#if defined(PETSC_HAVE_VALGRIND)
#include <valgrind/valgrind.h>
#endif

/* ------------------------Nasty global variables -------------------------------*/
/*
     Indicates if PETSc started up MPI, or it was
   already started before PETSc was initialized.
*/
PetscBool   PetscBeganMPI         = PETSC_FALSE;
PetscBool   PetscInitializeCalled = PETSC_FALSE;
PetscBool   PetscFinalizeCalled   = PETSC_FALSE;
PetscBool   PetscUseThreadPool    = PETSC_FALSE;  /* set when -use_thread_pool is given */
PetscBool   PetscThreadGo         = PETSC_TRUE;   /* cleared to make the worker threads exit their loops */
PetscMPIInt PetscGlobalRank       = -1;
PetscMPIInt PetscGlobalSize       = -1;
PetscMPIInt PetscMaxThreads       = 2;            /* number of worker threads; overridden by -thread_max */
pthread_t*  PetscThreadPoint;                     /* array of worker thread handles */
pthread_barrier_t* BarrPoint;   //used by 'true' thread pool
PetscErrorCode ithreaderr = 0;  /* sticky flag: set nonzero when any worker's job function fails */
int* pVal;                      /* per-thread integer ids handed to the thread functions */

#define CACHE_LINE_SIZE 64 //used by 'chain', 'main','tree' thread pools
int* ThreadCoreAffinity;   /* core id each worker is pinned to; filled from -thread<i> options */

/* job status used by the 'chain'/'tree' pools to track a dispatched job */
typedef enum {JobInitiated,ThreadsWorking,JobCompleted} estat; //used by 'chain','tree' thread pool

/* shared job descriptor for the 'tree' pool: per-thread mutex/condition pairs
   plus the job function, per-thread data, and per-thread started/ready flags */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;    /* worker -> boss: "I am ready" */
  pthread_cond_t**  cond2array;    /* boss -> worker: "start working" */
  void* (*pfunc)(void*);           /* job function to run on each thread */
  void** pdata;                    /* per-thread argument array (or NULL) */
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_tree;
sjob_tree job_tree;
/* shared job descriptor for the 'main' pool (no start/status bookkeeping) */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool** arrThreadReady;
} sjob_main;
sjob_main job_main;
/* shared job descriptor for the 'chain' pool; same layout as sjob_tree */
typedef struct {
  pthread_mutex_t** mutexarray;
  pthread_cond_t**  cond1array;
  pthread_cond_t**  cond2array;
  void* (*pfunc)(void*);
  void** pdata;
  PetscBool startJob;
  estat eJobStat;
  PetscBool** arrThreadStarted;
  PetscBool** arrThreadReady;
} sjob_chain;
sjob_chain job_chain;
/* shared job descriptor for the 'true' pool: one mutex/cond pair for all workers */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t  cond;
  void* (*pfunc)(void*);
  void** pdata;
  pthread_barrier_t* pbarr;
  int iNumJobThreads;
  int iNumReadyThreads;
  PetscBool startJob;
} sjob_true;
sjob_true job_true = {PTHREAD_MUTEX_INITIALIZER,PTHREAD_COND_INITIALIZER,NULL,NULL,NULL,0,0,PETSC_FALSE};

pthread_cond_t main_cond = PTHREAD_COND_INITIALIZER; //used by 'true', 'chain','tree' thread pools
/* raw storage carved into CACHE_LINE_SIZE-spaced slots for the per-thread
   synchronization objects (avoids false sharing) */
char* arrmutex; //used by 'chain','main','tree' thread pools
char* arrcond1; //used by 'chain','main','tree' thread pools
char* arrcond2; //used by 'chain','main','tree' thread pools
char* arrstart; //used by 'chain','main','tree' thread pools
char* arrready; //used by 'chain','main','tree' thread pools

/* Function Pointers - dispatch table selected by the -pool option; each entry
   is bound to one of the _Tree/_Main/_Chain/_True implementations below */
void* (*PetscThreadFunc)(void*) = NULL;
void* (*PetscThreadInitialize)(PetscInt) = NULL;
PetscErrorCode (*PetscThreadFinalize)(void) = NULL;
void (*MainWait)(void) = NULL;
PetscErrorCode (*MainJob)(void* (*pFunc)(void*),void**,PetscInt) = NULL;
/**** Tree Functions ****/
void* PetscThreadFunc_Tree(void*);
void* PetscThreadInitialize_Tree(PetscInt);
PetscErrorCode PetscThreadFinalize_Tree(void);
void MainWait_Tree(void);
PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void**,PetscInt);
/**** Main Functions ****/
void* PetscThreadFunc_Main(void*);
void* PetscThreadInitialize_Main(PetscInt);
PetscErrorCode PetscThreadFinalize_Main(void);
void MainWait_Main(void);
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void**,PetscInt);
/**** Chain Functions ****/
void* PetscThreadFunc_Chain(void*);
void* PetscThreadInitialize_Chain(PetscInt);
PetscErrorCode PetscThreadFinalize_Chain(void);
void MainWait_Chain(void);
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void**,PetscInt);
/**** True Functions ****/
void* PetscThreadFunc_True(void*);
void* PetscThreadInitialize_True(PetscInt);
PetscErrorCode PetscThreadFinalize_True(void);
void MainWait_True(void);
PetscErrorCode MainJob_True(void* (*pFunc)(void*),void**,PetscInt);
/**** ****/

void* FuncFinish(void*);   /* job broadcast to the workers to make them terminate */
void* PetscThreadRun(MPI_Comm Comm,void* (*pFunc)(void*),int,pthread_t*,void**);
void* PetscThreadStop(MPI_Comm Comm,int,pthread_t*);

#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_COMPLEX_INSTANTIATE)
template <> class std::complex<double>; /* instantiate complex template class */
#endif
#if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)
MPI_Datatype MPI_C_DOUBLE_COMPLEX;
MPI_Datatype MPI_C_COMPLEX;
#endif
PetscScalar PETSC_i;
#else
PetscScalar PETSC_i = 0.0;
#endif
#if defined(PETSC_USE_REAL___FLOAT128)
MPI_Datatype MPIU___FLOAT128 = 0;
#endif
MPI_Datatype MPIU_2SCALAR = 0;
MPI_Datatype MPIU_2INT = 0;

/*
     These are needed by petscbt.h
*/
#include <petscbt.h>
char _BT_mask = ' ';
char _BT_c = ' ';
PetscInt _BT_idx = 0;

/*
   Function that is called to display all error messages
*/
PetscErrorCode (*PetscErrorPrintf)(const char [],...) = PetscErrorPrintfDefault;
PetscErrorCode (*PetscHelpPrintf)(MPI_Comm,const char [],...)
= PetscHelpPrintfDefault; 168 #if defined(PETSC_HAVE_MATLAB_ENGINE) 169 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintf_Matlab; 170 #else 171 PetscErrorCode (*PetscVFPrintf)(FILE*,const char[],va_list) = PetscVFPrintfDefault; 172 #endif 173 /* 174 This is needed to turn on/off cusp synchronization */ 175 PetscBool synchronizeCUSP = PETSC_FALSE; 176 177 /* ------------------------------------------------------------------------------*/ 178 /* 179 Optional file where all PETSc output from various prints is saved 180 */ 181 FILE *petsc_history = PETSC_NULL; 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "PetscOpenHistoryFile" 185 PetscErrorCode PetscOpenHistoryFile(const char filename[],FILE **fd) 186 { 187 PetscErrorCode ierr; 188 PetscMPIInt rank,size; 189 char pfile[PETSC_MAX_PATH_LEN],pname[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN],date[64]; 190 char version[256]; 191 192 PetscFunctionBegin; 193 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 194 if (!rank) { 195 char arch[10]; 196 int err; 197 PetscViewer viewer; 198 199 ierr = PetscGetArchType(arch,10);CHKERRQ(ierr); 200 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 201 ierr = PetscGetVersion(version,256);CHKERRQ(ierr); 202 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 203 if (filename) { 204 ierr = PetscFixFilename(filename,fname);CHKERRQ(ierr); 205 } else { 206 ierr = PetscGetHomeDirectory(pfile,240);CHKERRQ(ierr); 207 ierr = PetscStrcat(pfile,"/.petschistory");CHKERRQ(ierr); 208 ierr = PetscFixFilename(pfile,fname);CHKERRQ(ierr); 209 } 210 211 *fd = fopen(fname,"a"); if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file: %s",fname); 212 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 213 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"%s %s\n",version,date);CHKERRQ(ierr); 214 ierr = PetscGetProgramName(pname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 215 ierr = 
PetscFPrintf(PETSC_COMM_SELF,*fd,"%s on a %s, %d proc. with options:\n",pname,arch,size);CHKERRQ(ierr); 216 ierr = PetscViewerASCIIOpenWithFILE(PETSC_COMM_WORLD,*fd,&viewer);CHKERRQ(ierr); 217 ierr = PetscOptionsView(viewer);CHKERRQ(ierr); 218 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 219 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 220 err = fflush(*fd); 221 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 222 } 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "PetscCloseHistoryFile" 228 PetscErrorCode PetscCloseHistoryFile(FILE **fd) 229 { 230 PetscErrorCode ierr; 231 PetscMPIInt rank; 232 char date[64]; 233 int err; 234 235 PetscFunctionBegin; 236 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 237 if (!rank) { 238 ierr = PetscGetDate(date,64);CHKERRQ(ierr); 239 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 240 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"Finished at %s\n",date);CHKERRQ(ierr); 241 ierr = PetscFPrintf(PETSC_COMM_SELF,*fd,"---------------------------------------------------------\n");CHKERRQ(ierr); 242 err = fflush(*fd); 243 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); 244 err = fclose(*fd); 245 if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file"); 246 } 247 PetscFunctionReturn(0); 248 } 249 250 /* ------------------------------------------------------------------------------*/ 251 252 /* 253 This is ugly and probably belongs somewhere else, but I want to 254 be able to put a true MPI abort error handler with command line args. 255 256 This is so MPI errors in the debugger will leave all the stack 257 frames. The default MP_Abort() cleans up and exits thus providing no useful information 258 in the debugger hence we call abort() instead of MPI_Abort(). 
*/

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_AbortOnError"
/* MPI error-handler callback: print the MPI error code, then abort() so the
   stack frames remain visible in a debugger (MPI_Abort() would clean them up). */
void Petsc_MPI_AbortOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  abort();
}

#undef __FUNCT__
#define __FUNCT__ "Petsc_MPI_DebuggerOnError"
/* MPI error-handler callback: print the MPI error code and try to attach the
   debugger; if that fails, fall back to MPI_Abort(). */
void Petsc_MPI_DebuggerOnError(MPI_Comm *comm,PetscMPIInt *flag)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  (*PetscErrorPrintf)("MPI error %d\n",*flag);
  ierr = PetscAttachDebugger();
  if (ierr) { /* hopeless so get out */
    MPI_Abort(*comm,*flag);
  }
}

#undef __FUNCT__
#define __FUNCT__ "PetscEnd"
/*@C
   PetscEnd - Calls PetscFinalize() and then ends the program. This is useful if one
     wishes a clean exit somewhere deep in the program.

   Collective on PETSC_COMM_WORLD

   Options Database Keys are the same as for PetscFinalize()

   Level: advanced

   Note:
   See PetscInitialize() for more general runtime options.

.seealso: PetscInitialize(), PetscOptionsView(), PetscMallocDump(), PetscMPIDump(), PetscFinalize()
@*/
PetscErrorCode PetscEnd(void)
{
  PetscFunctionBegin;
  PetscFinalize();
  exit(0);
  return 0;   /* unreachable; keeps the compiler happy about the return type */
}

PetscBool PetscOptionsPublish = PETSC_FALSE;                      /* set by -options_gui */
extern PetscErrorCode PetscSetUseTrMalloc_Private(void);
extern PetscBool petscsetmallocvisited;
static char emacsmachinename[256];                                /* target for -on_error_emacs */

/* optional hooks a higher-level package registers to print its own
   version/help text before PETSc's (see PetscSetHelpVersionFunctions) */
PetscErrorCode (*PetscExternalVersionFunction)(MPI_Comm) = 0;
PetscErrorCode (*PetscExternalHelpFunction)(MPI_Comm) = 0;

#undef __FUNCT__
#define __FUNCT__ "PetscSetHelpVersionFunctions"
/*@C
   PetscSetHelpVersionFunctions - Sets functions that print help and version information
   before the PETSc help and version information is printed. Must call BEFORE PetscInitialize().
   This routine enables a "higher-level" package that uses PETSc to print its messages first.
323 324 Input Parameter: 325 + help - the help function (may be PETSC_NULL) 326 - version - the version function (may be PETSC_NULL) 327 328 Level: developer 329 330 Concepts: package help message 331 332 @*/ 333 PetscErrorCode PetscSetHelpVersionFunctions(PetscErrorCode (*help)(MPI_Comm),PetscErrorCode (*version)(MPI_Comm)) 334 { 335 PetscFunctionBegin; 336 PetscExternalHelpFunction = help; 337 PetscExternalVersionFunction = version; 338 PetscFunctionReturn(0); 339 } 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "PetscOptionsCheckInitial_Private" 343 PetscErrorCode PetscOptionsCheckInitial_Private(void) 344 { 345 char string[64],mname[PETSC_MAX_PATH_LEN],*f; 346 MPI_Comm comm = PETSC_COMM_WORLD; 347 PetscBool flg1 = PETSC_FALSE,flg2 = PETSC_FALSE,flg3 = PETSC_FALSE,flg4 = PETSC_FALSE,flag,flgz,flgzout; 348 PetscErrorCode ierr; 349 PetscReal si; 350 int i; 351 PetscMPIInt rank; 352 char version[256]; 353 354 PetscFunctionBegin; 355 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 356 357 /* 358 Setup the memory management; support for tracing malloc() usage 359 */ 360 ierr = PetscOptionsHasName(PETSC_NULL,"-malloc_log",&flg3);CHKERRQ(ierr); 361 #if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD) 362 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg1,&flg2);CHKERRQ(ierr); 363 if ((!flg2 || flg1) && !petscsetmallocvisited) { 364 #if defined(PETSC_HAVE_VALGRIND) 365 if (flg2 || !(RUNNING_ON_VALGRIND)) { 366 /* turn off default -malloc if valgrind is being used */ 367 #endif 368 ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr); 369 #if defined(PETSC_HAVE_VALGRIND) 370 } 371 #endif 372 } 373 #else 374 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_dump",&flg1,PETSC_NULL);CHKERRQ(ierr); 375 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc",&flg2,PETSC_NULL);CHKERRQ(ierr); 376 if (flg1 || flg2 || flg3) {ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr);} 377 #endif 378 if (flg3) { 379 ierr = PetscMallocSetDumpLog();CHKERRQ(ierr); 380 } 381 flg1 = 
PETSC_FALSE; 382 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_debug",&flg1,PETSC_NULL);CHKERRQ(ierr); 383 if (flg1) { 384 ierr = PetscSetUseTrMalloc_Private();CHKERRQ(ierr); 385 ierr = PetscMallocDebug(PETSC_TRUE);CHKERRQ(ierr); 386 } 387 388 flg1 = PETSC_FALSE; 389 ierr = PetscOptionsGetBool(PETSC_NULL,"-malloc_info",&flg1,PETSC_NULL);CHKERRQ(ierr); 390 if (!flg1) { 391 flg1 = PETSC_FALSE; 392 ierr = PetscOptionsGetBool(PETSC_NULL,"-memory_info",&flg1,PETSC_NULL);CHKERRQ(ierr); 393 } 394 if (flg1) { 395 ierr = PetscMemorySetGetMaximumUsage();CHKERRQ(ierr); 396 } 397 398 /* 399 Set the display variable for graphics 400 */ 401 ierr = PetscSetDisplay();CHKERRQ(ierr); 402 403 /* 404 Determine whether user specified maximum number of threads 405 */ 406 ierr = PetscOptionsHasName(PETSC_NULL,"-thread_max",&flg1);CHKERRQ(ierr); 407 if(flg1) { 408 ierr = PetscOptionsGetInt(PETSC_NULL,"-thread_max",&PetscMaxThreads,PETSC_NULL);CHKERRQ(ierr); 409 } 410 411 /* 412 Determine whether to use thread pool 413 */ 414 ierr = PetscOptionsHasName(PETSC_NULL,"-use_thread_pool",&flg1);CHKERRQ(ierr); 415 if(flg1) { 416 PetscUseThreadPool = PETSC_TRUE; 417 PetscInt N_CORES = get_nprocs(); 418 ThreadCoreAffinity = (int*)malloc(N_CORES*sizeof(int)); 419 char tstr[9]; 420 char tbuf[2]; 421 strcpy(tstr,"-thread"); 422 for(i=0;i<PetscMaxThreads;i++) { 423 ThreadCoreAffinity[i] = i; //default 424 sprintf(tbuf,"%d",i); 425 strcat(tstr,tbuf); 426 ierr = PetscOptionsHasName(PETSC_NULL,tstr,&flg1);CHKERRQ(ierr); 427 if(flg1) { 428 ierr = PetscOptionsGetInt(PETSC_NULL,tstr,&ThreadCoreAffinity[i],PETSC_NULL);CHKERRQ(ierr); 429 ThreadCoreAffinity[i] = ThreadCoreAffinity[i]%N_CORES; //check on the user 430 } 431 tstr[7] = '\0'; 432 } 433 //get the thread pool type 434 PetscInt ipool = 0; 435 ierr = PetscOptionsGetInt(PETSC_NULL,"-pool",&ipool,PETSC_NULL);CHKERRQ(ierr); 436 switch(ipool) { 437 case 1: 438 PetscThreadFunc = &PetscThreadFunc_Tree; 439 PetscThreadInitialize = &PetscThreadInitialize_Tree; 
      PetscThreadFinalize = &PetscThreadFinalize_Tree;
      MainWait = &MainWait_Tree;
      MainJob = &MainJob_Tree;
      break;
    case 2:   /* 'main' pool */
      PetscThreadFunc = &PetscThreadFunc_Main;
      PetscThreadInitialize = &PetscThreadInitialize_Main;
      PetscThreadFinalize = &PetscThreadFinalize_Main;
      MainWait = &MainWait_Main;
      MainJob = &MainJob_Main;
      break;
    case 3:   /* 'chain' pool */
      PetscThreadFunc = &PetscThreadFunc_Chain;
      PetscThreadInitialize = &PetscThreadInitialize_Chain;
      PetscThreadFinalize = &PetscThreadFinalize_Chain;
      MainWait = &MainWait_Chain;
      MainJob = &MainJob_Chain;
      break;
    default:  /* 'true' pool */
      PetscThreadFunc = &PetscThreadFunc_True;
      PetscThreadInitialize = &PetscThreadInitialize_True;
      PetscThreadFinalize = &PetscThreadFinalize_True;
      MainWait = &MainWait_True;
      MainJob = &MainJob_True;
      break;
    }
  }
  else {
    //need to define these in the case on 'no threads' or 'thread create/destroy'
    //could take any of the above versions
    PetscThreadInitialize = &PetscThreadInitialize_True;
    PetscThreadFinalize = &PetscThreadFinalize_True;
    MainWait = &MainWait_True;
    MainJob = &MainJob_True;
  }
  /* spawn the worker threads for whichever pool implementation was selected */
  PetscThreadInitialize(PetscMaxThreads);
  /*
      Print the PETSc version information
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-v",&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-version",&flg2);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg3);CHKERRQ(ierr);
  if (flg1 || flg2 || flg3){

    /*
       Print "higher-level" package version message
    */
    if (PetscExternalVersionFunction) {
      ierr = (*PetscExternalVersionFunction)(comm);CHKERRQ(ierr);
    }

    ierr = PetscGetVersion(version,256);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s\n",version);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"%s",PETSC_AUTHOR_INFO);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/changes/index.html for recent updates.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/faq.html for problems.\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"See docs/manualpages/index.html for help. \n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"Libraries linked from %s\n",PETSC_LIB_DIR);CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"--------------------------------------------\
------------------------------\n");CHKERRQ(ierr);
  }

  /*
      Print "higher-level" package help message
  */
  if (flg3){
    if (PetscExternalHelpFunction) {
      ierr = (*PetscExternalHelpFunction)(comm);CHKERRQ(ierr);
    }
  }

  /*
      Setup the error handling
  */
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_abort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-on_error_mpiabort",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) { ierr = PetscPushErrorHandler(PetscMPIAbortErrorHandler,0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-mpi_return_on_error",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {
    ierr = MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);CHKERRQ(ierr);
  }
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-no_signal_handler",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (!flg1) {ierr = PetscPushSignalHandler(PetscDefaultSignalHandler,(void*)0);CHKERRQ(ierr);}
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-fp_trap",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) {ierr = PetscSetFPTrap(PETSC_FP_TRAP_ON);CHKERRQ(ierr);}

  /*
      Setup debugger information
  */
  ierr = PetscSetDefaultDebugger();CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_attach_debugger",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) {
    MPI_Errhandler err_handler;

    ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
    ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_DebuggerOnError,&err_handler);CHKERRQ(ierr);
    ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    ierr = PetscPushErrorHandler(PetscAttachDebuggerErrorHandler,0);CHKERRQ(ierr);
  }
  ierr = PetscOptionsGetString(PETSC_NULL,"-debug_terminal",string,64,&flg1);CHKERRQ(ierr);
  if (flg1) { ierr = PetscSetDebugTerminal(string);CHKERRQ(ierr); }
  ierr = PetscOptionsGetString(PETSC_NULL,"-start_in_debugger",string,64,&flg1);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(PETSC_NULL,"-stop_for_debugger",string,64,&flg2);CHKERRQ(ierr);
  if (flg1 || flg2) {
    PetscMPIInt    size;
    PetscInt       lsize,*nodes;
    MPI_Errhandler err_handler;
    /*
       we have to make sure that all processors have opened
       connections to all other processors, otherwise once the
       debugger has started it is likely to receive a SIGUSR1
       and kill the program.
    */
    ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
    if (size > 2) {
      /* all-to-all handshake on tag 109 to force connection establishment */
      PetscMPIInt dummy = 0;
      MPI_Status  status;
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Send(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD);CHKERRQ(ierr);
        }
      }
      for (i=0; i<size; i++) {
        if (rank != i) {
          ierr = MPI_Recv(&dummy,1,MPI_INT,i,109,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
        }
      }
    }
    /* check if this processor node should be in debugger */
    ierr = PetscMalloc(size*sizeof(PetscInt),&nodes);CHKERRQ(ierr);
    lsize = size;
    ierr = PetscOptionsGetIntArray(PETSC_NULL,"-debugger_nodes",nodes,&lsize,&flag);CHKERRQ(ierr);
    if (flag) {
      /* flag ends up PETSC_FALSE exactly when this rank is listed */
      for (i=0; i<lsize; i++) {
        if (nodes[i] == rank) { flag = PETSC_FALSE; break; }
      }
    }
    if (!flag) {
      ierr = PetscSetDebuggerFromString(string);CHKERRQ(ierr);
      ierr = PetscPushErrorHandler(PetscAbortErrorHandler,0);CHKERRQ(ierr);
      if (flg1) {
        ierr = PetscAttachDebugger();CHKERRQ(ierr);
      } else {
        ierr = PetscStopForDebugger();CHKERRQ(ierr);
      }
      ierr = MPI_Errhandler_create((MPI_Handler_function*)Petsc_MPI_AbortOnError,&err_handler);CHKERRQ(ierr);
      ierr = MPI_Errhandler_set(comm,err_handler);CHKERRQ(ierr);
    }
    ierr = PetscFree(nodes);CHKERRQ(ierr);
  }

  /* NOTE(review): emacsmachinename is 256 bytes but only 128 are offered here — confirm intended */
  ierr = PetscOptionsGetString(PETSC_NULL,"-on_error_emacs",emacsmachinename,128,&flg1);CHKERRQ(ierr);
  if (flg1 && !rank) {ierr = PetscPushErrorHandler(PetscEmacsClientErrorHandler,emacsmachinename);CHKERRQ(ierr);}

#if defined(PETSC_USE_SOCKET_VIEWER)
  /*
    Activates new sockets for zope if needed
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-zope", &flgz);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-nostdout", &flgzout);CHKERRQ(ierr);
  if (flgz){
    int  sockfd;
    char hostname[256];
    char username[256];
    int  remoteport = 9999;

    ierr = PetscOptionsGetString(PETSC_NULL, "-zope", hostname, 256, &flgz);CHKERRQ(ierr);
    if (!hostname[0]){
      ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr);
    }
    ierr = PetscOpenSocket(hostname, remoteport, &sockfd);CHKERRQ(ierr);
    ierr = PetscGetUserName(username, 256);CHKERRQ(ierr);
    PETSC_ZOPEFD = fdopen(sockfd, "w");
    if (flgzout){
      /* -nostdout: redirect PETSc stdout into the zope socket */
      PETSC_STDOUT = PETSC_ZOPEFD;
      fprintf(PETSC_STDOUT, "<<<user>>> %s\n",username);
      fprintf(PETSC_STDOUT, "<<<start>>>");
    } else {
      fprintf(PETSC_ZOPEFD, "<<<user>>> %s\n",username);
      fprintf(PETSC_ZOPEFD, "<<<start>>>");
    }
  }
#endif
#if defined(PETSC_USE_SERVER)
  ierr = PetscOptionsHasName(PETSC_NULL,"-server", &flgz);CHKERRQ(ierr);
  if (flgz){
    PetscInt port = PETSC_DECIDE;
    ierr = PetscOptionsGetInt(PETSC_NULL,"-server",&port,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscWebServe(PETSC_COMM_WORLD,(int)port);CHKERRQ(ierr);
  }
#endif

  /*
      Setup profiling and logging
  */
#if defined (PETSC_USE_INFO)
  {
    char logname[PETSC_MAX_PATH_LEN]; logname[0] = 0;
    ierr = PetscOptionsGetString(PETSC_NULL,"-info",logname,250,&flg1);CHKERRQ(ierr);
    if (flg1 && logname[0]) {
      ierr = PetscInfoAllow(PETSC_TRUE,logname);CHKERRQ(ierr);
    } else if (flg1) {
      ierr = PetscInfoAllow(PETSC_TRUE,PETSC_NULL);CHKERRQ(ierr);
    }
  }
#endif
#if defined(PETSC_USE_LOG)
  mname[0] = 0;
  ierr = PetscOptionsGetString(PETSC_NULL,"-history",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
  if (flg1) {
    if (mname[0]) {
      ierr = PetscOpenHistoryFile(mname,&petsc_history);CHKERRQ(ierr);
    } else {
      ierr = PetscOpenHistoryFile(0,&petsc_history);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_HAVE_MPE)
  flg1 = PETSC_FALSE;
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_mpe",&flg1);CHKERRQ(ierr);
  if (flg1) PetscLogMPEBegin();
#endif
  flg1 = PETSC_FALSE;
  flg2 = PETSC_FALSE;
  flg3 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-log_all",&flg1,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(PETSC_NULL,"-log",&flg2,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary_python",&flg4);CHKERRQ(ierr);
  if (flg1) { ierr = PetscLogAllBegin();CHKERRQ(ierr); }
  else if (flg2 || flg3 || flg4) { ierr = PetscLogBegin();CHKERRQ(ierr);}

  ierr = PetscOptionsGetString(PETSC_NULL,"-log_trace",mname,250,&flg1);CHKERRQ(ierr);
  if (flg1) {
    char name[PETSC_MAX_PATH_LEN],fname[PETSC_MAX_PATH_LEN];
    FILE *file;
    if (mname[0]) {
      /* per-rank trace file: <mname>.<rank> */
      sprintf(name,"%s.%d",mname,rank);
      ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
      file = fopen(fname,"w");
      if (!file) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open trace file: %s",fname);
    } else {
      file = PETSC_STDOUT;
    }
    ierr = PetscLogTraceBegin(file);CHKERRQ(ierr);
  }
#endif

  /*
      Setup building of stack frames for all function calls
  */
#if defined(PETSC_USE_DEBUG) && !defined(PETSC_USE_PTHREAD)
  ierr = PetscStackCreate();CHKERRQ(ierr);
#endif

  ierr = PetscOptionsGetBool(PETSC_NULL,"-options_gui",&PetscOptionsPublish,PETSC_NULL);CHKERRQ(ierr);

  /*
       Print basic help message
  */
  ierr = PetscOptionsHasName(PETSC_NULL,"-help",&flg1);CHKERRQ(ierr);
  if (flg1) {
    ierr = (*PetscHelpPrintf)(comm,"Options for all PETSc programs:\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -help: prints help method for each option\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_abort: cause an abort when an error is detected. Useful \n ");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," only when run in the debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_attach_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," start the debugger in new xterm\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," unless noxterm is given\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -start_in_debugger [gdb,dbx,xxgdb,ups,noxterm]\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," start all processes in the debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -on_error_emacs <machinename>\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," emacs jumps to error file\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -debugger_nodes [n1,n2,..] Nodes to start in debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -debugger_pause [m] : delay (in seconds) to attach debugger\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -stop_for_debugger : prints message on how to attach debugger manually\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," waits the delay for you to attach\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -display display: Location where graphics and debuggers are displayed\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -no_signal_handler: do not trap error signals\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -mpi_return_on_error: MPI returns error code, rather than abort on internal error\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -fp_trap: stop on floating point exceptions\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," note on IBM RS6000 this slows run greatly\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_dump <optional filename>: dump list of unfreed memory at conclusion\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc: use our error checking malloc\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc no: don't use error checking malloc\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_info: prints total memory usage\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_log: keeps log of all memory allocations\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -malloc_debug: enables extended checking for memory corruption\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_table: dump list of options inputted\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_left: dump list of unused options\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_left no: don't dump list of unused options\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -tmp tmpdir: alternative /tmp directory\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -shared_tmp: tmp directory is shared by all processors\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -not_shared_tmp: each processor has separate tmp directory\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -memory_info: print memory usage at end of run\n");CHKERRQ(ierr);
#if defined(PETSC_USE_LOG)
    ierr = (*PetscHelpPrintf)(comm," -get_total_flops: total flops over all processors\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -log[_all _summary]: logging objects and events\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -log_trace [filename]: prints trace of all PETSc calls\n");CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPE)
    ierr = (*PetscHelpPrintf)(comm," -log_mpe: Also create logfile viewable through upshot\n");CHKERRQ(ierr);
#endif
    ierr = (*PetscHelpPrintf)(comm," -info <optional filename>: print informative messages about the calculations\n");CHKERRQ(ierr);
#endif
    ierr = (*PetscHelpPrintf)(comm," -v: prints PETSc version number and release date\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -options_file <file>: reads options from file\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm," -petsc_sleep n: sleeps n seconds before running program\n");CHKERRQ(ierr);
    ierr = (*PetscHelpPrintf)(comm,"-----------------------------------------------\n");CHKERRQ(ierr);
  }

  ierr = PetscOptionsGetReal(PETSC_NULL,"-petsc_sleep",&si,&flg1);CHKERRQ(ierr);
  if (flg1) {
    ierr = PetscSleep(si);CHKERRQ(ierr);
  }

  ierr = PetscOptionsGetString(PETSC_NULL,"-info_exclude",mname,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr);
  ierr = PetscStrstr(mname,"null",&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscInfoDeactivateClass(PETSC_NULL);CHKERRQ(ierr);
  }

#if defined(PETSC_HAVE_CUSP)
  /* default CUSP synchronization to on when -log_summary is requested */
  ierr = PetscOptionsHasName(PETSC_NULL,"-log_summary",&flg3);CHKERRQ(ierr);
  if (flg3) flg1 = PETSC_TRUE;
  else flg1 = PETSC_FALSE;
  ierr = PetscOptionsGetBool(PETSC_NULL,"-cusp_synchronize",&flg1,PETSC_NULL);CHKERRQ(ierr);
  if (flg1) synchronizeCUSP = PETSC_TRUE;
#endif

  PetscFunctionReturn(0);
}

/**** 'Tree' Thread Pool Functions ****/
/* Worker loop for the 'tree' pool: threads form a binary tree ('Mary' = 2 is
   the branching factor); each thread waits on its subordinates' ready flags
   and is signalled to work by its boss. (Definition continues past this view.) */
void* PetscThreadFunc_Tree(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId,Mary = 2,i,SubWorker;
  PetscBool PeeOn;   /* PETSC_TRUE when this thread is a leaf (has no subordinates) */
  cpu_set_t mset;
  //printf("Thread %d In Tree Thread Function\n",ThreadId);
  /* pin this worker to its assigned core */
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  if((Mary*ThreadId+1)>(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    //check your subordinates, wait for them to be ready
    for(i=1;i<=Mary;i++) {
      SubWorker = Mary*ThreadId+i;
      if(SubWorker<PetscMaxThreads) {
        ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]);
        while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE) {
          //upon entry, atomically releases the lock and blocks
          //upon return, has the lock
          ierr =
pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 809 } 810 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 811 } 812 } 813 //your subordinates are now ready 814 } 815 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 816 //update your ready status 817 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 818 if(ThreadId==0) { 819 job_tree.eJobStat = JobCompleted; 820 //signal main 821 ierr = pthread_cond_signal(&main_cond); 822 } 823 else { 824 //tell your boss that you're ready to work 825 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 826 } 827 //the while loop needs to have an exit 828 //the 'main' thread can terminate all the threads by performing a broadcast 829 //and calling FuncFinish 830 while(PetscThreadGo) { 831 //need to check the condition to ensure we don't have to wait 832 //waiting when you don't have to causes problems 833 //also need to check the condition to ensure proper handling of spurious wakeups 834 while(*(job_tree.arrThreadReady[ThreadId])==PETSC_TRUE) { 835 //upon entry, automically releases the lock and blocks 836 //upon return, has the lock 837 ierr = pthread_cond_wait(job_tree.cond2array[ThreadId],job_tree.mutexarray[ThreadId]); 838 *(job_tree.arrThreadStarted[ThreadId]) = PETSC_TRUE; 839 *(job_tree.arrThreadReady[ThreadId]) = PETSC_FALSE; 840 } 841 if(ThreadId==0) { 842 job_tree.startJob = PETSC_FALSE; 843 job_tree.eJobStat = ThreadsWorking; 844 } 845 ierr = pthread_mutex_unlock(job_tree.mutexarray[ThreadId]); 846 if(PeeOn==PETSC_FALSE) { 847 //tell your subordinates it's time to get to work 848 for(i=1; i<=Mary; i++) { 849 SubWorker = Mary*ThreadId+i; 850 if(SubWorker<PetscMaxThreads) { 851 ierr = pthread_cond_signal(job_tree.cond2array[SubWorker]); 852 } 853 } 854 } 855 //do your job 856 if(job_tree.pdata==NULL) { 857 iterr = (PetscErrorCode)(long int)job_tree.pfunc(job_tree.pdata); 858 } 859 else { 860 iterr = (PetscErrorCode)(long 
int)job_tree.pfunc(job_tree.pdata[ThreadId]); 861 } 862 if(iterr!=0) { 863 ithreaderr = 1; 864 } 865 if(PetscThreadGo) { 866 //reset job, get ready for more 867 if(PeeOn==PETSC_FALSE) { 868 //check your subordinates, waiting for them to be ready 869 //how do you know for a fact that a given subordinate has actually started? 870 for(i=1;i<=Mary;i++) { 871 SubWorker = Mary*ThreadId+i; 872 if(SubWorker<PetscMaxThreads) { 873 ierr = pthread_mutex_lock(job_tree.mutexarray[SubWorker]); 874 while(*(job_tree.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_tree.arrThreadStarted[SubWorker])==PETSC_FALSE) { 875 //upon entry, automically releases the lock and blocks 876 //upon return, has the lock 877 ierr = pthread_cond_wait(job_tree.cond1array[SubWorker],job_tree.mutexarray[SubWorker]); 878 } 879 ierr = pthread_mutex_unlock(job_tree.mutexarray[SubWorker]); 880 } 881 } 882 //your subordinates are now ready 883 } 884 ierr = pthread_mutex_lock(job_tree.mutexarray[ThreadId]); 885 *(job_tree.arrThreadReady[ThreadId]) = PETSC_TRUE; 886 if(ThreadId==0) { 887 job_tree.eJobStat = JobCompleted; //root thread: last thread to complete, guaranteed! 
888 //root thread signals 'main' 889 ierr = pthread_cond_signal(&main_cond); 890 } 891 else { 892 //signal your boss before you go to sleep 893 ierr = pthread_cond_signal(job_tree.cond1array[ThreadId]); 894 } 895 } 896 } 897 return NULL; 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "PetscThreadInitialize_Tree" 902 void* PetscThreadInitialize_Tree(PetscInt N) { 903 PetscInt i,ierr; 904 int status; 905 906 if(PetscUseThreadPool) { 907 size_t Val1 = (size_t)CACHE_LINE_SIZE; 908 size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE; 909 arrmutex = (char*)memalign(Val1,Val2); 910 arrcond1 = (char*)memalign(Val1,Val2); 911 arrcond2 = (char*)memalign(Val1,Val2); 912 arrstart = (char*)memalign(Val1,Val2); 913 arrready = (char*)memalign(Val1,Val2); 914 job_tree.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*)); 915 job_tree.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 916 job_tree.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*)); 917 job_tree.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 918 job_tree.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*)); 919 //initialize job structure 920 for(i=0; i<PetscMaxThreads; i++) { 921 job_tree.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i); 922 job_tree.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i); 923 job_tree.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i); 924 job_tree.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i); 925 job_tree.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i); 926 } 927 for(i=0; i<PetscMaxThreads; i++) { 928 ierr = pthread_mutex_init(job_tree.mutexarray[i],NULL); 929 ierr = pthread_cond_init(job_tree.cond1array[i],NULL); 930 ierr = pthread_cond_init(job_tree.cond2array[i],NULL); 931 *(job_tree.arrThreadStarted[i]) = PETSC_FALSE; 932 *(job_tree.arrThreadReady[i]) = PETSC_FALSE; 933 } 934 
job_tree.pfunc = NULL; 935 job_tree.pdata = (void**)malloc(N*sizeof(void*)); 936 job_tree.startJob = PETSC_FALSE; 937 job_tree.eJobStat = JobInitiated; 938 pVal = (int*)malloc(N*sizeof(int)); 939 //allocate memory in the heap for the thread structure 940 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 941 //create threads 942 for(i=0; i<N; i++) { 943 pVal[i] = i; 944 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 945 //error check 946 } 947 } 948 else { 949 //do nothing 950 } 951 return NULL; 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "PetscThreadFinalize_Tree" 956 PetscErrorCode PetscThreadFinalize_Tree() { 957 int i,ierr; 958 void* jstatus; 959 960 PetscFunctionBegin; 961 962 if(PetscUseThreadPool) { 963 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 964 //join the threads 965 for(i=0; i<PetscMaxThreads; i++) { 966 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 967 //do error checking 968 } 969 free(PetscThreadPoint); 970 free(arrmutex); 971 free(arrcond1); 972 free(arrcond2); 973 free(arrstart); 974 free(arrready); 975 free(job_tree.pdata); 976 free(pVal); 977 } 978 else { 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MainWait_Tree" 985 void MainWait_Tree() { 986 int ierr; 987 ierr = pthread_mutex_lock(job_tree.mutexarray[0]); 988 while(job_tree.eJobStat<JobCompleted||job_tree.startJob==PETSC_TRUE) { 989 ierr = pthread_cond_wait(&main_cond,job_tree.mutexarray[0]); 990 } 991 ierr = pthread_mutex_unlock(job_tree.mutexarray[0]); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MainJob_Tree" 996 PetscErrorCode MainJob_Tree(void* (*pFunc)(void*),void** data,PetscInt n) { 997 int i,ierr; 998 PetscErrorCode ijoberr = 0; 999 if(PetscUseThreadPool) { 1000 MainWait(); 1001 job_tree.pfunc = pFunc; 1002 job_tree.pdata = data; 1003 job_tree.startJob = PETSC_TRUE; 1004 for(i=0; i<PetscMaxThreads; i++) { 1005 *(job_tree.arrThreadStarted[i]) = 
PETSC_FALSE; 1006 } 1007 job_tree.eJobStat = JobInitiated; 1008 ierr = pthread_cond_signal(job_tree.cond2array[0]); 1009 if(pFunc!=FuncFinish) { 1010 MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any) 1011 } 1012 } 1013 else { 1014 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1015 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1016 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1017 free(apThread); 1018 } 1019 if(ithreaderr) { 1020 ijoberr = ithreaderr; 1021 } 1022 return ijoberr; 1023 } 1024 /**** ****/ 1025 1026 /**** 'Main' Thread Pool Functions ****/ 1027 void* PetscThreadFunc_Main(void* arg) { 1028 PetscErrorCode iterr; 1029 int icorr,ierr; 1030 int* pId = (int*)arg; 1031 int ThreadId = *pId; 1032 cpu_set_t mset; 1033 //printf("Thread %d In Main Thread Function\n",ThreadId); 1034 icorr = ThreadCoreAffinity[ThreadId]; 1035 CPU_ZERO(&mset); 1036 CPU_SET(icorr,&mset); 1037 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1038 1039 ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]); 1040 //update your ready status 1041 *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE; 1042 //tell the BOSS that you're ready to work before you go to sleep 1043 ierr = pthread_cond_signal(job_main.cond1array[ThreadId]); 1044 1045 //the while loop needs to have an exit 1046 //the 'main' thread can terminate all the threads by performing a broadcast 1047 //and calling FuncFinish 1048 while(PetscThreadGo) { 1049 //need to check the condition to ensure we don't have to wait 1050 //waiting when you don't have to causes problems 1051 //also need to check the condition to ensure proper handling of spurious wakeups 1052 while(*(job_main.arrThreadReady[ThreadId])==PETSC_TRUE) { 1053 //upon entry, atomically releases the lock and blocks 1054 //upon return, has the lock 1055 ierr = pthread_cond_wait(job_main.cond2array[ThreadId],job_main.mutexarray[ThreadId]); 
//*(job_main.arrThreadReady[ThreadId]) = PETSC_FALSE;
      //NOTE(review): the reset above is intentionally commented out — the
      //ready flag is cleared by MainJob_Main before the workers are signalled
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[ThreadId]);
    //do your job; a NULL pdata means the kernel takes no per-thread argument
    if(job_main.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_main.pfunc(job_main.pdata[ThreadId]);
    }
    if(iterr!=0) {
      ithreaderr = 1; //sticky flag: records that some thread's kernel failed
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      ierr = pthread_mutex_lock(job_main.mutexarray[ThreadId]);
      *(job_main.arrThreadReady[ThreadId]) = PETSC_TRUE;
      //tell the BOSS that you're ready to work before you go to sleep
      ierr = pthread_cond_signal(job_main.cond1array[ThreadId]);
    }
  }
  return NULL;
}

#undef __FUNCT__
#define __FUNCT__ "PetscThreadInitialize_Main"
//Builds the 'main' pool: per-thread mutex/condvar pairs in cache-line-aligned
//slabs (to avoid false sharing), then spawns N workers running PetscThreadFunc.
//NOTE(review): memalign/malloc return values are not checked, and arrstart is
//allocated (and later freed) but never referenced by this pool — confirm.
void* PetscThreadInitialize_Main(PetscInt N) {
  PetscInt i,ierr;
  int status;

  if(PetscUseThreadPool) {
    //each thread's slot is CACHE_LINE_SIZE bytes so neighbors never share a line
    size_t Val1 = (size_t)CACHE_LINE_SIZE;
    size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
    arrmutex = (char*)memalign(Val1,Val2);
    arrcond1 = (char*)memalign(Val1,Val2);
    arrcond2 = (char*)memalign(Val1,Val2);
    arrstart = (char*)memalign(Val1,Val2);
    arrready = (char*)memalign(Val1,Val2);
    job_main.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
    job_main.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_main.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_main.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
    //initialize job structure: point the per-thread objects into the
    //cache-line-aligned slabs
    for(i=0; i<PetscMaxThreads; i++) {
      job_main.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
      job_main.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
      job_main.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
      job_main.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
    }
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_mutex_init(job_main.mutexarray[i],NULL);
      ierr = pthread_cond_init(job_main.cond1array[i],NULL);
      ierr = pthread_cond_init(job_main.cond2array[i],NULL);
      *(job_main.arrThreadReady[i]) = PETSC_FALSE;
    }
    job_main.pfunc = NULL;
    job_main.pdata = (void**)malloc(N*sizeof(void*));
    pVal = (int*)malloc(N*sizeof(int));
    //allocate memory in the heap for the thread structure
    PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
    //create threads; pVal[i] gives each worker a stable id to point at
    for(i=0; i<N; i++) {
      pVal[i] = i;
      status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
      //error check
    }
  }
  else {
  }
  return NULL;
}

#undef __FUNCT__
#define __FUNCT__ "PetscThreadFinalize_Main"
//Shuts the 'main' pool down: broadcasts FuncFinish so every worker exits its
//loop, joins all workers, then releases the pool storage.
//NOTE(review): MainJob_Main overwrites job_main.pdata with the caller's
//array, so the array malloc'ed at initialization appears to leak — confirm.
PetscErrorCode PetscThreadFinalize_Main() {
  int i,ierr;
  void* jstatus;

  PetscFunctionBegin;

  if(PetscUseThreadPool) {
    MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work
    //join the threads
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_join(PetscThreadPoint[i],&jstatus);
      //do error checking
    }
    free(PetscThreadPoint);
    free(arrmutex);
    free(arrcond1);
    free(arrcond2);
    free(arrstart);
    free(arrready);
    free(job_main.pdata);
    free(pVal);
  }
  else {
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MainWait_Main"
//Blocks the 'main' thread until every worker in the flat pool has raised its
//ready flag; each worker is polled under its own mutex via cond1array.
void MainWait_Main() {
  int i,ierr;
  for(i=0; i<PetscMaxThreads; i++) {
    ierr = pthread_mutex_lock(job_main.mutexarray[i]);
    while(*(job_main.arrThreadReady[i])==PETSC_FALSE) {
      ierr = pthread_cond_wait(job_main.cond1array[i],job_main.mutexarray[i]);
    }
    ierr = pthread_mutex_unlock(job_main.mutexarray[i]);
  }
}

#undef __FUNCT__
#define __FUNCT__ "MainJob_Main"
//Dispatches one job (pFunc over data) to the 'main' pool: waits until all
//workers are idle, publishes the job, clears every ready flag, signals each
//worker directly, and waits for completion.  Without the pool, falls back to
//spawn/join of transient threads.
PetscErrorCode MainJob_Main(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait(); //you know everyone is waiting to be signalled!
    job_main.pfunc = pFunc;
    job_main.pdata = data;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_main.arrThreadReady[i]) = PETSC_FALSE; //why do this? suppose you get into MainWait first
    }
    //tell the threads to go to work
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_cond_signal(job_main.cond2array[i]);
    }
    if(pFunc!=FuncFinish) {
      MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
    }
  }
  else {
    pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t));
    PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data);
    PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job
    free(apThread);
  }
  if(ithreaderr) {
    ijoberr = ithreaderr;
  }
  return ijoberr;
}
/**** ****/

/**** Chain Thread Functions ****/
//Worker routine for the 'chain' pool.  Threads form a linear chain: thread
//i's only subordinate is thread i+1, and the last thread (PeeOn) has none.
//Readiness propagates tail->head via cond1array, work propagates head->tail
//via cond2array; only thread 0 signals the 'main' thread through main_cond.
void* PetscThreadFunc_Chain(void* arg) {
  PetscErrorCode iterr;
  int icorr,ierr;
  int* pId = (int*)arg;
  int ThreadId = *pId;
  int SubWorker = ThreadId + 1;
  PetscBool PeeOn;
  cpu_set_t mset;
  //printf("Thread %d In Chain Thread Function\n",ThreadId);
  //pin this thread to its assigned core
  icorr = ThreadCoreAffinity[ThreadId];
  CPU_ZERO(&mset);
  CPU_SET(icorr,&mset);
  sched_setaffinity(0,sizeof(cpu_set_t),&mset);

  //the last thread in the chain has no subordinate
  if(ThreadId==(PetscMaxThreads-1)) {
    PeeOn = PETSC_TRUE;
  }
  else {
    PeeOn = PETSC_FALSE;
  }
  if(PeeOn==PETSC_FALSE) {
    //check your subordinate, wait for him to be ready
    ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
    while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE) {
      //upon entry, atomically releases the lock and blocks
      //upon return, has the lock
      ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
    //your subordinate is now ready
  }
  ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
  //update your ready status
  *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
  if(ThreadId==0) {
    job_chain.eJobStat = JobCompleted;
    //signal main
    ierr = pthread_cond_signal(&main_cond);
  }
  else {
    //tell your boss that you're ready to work
    ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
  }
  //the while loop needs to have an exit:
  //the 'main' thread terminates all threads by broadcasting the FuncFinish job,
  //which clears PetscThreadGo
  while(PetscThreadGo) {
    //check the predicate so we neither wait needlessly nor mishandle
    //spurious wakeups
    while(*(job_chain.arrThreadReady[ThreadId])==PETSC_TRUE) {
      //upon entry, atomically releases the lock and blocks
      //upon return, has the lock
      ierr = pthread_cond_wait(job_chain.cond2array[ThreadId],job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadStarted[ThreadId]) = PETSC_TRUE;
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_FALSE;
    }
    if(ThreadId==0) {
      //head of the chain records that the job has been picked up
      job_chain.startJob = PETSC_FALSE;
      job_chain.eJobStat = ThreadsWorking;
    }
    ierr = pthread_mutex_unlock(job_chain.mutexarray[ThreadId]);
    if(PeeOn==PETSC_FALSE) {
      //tell your subworker it's time to get to work
      ierr = pthread_cond_signal(job_chain.cond2array[SubWorker]);
    }
    //do your job; a NULL pdata means the kernel takes no per-thread argument
    if(job_chain.pdata==NULL) {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata);
    }
    else {
      iterr = (PetscErrorCode)(long int)job_chain.pfunc(job_chain.pdata[ThreadId]);
    }
    if(iterr!=0) {
ithreaderr = 1; //sticky flag: records that some thread's kernel failed
    }
    if(PetscThreadGo) {
      //reset job, get ready for more
      if(PeeOn==PETSC_FALSE) {
        //wait until the subordinate has both STARTED the current job and
        //become ready again (Started guards against seeing a stale Ready)
        ierr = pthread_mutex_lock(job_chain.mutexarray[SubWorker]);
        while(*(job_chain.arrThreadReady[SubWorker])==PETSC_FALSE||*(job_chain.arrThreadStarted[SubWorker])==PETSC_FALSE) {
          //upon entry, atomically releases the lock and blocks
          //upon return, has the lock
          ierr = pthread_cond_wait(job_chain.cond1array[SubWorker],job_chain.mutexarray[SubWorker]);
        }
        ierr = pthread_mutex_unlock(job_chain.mutexarray[SubWorker]);
        //your subordinate is now ready
      }
      ierr = pthread_mutex_lock(job_chain.mutexarray[ThreadId]);
      *(job_chain.arrThreadReady[ThreadId]) = PETSC_TRUE;
      if(ThreadId==0) {
        job_chain.eJobStat = JobCompleted; //foreman: last thread to complete, guaranteed!
        //root thread (foreman) signals 'main'
        ierr = pthread_cond_signal(&main_cond);
      }
      else {
        //signal your boss before you go to sleep
        ierr = pthread_cond_signal(job_chain.cond1array[ThreadId]);
      }
    }
  }
  return NULL;
}

#undef __FUNCT__
#define __FUNCT__ "PetscThreadInitialize_Chain"
//Builds the 'chain' pool: per-thread mutex/condvar/flag objects in
//cache-line-aligned slabs (to avoid false sharing), then spawns N workers
//running PetscThreadFunc.
//NOTE(review): memalign/malloc return values are not checked — confirm
//allocation failure policy with callers.
void* PetscThreadInitialize_Chain(PetscInt N) {
  PetscInt i,ierr;
  int status;

  if(PetscUseThreadPool) {
    //each thread's slot is CACHE_LINE_SIZE bytes so neighbors never share a line
    size_t Val1 = (size_t)CACHE_LINE_SIZE;
    size_t Val2 = (size_t)PetscMaxThreads*CACHE_LINE_SIZE;
    arrmutex = (char*)memalign(Val1,Val2);
    arrcond1 = (char*)memalign(Val1,Val2);
    arrcond2 = (char*)memalign(Val1,Val2);
    arrstart = (char*)memalign(Val1,Val2);
    arrready = (char*)memalign(Val1,Val2);
    job_chain.mutexarray = (pthread_mutex_t**)malloc(PetscMaxThreads*sizeof(pthread_mutex_t*));
    job_chain.cond1array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_chain.cond2array = (pthread_cond_t**)malloc(PetscMaxThreads*sizeof(pthread_cond_t*));
    job_chain.arrThreadStarted = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
    job_chain.arrThreadReady = (PetscBool**)malloc(PetscMaxThreads*sizeof(PetscBool*));
    //initialize job structure: point the per-thread objects into the
    //cache-line-aligned slabs
    for(i=0; i<PetscMaxThreads; i++) {
      job_chain.mutexarray[i] = (pthread_mutex_t*)(arrmutex+CACHE_LINE_SIZE*i);
      job_chain.cond1array[i] = (pthread_cond_t*)(arrcond1+CACHE_LINE_SIZE*i);
      job_chain.cond2array[i] = (pthread_cond_t*)(arrcond2+CACHE_LINE_SIZE*i);
      job_chain.arrThreadStarted[i] = (PetscBool*)(arrstart+CACHE_LINE_SIZE*i);
      job_chain.arrThreadReady[i] = (PetscBool*)(arrready+CACHE_LINE_SIZE*i);
    }
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_mutex_init(job_chain.mutexarray[i],NULL);
      ierr = pthread_cond_init(job_chain.cond1array[i],NULL);
      ierr = pthread_cond_init(job_chain.cond2array[i],NULL);
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
      *(job_chain.arrThreadReady[i]) = PETSC_FALSE;
    }
    job_chain.pfunc = NULL;
    job_chain.pdata = (void**)malloc(N*sizeof(void*));
    job_chain.startJob = PETSC_FALSE;
    job_chain.eJobStat = JobInitiated;
    pVal = (int*)malloc(N*sizeof(int));
    //allocate memory in the heap for the thread structure
    PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t));
    //create threads; pVal[i] gives each worker a stable id to point at
    for(i=0; i<N; i++) {
      pVal[i] = i;
      status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]);
      //error check
    }
  }
  else {
  }
  return NULL;
}


#undef __FUNCT__
#define __FUNCT__ "PetscThreadFinalize_Chain"
//Shuts the 'chain' pool down: broadcasts FuncFinish so every worker exits its
//loop, joins all workers, then releases the pool storage.
//NOTE(review): MainJob_Chain overwrites job_chain.pdata with the caller's
//array, so the array malloc'ed at initialization appears to leak — confirm.
PetscErrorCode PetscThreadFinalize_Chain() {
  int i,ierr;
  void* jstatus;

  PetscFunctionBegin;

  if(PetscUseThreadPool) {
    MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work
    //join the threads
    for(i=0; i<PetscMaxThreads; i++) {
      ierr = pthread_join(PetscThreadPoint[i],&jstatus);
      //do error checking
    }
    free(PetscThreadPoint);
    free(arrmutex);
    free(arrcond1);
    free(arrcond2);
    free(arrstart);
    free(arrready);
    free(job_chain.pdata);
    free(pVal);
  }
  else {
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MainWait_Chain"
//Blocks the 'main' thread until the head worker (thread 0) reports the
//current job completed (and no new job is pending).
void MainWait_Chain() {
  int ierr;
  ierr = pthread_mutex_lock(job_chain.mutexarray[0]);
  while(job_chain.eJobStat<JobCompleted||job_chain.startJob==PETSC_TRUE) {
    ierr = pthread_cond_wait(&main_cond,job_chain.mutexarray[0]);
  }
  ierr = pthread_mutex_unlock(job_chain.mutexarray[0]);
}

#undef __FUNCT__
#define __FUNCT__ "MainJob_Chain"
//Dispatches one job (pFunc over data) to the 'chain' pool: waits for the pool
//to be idle, publishes the job, kicks the head (thread 0), and waits for
//completion.  Without the pool, falls back to spawn/join of transient threads.
PetscErrorCode MainJob_Chain(void* (*pFunc)(void*),void** data,PetscInt n) {
  int i,ierr;
  PetscErrorCode ijoberr = 0;
  if(PetscUseThreadPool) {
    MainWait();
    job_chain.pfunc = pFunc;
    job_chain.pdata = data;
    job_chain.startJob = PETSC_TRUE;
    for(i=0; i<PetscMaxThreads; i++) {
      *(job_chain.arrThreadStarted[i]) = PETSC_FALSE;
    }
    job_chain.eJobStat = JobInitiated;
    //wake the head; work propagates down the chain from there
    ierr = pthread_cond_signal(job_chain.cond2array[0]);
    if(pFunc!=FuncFinish) {
      MainWait(); //why wait after? guarantees that job gets done before proceeding with result collection (if any)
guarantees that job gets done before proceeding with result collection (if any) 1421 } 1422 } 1423 else { 1424 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1425 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1426 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1427 free(apThread); 1428 } 1429 if(ithreaderr) { 1430 ijoberr = ithreaderr; 1431 } 1432 return ijoberr; 1433 } 1434 /**** ****/ 1435 1436 /**** True Thread Functions ****/ 1437 void* PetscThreadFunc_True(void* arg) { 1438 int icorr,ierr,iVal; 1439 int* pId = (int*)arg; 1440 int ThreadId = *pId; 1441 PetscErrorCode iterr; 1442 cpu_set_t mset; 1443 //printf("Thread %d In True Pool Thread Function\n",ThreadId); 1444 icorr = ThreadCoreAffinity[ThreadId]; 1445 CPU_ZERO(&mset); 1446 CPU_SET(icorr,&mset); 1447 sched_setaffinity(0,sizeof(cpu_set_t),&mset); 1448 1449 ierr = pthread_mutex_lock(&job_true.mutex); 1450 job_true.iNumReadyThreads++; 1451 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1452 ierr = pthread_cond_signal(&main_cond); 1453 } 1454 //the while loop needs to have an exit 1455 //the 'main' thread can terminate all the threads by performing a broadcast 1456 //and calling FuncFinish 1457 while(PetscThreadGo) { 1458 //need to check the condition to ensure we don't have to wait 1459 //waiting when you don't have to causes problems 1460 //also need to wait if another thread sneaks in and messes with the predicate 1461 while(job_true.startJob==PETSC_FALSE&&job_true.iNumJobThreads==0) { 1462 //upon entry, automically releases the lock and blocks 1463 //upon return, has the lock 1464 ierr = pthread_cond_wait(&job_true.cond,&job_true.mutex); 1465 } 1466 job_true.startJob = PETSC_FALSE; 1467 job_true.iNumJobThreads--; 1468 job_true.iNumReadyThreads--; 1469 iVal = PetscMaxThreads-job_true.iNumReadyThreads-1; 1470 pthread_mutex_unlock(&job_true.mutex); 1471 if(job_true.pdata==NULL) { 1472 iterr = (PetscErrorCode)(long 
int)job_true.pfunc(job_true.pdata); 1473 } 1474 else { 1475 iterr = (PetscErrorCode)(long int)job_true.pfunc(job_true.pdata[iVal]); 1476 } 1477 if(iterr!=0) { 1478 ithreaderr = 1; 1479 } 1480 //the barrier is necessary BECAUSE: look at job_true.iNumReadyThreads 1481 //what happens if a thread finishes before they all start? BAD! 1482 //what happens if a thread finishes before any else start? BAD! 1483 pthread_barrier_wait(job_true.pbarr); //ensures all threads are finished 1484 //reset job 1485 if(PetscThreadGo) { 1486 pthread_mutex_lock(&job_true.mutex); 1487 job_true.iNumReadyThreads++; 1488 if(job_true.iNumReadyThreads==PetscMaxThreads) { 1489 //signal the 'main' thread that the job is done! (only done once) 1490 ierr = pthread_cond_signal(&main_cond); 1491 } 1492 } 1493 } 1494 return NULL; 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "PetscThreadInitialize_True" 1499 void* PetscThreadInitialize_True(PetscInt N) { 1500 PetscInt i; 1501 int status; 1502 1503 if(PetscUseThreadPool) { 1504 pVal = (int*)malloc(N*sizeof(int)); 1505 //allocate memory in the heap for the thread structure 1506 PetscThreadPoint = (pthread_t*)malloc(N*sizeof(pthread_t)); 1507 BarrPoint = (pthread_barrier_t*)malloc((N+1)*sizeof(pthread_barrier_t)); //BarrPoint[0] makes no sense, don't use it! 
1508 job_true.pdata = (void**)malloc(N*sizeof(void*)); 1509 for(i=0; i<N; i++) { 1510 pVal[i] = i; 1511 status = pthread_create(&PetscThreadPoint[i],NULL,PetscThreadFunc,&pVal[i]); 1512 //error check to ensure proper thread creation 1513 status = pthread_barrier_init(&BarrPoint[i+1],NULL,i+1); 1514 //error check 1515 } 1516 } 1517 else { 1518 } 1519 return NULL; 1520 } 1521 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "PetscThreadFinalize_True" 1525 PetscErrorCode PetscThreadFinalize_True() { 1526 int i,ierr; 1527 void* jstatus; 1528 1529 PetscFunctionBegin; 1530 1531 if(PetscUseThreadPool) { 1532 MainJob(FuncFinish,NULL,PetscMaxThreads); //set up job and broadcast work 1533 //join the threads 1534 for(i=0; i<PetscMaxThreads; i++) { 1535 ierr = pthread_join(PetscThreadPoint[i],&jstatus); 1536 //do error checking 1537 } 1538 free(BarrPoint); 1539 free(PetscThreadPoint); 1540 } 1541 else { 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MainWait_True" 1548 void MainWait_True() { 1549 int ierr; 1550 while(job_true.iNumReadyThreads<PetscMaxThreads||job_true.startJob==PETSC_TRUE) { 1551 ierr = pthread_cond_wait(&main_cond,&job_true.mutex); 1552 } 1553 ierr = pthread_mutex_unlock(&job_true.mutex); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MainJob_True" 1558 PetscErrorCode MainJob_True(void* (*pFunc)(void*),void** data,PetscInt n) { 1559 int ierr; 1560 PetscErrorCode ijoberr = 0; 1561 if(PetscUseThreadPool) { 1562 MainWait(); 1563 job_true.pfunc = pFunc; 1564 job_true.pdata = data; 1565 job_true.pbarr = &BarrPoint[n]; 1566 job_true.iNumJobThreads = n; 1567 job_true.startJob = PETSC_TRUE; 1568 ierr = pthread_cond_broadcast(&job_true.cond); 1569 if(pFunc!=FuncFinish) { 1570 MainWait(); //why wait after? 
guarantees that job gets done 1571 } 1572 } 1573 else { 1574 pthread_t* apThread = (pthread_t*)malloc(n*sizeof(pthread_t)); 1575 PetscThreadRun(MPI_COMM_WORLD,pFunc,n,apThread,data); 1576 PetscThreadStop(MPI_COMM_WORLD,n,apThread); //ensures that all threads are finished with the job 1577 free(apThread); 1578 } 1579 if(ithreaderr) { 1580 ijoberr = ithreaderr; 1581 } 1582 return ijoberr; 1583 } 1584 /**** ****/ 1585 1586 void* FuncFinish(void* arg) { 1587 PetscThreadGo = PETSC_FALSE; 1588 return(0); 1589 } 1590