1 #define PETSCKSP_DLL 2 3 /* 4 3/99 Modified by Stephen Barnard to support SPAI version 3.0 5 */ 6 7 /* 8 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 9 Code written by Stephen Barnard. 10 11 Note: there is some BAD memory bleeding below! 12 13 This code needs work 14 15 1) get rid of all memory bleeding 16 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 17 rather than having the sp flag for PC_SPAI 18 19 */ 20 21 #include "src/ksp/pc/pcimpl.h" /*I "petscpc.h" I*/ 22 23 /* 24 These are the SPAI include files 25 */ 26 EXTERN_C_BEGIN 27 #include "spai.h" 28 #include "matrix.h" 29 EXTERN_C_END 30 31 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 32 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 33 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 34 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *); 35 36 typedef struct { 37 38 matrix *B; /* matrix in SPAI format */ 39 matrix *BT; /* transpose of matrix in SPAI format */ 40 matrix *M; /* the approximate inverse in SPAI format */ 41 42 Mat PM; /* the approximate inverse PETSc format */ 43 44 double epsilon; /* tolerance */ 45 int nbsteps; /* max number of "improvement" steps per line */ 46 int max; /* max dimensions of is_I, q, etc. */ 47 int maxnew; /* max number of new entries per step */ 48 int block_size; /* constant block size */ 49 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 50 int verbose; /* SPAI prints timing and statistics */ 51 52 int sp; /* symmetric nonzero pattern */ 53 MPI_Comm comm_spai; /* communicator to be used with spai */ 54 } PC_SPAI; 55 56 /**********************************************************************/ 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCSetUp_SPAI" 60 static PetscErrorCode PCSetUp_SPAI(PC pc) 61 { 62 PC_SPAI *ispai = (PC_SPAI*)pc->data; 63 PetscErrorCode ierr; 64 Mat AT; 65 66 PetscFunctionBegin; 67 68 init_SPAI(); 69 70 if (ispai->sp) { 71 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 72 } else { 73 /* Use the transpose to get the column nonzero structure. */ 74 ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr); 75 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 76 ierr = MatDestroy(AT);CHKERRQ(ierr); 77 } 78 79 /* Destroy the transpose */ 80 /* Don't know how to do it. PETSc developers? */ 81 82 /* construct SPAI preconditioner */ 83 /* FILE *messages */ /* file for warning messages */ 84 /* double epsilon */ /* tolerance */ 85 /* int nbsteps */ /* max number of "improvement" steps per line */ 86 /* int max */ /* max dimensions of is_I, q, etc. */ 87 /* int maxnew */ /* max number of new entries per step */ 88 /* int block_size */ /* block_size == 1 specifies scalar elments 89 block_size == n specifies nxn constant-block elements 90 block_size == 0 specifies variable-block elements */ 91 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 92 /* cache_size == 0 indicates no caching */ 93 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 94 verbose == 1 prints timing and matrix statistics */ 95 96 ierr = bspai(ispai->B,&ispai->M, 97 stdout, 98 ispai->epsilon, 99 ispai->nbsteps, 100 ispai->max, 101 ispai->maxnew, 102 ispai->block_size, 103 ispai->cache_size, 104 ispai->verbose); CHKERRQ(ierr); 105 106 ierr = ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 107 108 /* free the SPAI matrices */ 109 sp_free_matrix(ispai->B); 110 sp_free_matrix(ispai->M); 111 112 PetscFunctionReturn(0); 113 } 114 115 /**********************************************************************/ 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCApply_SPAI" 119 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 120 { 121 PC_SPAI *ispai = (PC_SPAI*)pc->data; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 /* Now using PETSc's multiply */ 126 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 /**********************************************************************/ 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "PCDestroy_SPAI" 134 static PetscErrorCode PCDestroy_SPAI(PC pc) 135 { 136 PetscErrorCode ierr; 137 PC_SPAI *ispai = (PC_SPAI*)pc->data; 138 139 PetscFunctionBegin; 140 if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);} 141 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 142 ierr = PetscFree(ispai);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 /**********************************************************************/ 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCView_SPAI" 150 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 151 { 152 PC_SPAI *ispai = (PC_SPAI*)pc->data; 153 PetscErrorCode ierr; 154 PetscTruth iascii; 155 156 PetscFunctionBegin; 157 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 158 if (iascii) { 159 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 160 ierr = PetscViewerASCIIPrintf(viewer," epsilon %g\n", ispai->epsilon);CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 168 169 } 170 PetscFunctionReturn(0); 171 } 172 173 EXTERN_C_BEGIN 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 176 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 177 { 178 PC_SPAI *ispai = (PC_SPAI*)pc->data; 179 PetscFunctionBegin; 180 ispai->epsilon = epsilon1; 181 PetscFunctionReturn(0); 182 } 183 EXTERN_C_END 184 185 /**********************************************************************/ 186 187 EXTERN_C_BEGIN 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 190 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 191 { 192 PC_SPAI *ispai = (PC_SPAI*)pc->data; 193 PetscFunctionBegin; 194 ispai->nbsteps = nbsteps1; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 199 /**********************************************************************/ 200 201 /* added 1/7/99 g.h. */ 202 EXTERN_C_BEGIN 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCSPAISetMax_SPAI" 205 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1) 206 { 207 PC_SPAI *ispai = (PC_SPAI*)pc->data; 208 PetscFunctionBegin; 209 ispai->max = max1; 210 PetscFunctionReturn(0); 211 } 212 EXTERN_C_END 213 214 /**********************************************************************/ 215 216 EXTERN_C_BEGIN 217 #undef __FUNCT__ 218 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 219 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 220 { 221 PC_SPAI *ispai = (PC_SPAI*)pc->data; 222 PetscFunctionBegin; 223 ispai->maxnew = maxnew1; 224 PetscFunctionReturn(0); 225 } 226 EXTERN_C_END 227 228 /**********************************************************************/ 229 230 EXTERN_C_BEGIN 231 #undef __FUNCT__ 232 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 233 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 234 { 235 PC_SPAI *ispai = (PC_SPAI*)pc->data; 236 PetscFunctionBegin; 237 ispai->block_size = block_size1; 238 PetscFunctionReturn(0); 239 } 240 EXTERN_C_END 241 242 /**********************************************************************/ 243 244 EXTERN_C_BEGIN 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 247 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 248 { 249 PC_SPAI *ispai = (PC_SPAI*)pc->data; 250 PetscFunctionBegin; 251 ispai->cache_size = cache_size; 252 PetscFunctionReturn(0); 253 } 254 EXTERN_C_END 255 256 /**********************************************************************/ 257 258 EXTERN_C_BEGIN 259 #undef __FUNCT__ 260 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 261 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose) 262 { 263 PC_SPAI *ispai = (PC_SPAI*)pc->data; 264 PetscFunctionBegin; 265 ispai->verbose = verbose; 266 PetscFunctionReturn(0); 267 } 268 EXTERN_C_END 269 270 /**********************************************************************/ 271 272 EXTERN_C_BEGIN 273 #undef __FUNCT__ 274 #define __FUNCT__ "PCSPAISetSp_SPAI" 275 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp) 276 { 277 PC_SPAI *ispai = (PC_SPAI*)pc->data; 278 PetscFunctionBegin; 279 ispai->sp = sp; 280 PetscFunctionReturn(0); 281 } 282 EXTERN_C_END 283 284 /* -------------------------------------------------------------------*/ 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "PCSPAISetEpsilon" 288 /*@ 289 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 290 291 Input Parameters: 292 + pc - the preconditioner 293 - eps - epsilon (default .4) 294 295 Notes: Espilon must be between 0 and 1. It controls the 296 quality of the approximation of M to the inverse of 297 A. Higher values of epsilon lead to more work, more 298 fill, and usually better preconditioners. In many 299 cases the best choice of epsilon is the one that 300 divides the total solution time equally between the 301 preconditioner and the solver. 302 303 Level: intermediate 304 305 .seealso: PCSPAI, PCSetType() 306 @*/ 307 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1) 308 { 309 PetscErrorCode ierr,(*f)(PC,double); 310 PetscFunctionBegin; 311 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr); 312 if (f) { 313 ierr = (*f)(pc,epsilon1);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 /**********************************************************************/ 319 320 #undef __FUNCT__ 321 #define __FUNCT__ "PCSPAISetNBSteps" 322 /*@ 323 PCSPAISetNBSteps - set maximum number of improvement steps per row in 324 the SPAI preconditioner 325 326 Input Parameters: 327 + pc - the preconditioner 328 - n - number of steps (default 5) 329 330 Notes: SPAI constructs to approximation to every column of 331 the exact inverse of A in a series of improvement 332 steps. The quality of the approximation is determined 333 by epsilon. If an approximation achieving an accuracy 334 of epsilon is not obtained after ns steps, SPAI simply 335 uses the best approximation constructed so far. 336 337 Level: intermediate 338 339 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 340 @*/ 341 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1) 342 { 343 PetscErrorCode ierr,(*f)(PC,int); 344 PetscFunctionBegin; 345 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr); 346 if (f) { 347 ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 /**********************************************************************/ 353 354 /* added 1/7/99 g.h. */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCSPAISetMax" 357 /*@ 358 PCSPAISetMax - set the size of various working buffers in 359 the SPAI preconditioner 360 361 Input Parameters: 362 + pc - the preconditioner 363 - n - size (default is 5000) 364 365 Level: intermediate 366 367 .seealso: PCSPAI, PCSetType() 368 @*/ 369 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1) 370 { 371 PetscErrorCode ierr,(*f)(PC,int); 372 PetscFunctionBegin; 373 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr); 374 if (f) { 375 ierr = (*f)(pc,max1);CHKERRQ(ierr); 376 } 377 PetscFunctionReturn(0); 378 } 379 380 /**********************************************************************/ 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCSPAISetMaxNew" 384 /*@ 385 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 386 in SPAI preconditioner 387 388 Input Parameters: 389 + pc - the preconditioner 390 - n - maximum number (default 5) 391 392 Level: intermediate 393 394 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 395 @*/ 396 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1) 397 { 398 PetscErrorCode ierr,(*f)(PC,int); 399 PetscFunctionBegin; 400 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr); 401 if (f) { 402 ierr = (*f)(pc,maxnew1);CHKERRQ(ierr); 403 } 404 PetscFunctionReturn(0); 405 } 406 407 /**********************************************************************/ 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCSPAISetBlockSize" 411 /*@ 412 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 413 414 Input Parameters: 415 + pc - the preconditioner 416 - n - block size (default 1) 417 418 Notes: A block 419 size of 1 treats A as a matrix of scalar elements. A 420 block size of s > 1 treats A as a matrix of sxs 421 blocks. A block size of 0 treats A as a matrix with 422 variable sized blocks, which are determined by 423 searching for dense square diagonal blocks in A. 424 This can be very effective for finite-element 425 matrices. 426 427 SPAI will convert A to block form, use a block 428 version of the preconditioner algorithm, and then 429 convert the result back to scalar form. 430 431 In many cases the a block-size parameter other than 1 432 can lead to very significant improvement in 433 performance. 434 435 436 Level: intermediate 437 438 .seealso: PCSPAI, PCSetType() 439 @*/ 440 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1) 441 { 442 PetscErrorCode ierr,(*f)(PC,int); 443 PetscFunctionBegin; 444 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr); 445 if (f) { 446 ierr = (*f)(pc,block_size1);CHKERRQ(ierr); 447 } 448 PetscFunctionReturn(0); 449 } 450 451 /**********************************************************************/ 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCSPAISetCacheSize" 455 /*@ 456 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 457 458 Input Parameters: 459 + pc - the preconditioner 460 - n - cache size {0,1,2,3,4,5} (default 5) 461 462 Notes: SPAI uses a hash table to cache messages and avoid 463 redundant communication. If suggest always using 464 5. This parameter is irrelevant in the serial 465 version. 466 467 Level: intermediate 468 469 .seealso: PCSPAI, PCSetType() 470 @*/ 471 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size) 472 { 473 PetscErrorCode ierr,(*f)(PC,int); 474 PetscFunctionBegin; 475 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr); 476 if (f) { 477 ierr = (*f)(pc,cache_size);CHKERRQ(ierr); 478 } 479 PetscFunctionReturn(0); 480 } 481 482 /**********************************************************************/ 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCSPAISetVerbose" 486 /*@ 487 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 488 489 Input Parameters: 490 + pc - the preconditioner 491 - n - level (default 1) 492 493 Notes: print parameters, timings and matrix statistics 494 495 Level: intermediate 496 497 .seealso: PCSPAI, PCSetType() 498 @*/ 499 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose) 500 { 501 PetscErrorCode ierr,(*f)(PC,int); 502 PetscFunctionBegin; 503 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr); 504 if (f) { 505 ierr = (*f)(pc,verbose);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 /**********************************************************************/ 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCSPAISetSp" 514 /*@ 515 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 516 517 Input Parameters: 518 + pc - the preconditioner 519 - n - 0 or 1 520 521 Notes: If A has a symmetric nonzero pattern use -sp 1 to 522 improve performance by eliminating some communication 523 in the parallel version. Even if A does not have a 524 symmetric nonzero pattern -sp 1 may well lead to good 525 results, but the code will not follow the published 526 SPAI algorithm exactly. 527 528 529 Level: intermediate 530 531 .seealso: PCSPAI, PCSetType() 532 @*/ 533 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp) 534 { 535 PetscErrorCode ierr,(*f)(PC,int); 536 PetscFunctionBegin; 537 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr); 538 if (f) { 539 ierr = (*f)(pc,sp);CHKERRQ(ierr); 540 } 541 PetscFunctionReturn(0); 542 } 543 544 /**********************************************************************/ 545 546 /**********************************************************************/ 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "PCSetFromOptions_SPAI" 550 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 551 { 552 PC_SPAI *ispai = (PC_SPAI*)pc->data; 553 PetscErrorCode ierr; 554 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 555 double epsilon1; 556 PetscTruth flg; 557 558 PetscFunctionBegin; 559 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 560 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 561 if (flg) { 562 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 563 } 564 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 565 if (flg) { 566 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 567 } 568 /* added 1/7/99 g.h. */ 569 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 570 if (flg) { 571 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 572 } 573 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 574 if (flg) { 575 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 576 } 577 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 578 if (flg) { 579 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 580 } 581 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 582 if (flg) { 583 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 584 } 585 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 586 if (flg) { 587 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 588 } 589 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 590 if (flg) { 591 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 592 } 593 ierr = PetscOptionsTail();CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 /**********************************************************************/ 598 599 /*MC 600 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 601 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 602 603 Options Database Keys: 604 + -pc_spai_set_epsilon <eps> - set tolerance 605 . -pc_spai_nbstep <n> - set nbsteps 606 . -pc_spai_max <m> - set max 607 . -pc_spai_max_new <m> - set maxnew 608 . -pc_spai_block_size <n> - set block size 609 . -pc_spai_cache_size <n> - set cache size 610 . -pc_spai_sp <m> - set sp 611 - -pc_spai_set_verbose <true,false> - verbose output 612 613 Notes: This only works with AIJ matrices. 614 615 Level: beginner 616 617 Concepts: approximate inverse 618 619 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 620 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 621 PCSPAISetVerbose(), PCSPAISetSp() 622 M*/ 623 624 EXTERN_C_BEGIN 625 #undef __FUNCT__ 626 #define __FUNCT__ "PCCreate_SPAI" 627 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc) 628 { 629 PC_SPAI *ispai; 630 PetscErrorCode ierr; 631 632 PetscFunctionBegin; 633 ierr = PetscNew(PC_SPAI,&ispai);CHKERRQ(ierr); 634 pc->data = (void*)ispai; 635 636 pc->ops->destroy = PCDestroy_SPAI; 637 pc->ops->apply = PCApply_SPAI; 638 pc->ops->applyrichardson = 0; 639 pc->ops->setup = PCSetUp_SPAI; 640 pc->ops->view = PCView_SPAI; 641 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 642 643 pc->name = 0; 644 ispai->epsilon = .4; 645 ispai->nbsteps = 5; 646 ispai->max = 5000; 647 ispai->maxnew = 5; 648 ispai->block_size = 1; 649 ispai->cache_size = 5; 650 ispai->verbose = 0; 651 652 ispai->sp = 1; 653 ierr = MPI_Comm_dup(pc->comm,&(ispai->comm_spai));CHKERRQ(ierr); 654 655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 656 "PCSPAISetEpsilon_SPAI", 657 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 659 "PCSPAISetNBSteps_SPAI", 660 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 662 "PCSPAISetMax_SPAI", 663 PCSPAISetMax_SPAI);CHKERRQ(ierr); 664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 665 "PCSPAISetMaxNew_SPAI", 666 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 668 "PCSPAISetBlockSize_SPAI", 669 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 671 "PCSPAISetCacheSize_SPAI", 672 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 674 "PCSPAISetVerbose_SPAI", 675 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 677 "PCSPAISetSp_SPAI", 678 PCSPAISetSp_SPAI);CHKERRQ(ierr); 679 680 PetscFunctionReturn(0); 681 } 682 EXTERN_C_END 683 684 /**********************************************************************/ 685 686 /* 687 Converts from a PETSc matrix to an SPAI matrix 688 */ 689 #undef __FUNCT__ 690 #define __FUNCT__ "ConvertMatToMatrix" 691 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 692 { 693 matrix *M; 694 int i,j,col; 695 int row_indx; 696 int len,pe,local_indx,start_indx; 697 int *mapping; 698 PetscErrorCode ierr; 699 const int *cols; 700 const double *vals; 701 int *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend; 702 struct compressed_lines *rows; 703 704 PetscFunctionBegin; 705 706 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 708 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 709 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 710 711 /* 712 not sure why a barrier is required. commenting out 713 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 714 */ 715 716 M = new_matrix((void*)comm); 717 718 M->n = n; 719 M->bs = 1; 720 M->max_block_size = 1; 721 722 M->mnls = (int*)malloc(sizeof(int)*size); 723 M->start_indices = (int*)malloc(sizeof(int)*size); 724 M->pe = (int*)malloc(sizeof(int)*n); 725 M->block_sizes = (int*)malloc(sizeof(int)*n); 726 for (i=0; i<n; i++) M->block_sizes[i] = 1; 727 728 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 729 730 M->start_indices[0] = 0; 731 for (i=1; i<size; i++) { 732 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 733 } 734 735 M->mnl = M->mnls[M->myid]; 736 M->my_start_index = M->start_indices[M->myid]; 737 738 for (i=0; i<size; i++) { 739 start_indx = M->start_indices[i]; 740 for (j=0; j<M->mnls[i]; j++) 741 M->pe[start_indx+j] = i; 742 } 743 744 if (AT) { 745 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 746 } else { 747 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 748 } 749 750 rows = M->lines; 751 752 /* Determine the mapping from global indices to pointers */ 753 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 754 pe = 0; 755 local_indx = 0; 756 for (i=0; i<M->n; i++) { 757 if (local_indx >= M->mnls[pe]) { 758 pe++; 759 local_indx = 0; 760 } 761 mapping[i] = local_indx + M->start_indices[pe]; 762 local_indx++; 763 } 764 765 766 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 767 768 /*********************************************************/ 769 /************** Set up the row structure *****************/ 770 /*********************************************************/ 771 772 /* count number of nonzeros in every row */ 773 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 774 for (i=rstart; i<rend; i++) { 775 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 776 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 777 } 778 779 /* allocate buffers */ 780 len = 0; 781 for (i=0; i<mnl; i++) { 782 if (len < num_ptr[i]) len = num_ptr[i]; 783 } 784 785 for (i=rstart; i<rend; i++) { 786 row_indx = i-rstart; 787 len = num_ptr[row_indx]; 788 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 789 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 790 } 791 792 /* copy the matrix */ 793 for (i=rstart; i<rend; i++) { 794 row_indx = i - rstart; 795 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 796 for (j=0; j<nz; j++) { 797 col = cols[j]; 798 len = rows->len[row_indx]++; 799 rows->ptrs[row_indx][len] = mapping[col]; 800 rows->A[row_indx][len] = vals[j]; 801 } 802 rows->slen[row_indx] = rows->len[row_indx]; 803 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 804 } 805 806 807 /************************************************************/ 808 /************** Set up the column structure *****************/ 809 /*********************************************************/ 810 811 if (AT) { 812 813 /* count number of nonzeros in every column */ 814 for (i=rstart; i<rend; i++) { 815 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 816 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 817 } 818 819 /* allocate buffers */ 820 len = 0; 821 for (i=0; i<mnl; i++) { 822 if (len < num_ptr[i]) len = num_ptr[i]; 823 } 824 825 for (i=rstart; i<rend; i++) { 826 row_indx = i-rstart; 827 len = num_ptr[row_indx]; 828 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 829 } 830 831 /* copy the matrix (i.e., the structure) */ 832 for (i=rstart; i<rend; i++) { 833 row_indx = i - rstart; 834 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 835 for (j=0; j<nz; j++) { 836 col = cols[j]; 837 len = rows->rlen[row_indx]++; 838 rows->rptrs[row_indx][len] = mapping[col]; 839 } 840 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 841 } 842 } 843 844 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 845 ierr = PetscFree(mapping);CHKERRQ(ierr); 846 847 order_pointers(M); 848 M->maxnz = calc_maxnz(M); 849 850 *B = M; 851 852 PetscFunctionReturn(0); 853 } 854 855 /**********************************************************************/ 856 857 /* 858 Converts from an SPAI matrix B to a PETSc matrix PB. 859 This assumes that the the SPAI matrix B is stored in 860 COMPRESSED-ROW format. 861 */ 862 #undef __FUNCT__ 863 #define __FUNCT__ "ConvertMatrixToMat" 864 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 865 { 866 int size,rank; 867 PetscErrorCode ierr; 868 int m,n,M,N; 869 int d_nz,o_nz; 870 int *d_nnz,*o_nnz; 871 int i,k,global_row,global_col,first_diag_col,last_diag_col; 872 PetscScalar val; 873 874 PetscFunctionBegin; 875 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 876 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 877 878 m = n = B->mnls[rank]; 879 d_nz = o_nz = 0; 880 881 /* Determine preallocation for MatCreateMPIAIJ */ 882 ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr); 883 ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr); 884 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 885 first_diag_col = B->start_indices[rank]; 886 last_diag_col = first_diag_col + B->mnls[rank]; 887 for (i=0; i<B->mnls[rank]; i++) { 888 for (k=0; k<B->lines->len[i]; k++) { 889 global_col = B->lines->ptrs[i][k]; 890 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 891 d_nnz[i]++; 892 else 893 o_nnz[i]++; 894 } 895 } 896 897 M = N = B->n; 898 /* Here we only know how to create AIJ format */ 899 ierr = MatCreate(comm,m,n,M,N,PB);CHKERRQ(ierr); 900 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 901 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 902 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 903 904 for (i=0; i<B->mnls[rank]; i++) { 905 global_row = B->start_indices[rank]+i; 906 for (k=0; k<B->lines->len[i]; k++) { 907 global_col = B->lines->ptrs[i][k]; 908 val = B->lines->A[i][k]; 909 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 910 } 911 } 912 913 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 914 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 915 916 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 917 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 918 919 PetscFunctionReturn(0); 920 } 921 922 /**********************************************************************/ 923 924 /* 925 Converts from an SPAI vector v to a PETSc vec Pv. 926 */ 927 #undef __FUNCT__ 928 #define __FUNCT__ "ConvertVectorToVec" 929 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 930 { 931 PetscErrorCode ierr; 932 int size,rank,m,M,i,*mnls,*start_indices,*global_indices; 933 934 PetscFunctionBegin; 935 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 936 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 937 938 m = v->mnl; 939 M = v->n; 940 941 942 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 943 944 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 945 ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr); 946 947 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 948 start_indices[0] = 0; 949 for (i=1; i<size; i++) 950 start_indices[i] = start_indices[i-1] +mnls[i-1]; 951 952 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 953 for (i=0; i<v->mnl; i++) 954 global_indices[i] = start_indices[rank] + i; 955 956 ierr = PetscFree(mnls);CHKERRQ(ierr); 957 ierr = PetscFree(start_indices);CHKERRQ(ierr); 958 959 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 960 961 ierr = PetscFree(global_indices);CHKERRQ(ierr); 962 963 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 964 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 965 966 PetscFunctionReturn(0); 967 } 968 969 970 971 972