1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 21 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include <spai.h> 30 #include <matrix.h> 31 EXTERN_C_END 32 33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. 
*/ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(&AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. 
*/ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 93 /* cache_size == 0 indicates no caching */ 94 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 95 verbose == 1 prints timing and matrix statistics */ 96 97 ierr = bspai(ispai->B,&ispai->M, 98 stdout, 99 ispai->epsilon, 100 ispai->nbsteps, 101 ispai->max, 102 ispai->maxnew, 103 ispai->block_size, 104 ispai->cache_size, 105 ispai->verbose);CHKERRQ(ierr); 106 107 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 108 109 /* free the SPAI matrices */ 110 sp_free_matrix(ispai->B); 111 sp_free_matrix(ispai->M); 112 113 PetscFunctionReturn(0); 114 } 115 116 /**********************************************************************/ 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCApply_SPAI" 120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 121 { 122 PC_SPAI *ispai = (PC_SPAI*)pc->data; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 /* Now using PETSc's multiply */ 127 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /**********************************************************************/ 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "PCDestroy_SPAI" 135 static PetscErrorCode PCDestroy_SPAI(PC pc) 136 { 137 PetscErrorCode ierr; 138 PC_SPAI *ispai = (PC_SPAI*)pc->data; 139 140 PetscFunctionBegin; 141 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 142 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 143 ierr = PetscFree(pc->data);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 /**********************************************************************/ 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCView_SPAI" 151 static PetscErrorCode 
PCView_SPAI(PC pc,PetscViewer viewer) 152 { 153 PC_SPAI *ispai = (PC_SPAI*)pc->data; 154 PetscErrorCode ierr; 155 PetscBool iascii; 156 157 PetscFunctionBegin; 158 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 159 if (iascii) { 160 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 EXTERN_C_BEGIN 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 176 PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 177 { 178 PC_SPAI *ispai = (PC_SPAI*)pc->data; 179 180 PetscFunctionBegin; 181 ispai->epsilon = epsilon1; 182 PetscFunctionReturn(0); 183 } 184 EXTERN_C_END 185 186 /**********************************************************************/ 187 188 EXTERN_C_BEGIN 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 191 PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 192 { 193 PC_SPAI *ispai = (PC_SPAI*)pc->data; 194 195 PetscFunctionBegin; 196 ispai->nbsteps = nbsteps1; 197 PetscFunctionReturn(0); 198 } 199 EXTERN_C_END 200 201 /**********************************************************************/ 202 203 /* added 1/7/99 g.h. 
*/ 204 EXTERN_C_BEGIN 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCSPAISetMax_SPAI" 207 PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 208 { 209 PC_SPAI *ispai = (PC_SPAI*)pc->data; 210 211 PetscFunctionBegin; 212 ispai->max = max1; 213 PetscFunctionReturn(0); 214 } 215 EXTERN_C_END 216 217 /**********************************************************************/ 218 219 EXTERN_C_BEGIN 220 #undef __FUNCT__ 221 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 222 PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 223 { 224 PC_SPAI *ispai = (PC_SPAI*)pc->data; 225 226 PetscFunctionBegin; 227 ispai->maxnew = maxnew1; 228 PetscFunctionReturn(0); 229 } 230 EXTERN_C_END 231 232 /**********************************************************************/ 233 234 EXTERN_C_BEGIN 235 #undef __FUNCT__ 236 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 237 PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 238 { 239 PC_SPAI *ispai = (PC_SPAI*)pc->data; 240 241 PetscFunctionBegin; 242 ispai->block_size = block_size1; 243 PetscFunctionReturn(0); 244 } 245 EXTERN_C_END 246 247 /**********************************************************************/ 248 249 EXTERN_C_BEGIN 250 #undef __FUNCT__ 251 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 252 PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 253 { 254 PC_SPAI *ispai = (PC_SPAI*)pc->data; 255 256 PetscFunctionBegin; 257 ispai->cache_size = cache_size; 258 PetscFunctionReturn(0); 259 } 260 EXTERN_C_END 261 262 /**********************************************************************/ 263 264 EXTERN_C_BEGIN 265 #undef __FUNCT__ 266 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 267 PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 268 { 269 PC_SPAI *ispai = (PC_SPAI*)pc->data; 270 271 PetscFunctionBegin; 272 ispai->verbose = verbose; 273 PetscFunctionReturn(0); 274 } 275 EXTERN_C_END 276 277 /**********************************************************************/ 278 279 EXTERN_C_BEGIN 280 #undef __FUNCT__ 281 #define 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp_SPAI"
/* Type-specific implementation of PCSPAISetSp(); records the symmetric
   nonzero-pattern flag in the PC_SPAI context. */
PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->sp = sp;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- eps - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
  @*/
PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetEpsilon_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - block size (default 1)

  Notes: A block size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/
521 522 523 Level: intermediate 524 525 .seealso: PCSPAI, PCSetType() 526 @*/ 527 PetscErrorCode PCSPAISetSp(PC pc,int sp) 528 { 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 /**********************************************************************/ 537 538 /**********************************************************************/ 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "PCSetFromOptions_SPAI" 542 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 543 { 544 PC_SPAI *ispai = (PC_SPAI*)pc->data; 545 PetscErrorCode ierr; 546 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 547 double epsilon1; 548 PetscBool flg; 549 550 PetscFunctionBegin; 551 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 552 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 553 if (flg) { 554 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 555 } 556 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 557 if (flg) { 558 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 559 } 560 /* added 1/7/99 g.h. 
*/ 561 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 562 if (flg) { 563 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 564 } 565 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 566 if (flg) { 567 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 568 } 569 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 570 if (flg) { 571 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 572 } 573 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 574 if (flg) { 575 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 576 } 577 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 578 if (flg) { 579 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 580 } 581 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 582 if (flg) { 583 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 584 } 585 ierr = PetscOptionsTail();CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 /**********************************************************************/ 590 591 /*MC 592 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 593 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 594 595 Options Database Keys: 596 + -pc_spai_epsilon <eps> - set tolerance 597 . -pc_spai_nbstep <n> - set nbsteps 598 . -pc_spai_max <m> - set max 599 . -pc_spai_max_new <m> - set maxnew 600 . -pc_spai_block_size <n> - set block size 601 . -pc_spai_cache_size <n> - set cache size 602 . -pc_spai_sp <m> - set sp 603 - -pc_spai_set_verbose <true,false> - verbose output 604 605 Notes: This only works with AIJ matrices. 
606 607 Level: beginner 608 609 Concepts: approximate inverse 610 611 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 612 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 613 PCSPAISetVerbose(), PCSPAISetSp() 614 M*/ 615 616 EXTERN_C_BEGIN 617 #undef __FUNCT__ 618 #define __FUNCT__ "PCCreate_SPAI" 619 PetscErrorCode PCCreate_SPAI(PC pc) 620 { 621 PC_SPAI *ispai; 622 PetscErrorCode ierr; 623 624 PetscFunctionBegin; 625 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 626 pc->data = ispai; 627 628 pc->ops->destroy = PCDestroy_SPAI; 629 pc->ops->apply = PCApply_SPAI; 630 pc->ops->applyrichardson = 0; 631 pc->ops->setup = PCSetUp_SPAI; 632 pc->ops->view = PCView_SPAI; 633 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 634 635 ispai->epsilon = .4; 636 ispai->nbsteps = 5; 637 ispai->max = 5000; 638 ispai->maxnew = 5; 639 ispai->block_size = 1; 640 ispai->cache_size = 5; 641 ispai->verbose = 0; 642 643 ispai->sp = 1; 644 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 645 646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 647 "PCSPAISetEpsilon_SPAI", 648 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 649 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 650 "PCSPAISetNBSteps_SPAI", 651 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 652 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 653 "PCSPAISetMax_SPAI", 654 PCSPAISetMax_SPAI);CHKERRQ(ierr); 655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 656 "PCSPAISetMaxNew_SPAI", 657 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 659 "PCSPAISetBlockSize_SPAI", 660 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 662 "PCSPAISetCacheSize_SPAI", 663 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 
664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 665 "PCSPAISetVerbose_SPAI", 666 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 668 "PCSPAISetSp_SPAI", 669 PCSPAISetSp_SPAI);CHKERRQ(ierr); 670 671 PetscFunctionReturn(0); 672 } 673 EXTERN_C_END 674 675 /**********************************************************************/ 676 677 /* 678 Converts from a PETSc matrix to an SPAI matrix 679 */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "ConvertMatToMatrix" 682 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 683 { 684 matrix *M; 685 int i,j,col; 686 int row_indx; 687 int len,pe,local_indx,start_indx; 688 int *mapping; 689 PetscErrorCode ierr; 690 const int *cols; 691 const double *vals; 692 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 693 PetscMPIInt size,rank; 694 struct compressed_lines *rows; 695 696 PetscFunctionBegin; 697 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 698 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 699 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 700 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 701 702 /* 703 not sure why a barrier is required. 
commenting out 704 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 705 */ 706 707 M = new_matrix((SPAI_Comm)comm); 708 709 M->n = n; 710 M->bs = 1; 711 M->max_block_size = 1; 712 713 M->mnls = (int*)malloc(sizeof(int)*size); 714 M->start_indices = (int*)malloc(sizeof(int)*size); 715 M->pe = (int*)malloc(sizeof(int)*n); 716 M->block_sizes = (int*)malloc(sizeof(int)*n); 717 for (i=0; i<n; i++) M->block_sizes[i] = 1; 718 719 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 720 721 M->start_indices[0] = 0; 722 for (i=1; i<size; i++) { 723 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 724 } 725 726 M->mnl = M->mnls[M->myid]; 727 M->my_start_index = M->start_indices[M->myid]; 728 729 for (i=0; i<size; i++) { 730 start_indx = M->start_indices[i]; 731 for (j=0; j<M->mnls[i]; j++) 732 M->pe[start_indx+j] = i; 733 } 734 735 if (AT) { 736 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 737 } else { 738 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 739 } 740 741 rows = M->lines; 742 743 /* Determine the mapping from global indices to pointers */ 744 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 745 pe = 0; 746 local_indx = 0; 747 for (i=0; i<M->n; i++) { 748 if (local_indx >= M->mnls[pe]) { 749 pe++; 750 local_indx = 0; 751 } 752 mapping[i] = local_indx + M->start_indices[pe]; 753 local_indx++; 754 } 755 756 757 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 758 759 /*********************************************************/ 760 /************** Set up the row structure *****************/ 761 /*********************************************************/ 762 763 /* count number of nonzeros in every row */ 764 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 765 for (i=rstart; i<rend; i++) { 766 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 767 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 768 } 769 770 /* 
allocate buffers */ 771 len = 0; 772 for (i=0; i<mnl; i++) { 773 if (len < num_ptr[i]) len = num_ptr[i]; 774 } 775 776 for (i=rstart; i<rend; i++) { 777 row_indx = i-rstart; 778 len = num_ptr[row_indx]; 779 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 780 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 781 } 782 783 /* copy the matrix */ 784 for (i=rstart; i<rend; i++) { 785 row_indx = i - rstart; 786 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 787 for (j=0; j<nz; j++) { 788 col = cols[j]; 789 len = rows->len[row_indx]++; 790 rows->ptrs[row_indx][len] = mapping[col]; 791 rows->A[row_indx][len] = vals[j]; 792 } 793 rows->slen[row_indx] = rows->len[row_indx]; 794 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 795 } 796 797 798 /************************************************************/ 799 /************** Set up the column structure *****************/ 800 /*********************************************************/ 801 802 if (AT) { 803 804 /* count number of nonzeros in every column */ 805 for (i=rstart; i<rend; i++) { 806 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 807 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 808 } 809 810 /* allocate buffers */ 811 len = 0; 812 for (i=0; i<mnl; i++) { 813 if (len < num_ptr[i]) len = num_ptr[i]; 814 } 815 816 for (i=rstart; i<rend; i++) { 817 row_indx = i-rstart; 818 len = num_ptr[row_indx]; 819 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 820 } 821 822 /* copy the matrix (i.e., the structure) */ 823 for (i=rstart; i<rend; i++) { 824 row_indx = i - rstart; 825 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 826 for (j=0; j<nz; j++) { 827 col = cols[j]; 828 len = rows->rlen[row_indx]++; 829 rows->rptrs[row_indx][len] = mapping[col]; 830 } 831 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 832 } 833 } 834 835 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 836 ierr = PetscFree(mapping);CHKERRQ(ierr); 
837 838 order_pointers(M); 839 M->maxnz = calc_maxnz(M); 840 841 *B = M; 842 843 PetscFunctionReturn(0); 844 } 845 846 /**********************************************************************/ 847 848 /* 849 Converts from an SPAI matrix B to a PETSc matrix PB. 850 This assumes that the the SPAI matrix B is stored in 851 COMPRESSED-ROW format. 852 */ 853 #undef __FUNCT__ 854 #define __FUNCT__ "ConvertMatrixToMat" 855 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 856 { 857 PetscMPIInt size,rank; 858 PetscErrorCode ierr; 859 int m,n,M,N; 860 int d_nz,o_nz; 861 int *d_nnz,*o_nnz; 862 int i,k,global_row,global_col,first_diag_col,last_diag_col; 863 PetscScalar val; 864 865 PetscFunctionBegin; 866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 867 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 868 869 m = n = B->mnls[rank]; 870 d_nz = o_nz = 0; 871 872 /* Determine preallocation for MatCreateMPIAIJ */ 873 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 874 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 875 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 876 first_diag_col = B->start_indices[rank]; 877 last_diag_col = first_diag_col + B->mnls[rank]; 878 for (i=0; i<B->mnls[rank]; i++) { 879 for (k=0; k<B->lines->len[i]; k++) { 880 global_col = B->lines->ptrs[i][k]; 881 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 882 else o_nnz[i]++; 883 } 884 } 885 886 M = N = B->n; 887 /* Here we only know how to create AIJ format */ 888 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 889 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 890 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 891 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 892 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 893 894 for (i=0; i<B->mnls[rank]; i++) { 895 global_row = B->start_indices[rank]+i; 896 for (k=0; k<B->lines->len[i]; k++) { 897 global_col = B->lines->ptrs[i][k]; 898 val = 
/*
   Converts from an SPAI matrix B to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.

   Output Parameter:
.  PB - newly created parallel AIJ matrix; caller destroys it
*/
#undef __FUNCT__
#define __FUNCT__ "ConvertMatrixToMat"
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
{
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;
  int            m,n,M,N;                    /* local and global dimensions */
  int            d_nz,o_nz;                  /* unused uniform counts (kept 0) */
  int            *d_nnz,*o_nnz;              /* per-row diagonal/off-diagonal counts */
  int            i,k,global_row,global_col,first_diag_col,last_diag_col;
  PetscScalar    val;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* the SPAI row distribution becomes the PETSc row distribution */
  m = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateMPIAIJ: a column inside this
     rank's owned index range counts toward the diagonal block, anything
     else toward the off-diagonal block */
  ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i=0; i<B->mnls[rank]; i++) {
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format; only the preallocation
     call matching the actual (Seq or MPI) type takes effect */
  ierr = MatCreate(comm,PB);CHKERRQ(ierr);
  ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

  /* each rank inserts only its own rows, one entry at a time */
  for (i=0; i<B->mnls[rank]; i++) {
    global_row = B->start_indices[rank]+i;
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      val = B->lines->A[i][k];
      ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = PetscFree(o_nnz);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   Converts from an SPAI vector v to a PETSc vec Pv.

   Output Parameter:
.  Pv - newly created parallel vector holding v's local entries; caller
        destroys it
*/
#undef __FUNCT__
#define __FUNCT__ "ConvertVectorToVec"
PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  int            m,M,i,*mnls,*start_indices,*global_indices;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  m = v->mnl;   /* local length */
  M = v->n;     /* global length */

  ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);

  /* gather every rank's local length to compute the global offset of
     this rank's first entry */
  ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
  ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);

  ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);

  start_indices[0] = 0;
  for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];

  ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
  for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;

  ierr = PetscFree(mnls);CHKERRQ(ierr);
  ierr = PetscFree(start_indices);CHKERRQ(ierr);

  ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);

  ierr = PetscFree(global_indices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}