1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 21 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include <spai.h> 30 #include <matrix.h> 31 EXTERN_C_END 32 33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(&AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. */ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 93 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 94 verbose == 1 prints timing and matrix statistics */ 95 96 ierr = bspai(ispai->B,&ispai->M, 97 stdout, 98 ispai->epsilon, 99 ispai->nbsteps, 100 ispai->max, 101 ispai->maxnew, 102 ispai->block_size, 103 ispai->cache_size, 104 ispai->verbose);CHKERRQ(ierr); 105 106 ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr); 107 108 /* free the SPAI matrices */ 109 sp_free_matrix(ispai->B); 110 sp_free_matrix(ispai->M); 111 PetscFunctionReturn(0); 112 } 113 114 /**********************************************************************/ 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCApply_SPAI" 118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 119 { 120 PC_SPAI *ispai = (PC_SPAI*)pc->data; 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 /* Now using PETSc's multiply */ 125 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 /**********************************************************************/ 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "PCDestroy_SPAI" 133 static PetscErrorCode PCDestroy_SPAI(PC pc) 134 { 135 PetscErrorCode ierr; 136 PC_SPAI *ispai = (PC_SPAI*)pc->data; 137 138 PetscFunctionBegin; 139 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 140 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 141 ierr = PetscFree(pc->data);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 /**********************************************************************/ 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCView_SPAI" 149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 150 { 151 PC_SPAI *ispai = (PC_SPAI*)pc->data; 152 PetscErrorCode ierr; 153 PetscBool iascii; 154 155 PetscFunctionBegin; 156 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 157 if (iascii) { 158 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 159 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 160 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 EXTERN_C_BEGIN 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 174 PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 175 { 176 PC_SPAI *ispai = (PC_SPAI*)pc->data; 177 178 PetscFunctionBegin; 179 ispai->epsilon = epsilon1; 180 PetscFunctionReturn(0); 181 } 182 EXTERN_C_END 183 184 /**********************************************************************/ 185 186 EXTERN_C_BEGIN 187 #undef __FUNCT__ 188 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 189 PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 190 { 191 PC_SPAI *ispai = (PC_SPAI*)pc->data; 192 193 PetscFunctionBegin; 194 ispai->nbsteps = nbsteps1; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 199 /**********************************************************************/ 200 201 /* added 1/7/99 g.h. */ 202 EXTERN_C_BEGIN 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCSPAISetMax_SPAI" 205 PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 206 { 207 PC_SPAI *ispai = (PC_SPAI*)pc->data; 208 209 PetscFunctionBegin; 210 ispai->max = max1; 211 PetscFunctionReturn(0); 212 } 213 EXTERN_C_END 214 215 /**********************************************************************/ 216 217 EXTERN_C_BEGIN 218 #undef __FUNCT__ 219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 220 PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 221 { 222 PC_SPAI *ispai = (PC_SPAI*)pc->data; 223 224 PetscFunctionBegin; 225 ispai->maxnew = maxnew1; 226 PetscFunctionReturn(0); 227 } 228 EXTERN_C_END 229 230 /**********************************************************************/ 231 232 EXTERN_C_BEGIN 233 #undef __FUNCT__ 234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 235 PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 236 { 237 PC_SPAI *ispai = (PC_SPAI*)pc->data; 238 239 PetscFunctionBegin; 240 ispai->block_size = block_size1; 241 PetscFunctionReturn(0); 242 } 243 EXTERN_C_END 244 245 /**********************************************************************/ 246 247 EXTERN_C_BEGIN 248 #undef __FUNCT__ 249 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 250 PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 251 { 252 PC_SPAI *ispai = (PC_SPAI*)pc->data; 253 254 PetscFunctionBegin; 255 ispai->cache_size = cache_size; 256 PetscFunctionReturn(0); 257 } 258 EXTERN_C_END 259 260 /**********************************************************************/ 261 262 EXTERN_C_BEGIN 263 #undef __FUNCT__ 264 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 265 PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 266 { 267 PC_SPAI *ispai = (PC_SPAI*)pc->data; 268 269 PetscFunctionBegin; 270 ispai->verbose = verbose; 271 PetscFunctionReturn(0); 272 } 273 EXTERN_C_END 274 275 /**********************************************************************/ 276 277 EXTERN_C_BEGIN 278 #undef __FUNCT__ 279 #define __FUNCT__ "PCSPAISetSp_SPAI" 280 PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 281 { 282 PC_SPAI *ispai = (PC_SPAI*)pc->data; 283 284 PetscFunctionBegin; 285 ispai->sp = sp; 286 PetscFunctionReturn(0); 287 } 288 EXTERN_C_END 289 290 /* -------------------------------------------------------------------*/ 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "PCSPAISetEpsilon" 294 /*@ 295 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 296 297 Input Parameters: 298 + pc - the preconditioner 299 - eps - epsilon (default .4) 300 301 Notes: Espilon must be between 0 and 1. It controls the 302 quality of the approximation of M to the inverse of 303 A. Higher values of epsilon lead to more work, more 304 fill, and usually better preconditioners. In many 305 cases the best choice of epsilon is the one that 306 divides the total solution time equally between the 307 preconditioner and the solver. 308 309 Level: intermediate 310 311 .seealso: PCSPAI, PCSetType() 312 @*/ 313 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 314 { 315 PetscErrorCode ierr; 316 317 PetscFunctionBegin; 318 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 /**********************************************************************/ 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCSPAISetNBSteps" 326 /*@ 327 PCSPAISetNBSteps - set maximum number of improvement steps per row in 328 the SPAI preconditioner 329 330 Input Parameters: 331 + pc - the preconditioner 332 - n - number of steps (default 5) 333 334 Notes: SPAI constructs to approximation to every column of 335 the exact inverse of A in a series of improvement 336 steps. The quality of the approximation is determined 337 by epsilon. If an approximation achieving an accuracy 338 of epsilon is not obtained after ns steps, SPAI simply 339 uses the best approximation constructed so far. 340 341 Level: intermediate 342 343 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 344 @*/ 345 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 346 { 347 PetscErrorCode ierr; 348 349 PetscFunctionBegin; 350 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 354 /**********************************************************************/ 355 356 /* added 1/7/99 g.h. */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCSPAISetMax" 359 /*@ 360 PCSPAISetMax - set the size of various working buffers in 361 the SPAI preconditioner 362 363 Input Parameters: 364 + pc - the preconditioner 365 - n - size (default is 5000) 366 367 Level: intermediate 368 369 .seealso: PCSPAI, PCSetType() 370 @*/ 371 PetscErrorCode PCSPAISetMax(PC pc,int max1) 372 { 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 /**********************************************************************/ 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCSPAISetMaxNew" 384 /*@ 385 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 386 in SPAI preconditioner 387 388 Input Parameters: 389 + pc - the preconditioner 390 - n - maximum number (default 5) 391 392 Level: intermediate 393 394 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 395 @*/ 396 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 397 { 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 /**********************************************************************/ 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCSPAISetBlockSize" 409 /*@ 410 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 411 412 Input Parameters: 413 + pc - the preconditioner 414 - n - block size (default 1) 415 416 Notes: A block 417 size of 1 treats A as a matrix of scalar elements. A 418 block size of s > 1 treats A as a matrix of sxs 419 blocks. A block size of 0 treats A as a matrix with 420 variable sized blocks, which are determined by 421 searching for dense square diagonal blocks in A. 422 This can be very effective for finite-element 423 matrices. 424 425 SPAI will convert A to block form, use a block 426 version of the preconditioner algorithm, and then 427 convert the result back to scalar form. 428 429 In many cases the a block-size parameter other than 1 430 can lead to very significant improvement in 431 performance. 432 433 434 Level: intermediate 435 436 .seealso: PCSPAI, PCSetType() 437 @*/ 438 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 439 { 440 PetscErrorCode ierr; 441 442 PetscFunctionBegin; 443 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 /**********************************************************************/ 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "PCSPAISetCacheSize" 451 /*@ 452 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 453 454 Input Parameters: 455 + pc - the preconditioner 456 - n - cache size {0,1,2,3,4,5} (default 5) 457 458 Notes: SPAI uses a hash table to cache messages and avoid 459 redundant communication. If suggest always using 460 5. This parameter is irrelevant in the serial 461 version. 462 463 Level: intermediate 464 465 .seealso: PCSPAI, PCSetType() 466 @*/ 467 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 468 { 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 /**********************************************************************/ 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCSPAISetVerbose" 480 /*@ 481 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 482 483 Input Parameters: 484 + pc - the preconditioner 485 - n - level (default 1) 486 487 Notes: print parameters, timings and matrix statistics 488 489 Level: intermediate 490 491 .seealso: PCSPAI, PCSetType() 492 @*/ 493 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 494 { 495 PetscErrorCode ierr; 496 497 PetscFunctionBegin; 498 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 /**********************************************************************/ 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCSPAISetSp" 506 /*@ 507 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 508 509 Input Parameters: 510 + pc - the preconditioner 511 - n - 0 or 1 512 513 Notes: If A has a symmetric nonzero pattern use -sp 1 to 514 improve performance by eliminating some communication 515 in the parallel version. Even if A does not have a 516 symmetric nonzero pattern -sp 1 may well lead to good 517 results, but the code will not follow the published 518 SPAI algorithm exactly. 519 520 521 Level: intermediate 522 523 .seealso: PCSPAI, PCSetType() 524 @*/ 525 PetscErrorCode PCSPAISetSp(PC pc,int sp) 526 { 527 PetscErrorCode ierr; 528 529 PetscFunctionBegin; 530 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 /**********************************************************************/ 535 536 /**********************************************************************/ 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCSetFromOptions_SPAI" 540 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 541 { 542 PC_SPAI *ispai = (PC_SPAI*)pc->data; 543 PetscErrorCode ierr; 544 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 545 double epsilon1; 546 PetscBool flg; 547 548 PetscFunctionBegin; 549 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 550 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 551 if (flg) { 552 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 553 } 554 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 555 if (flg) { 556 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 557 } 558 /* added 1/7/99 g.h. */ 559 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 560 if (flg) { 561 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 562 } 563 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 564 if (flg) { 565 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 566 } 567 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 568 if (flg) { 569 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 570 } 571 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 572 if (flg) { 573 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 574 } 575 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 576 if (flg) { 577 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 578 } 579 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 580 if (flg) { 581 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 582 } 583 ierr = PetscOptionsTail();CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 /**********************************************************************/ 588 589 /*MC 590 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 591 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 592 593 Options Database Keys: 594 + -pc_spai_epsilon <eps> - set tolerance 595 . -pc_spai_nbstep <n> - set nbsteps 596 . -pc_spai_max <m> - set max 597 . -pc_spai_max_new <m> - set maxnew 598 . -pc_spai_block_size <n> - set block size 599 . -pc_spai_cache_size <n> - set cache size 600 . -pc_spai_sp <m> - set sp 601 - -pc_spai_set_verbose <true,false> - verbose output 602 603 Notes: This only works with AIJ matrices. 604 605 Level: beginner 606 607 Concepts: approximate inverse 608 609 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 610 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 611 PCSPAISetVerbose(), PCSPAISetSp() 612 M*/ 613 614 EXTERN_C_BEGIN 615 #undef __FUNCT__ 616 #define __FUNCT__ "PCCreate_SPAI" 617 PetscErrorCode PCCreate_SPAI(PC pc) 618 { 619 PC_SPAI *ispai; 620 PetscErrorCode ierr; 621 622 PetscFunctionBegin; 623 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 624 pc->data = ispai; 625 626 pc->ops->destroy = PCDestroy_SPAI; 627 pc->ops->apply = PCApply_SPAI; 628 pc->ops->applyrichardson = 0; 629 pc->ops->setup = PCSetUp_SPAI; 630 pc->ops->view = PCView_SPAI; 631 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 632 633 ispai->epsilon = .4; 634 ispai->nbsteps = 5; 635 ispai->max = 5000; 636 ispai->maxnew = 5; 637 ispai->block_size = 1; 638 ispai->cache_size = 5; 639 ispai->verbose = 0; 640 641 ispai->sp = 1; 642 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr); 643 644 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C","PCSPAISetEpsilon_SPAI",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 645 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C","PCSPAISetNBSteps_SPAI",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 646 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C","PCSPAISetMax_SPAI",PCSPAISetMax_SPAI);CHKERRQ(ierr); 647 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C","PCSPAISetMaxNew_SPAI",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 648 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C","PCSPAISetBlockSize_SPAI",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 649 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C","PCSPAISetCacheSize_SPAI",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 650 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C","PCSPAISetVerbose_SPAI",PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 651 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C","PCSPAISetSp_SPAI",PCSPAISetSp_SPAI);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 EXTERN_C_END 655 656 /**********************************************************************/ 657 658 /* 659 Converts from a PETSc matrix to an SPAI matrix 660 */ 661 #undef __FUNCT__ 662 #define __FUNCT__ "ConvertMatToMatrix" 663 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 664 { 665 matrix *M; 666 int i,j,col; 667 int row_indx; 668 int len,pe,local_indx,start_indx; 669 int *mapping; 670 PetscErrorCode ierr; 671 const int *cols; 672 const double *vals; 673 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 674 PetscMPIInt size,rank; 675 struct compressed_lines *rows; 676 677 PetscFunctionBegin; 678 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 679 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 680 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 681 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 682 683 /* 684 not sure why a barrier is required. commenting out 685 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 686 */ 687 688 M = new_matrix((SPAI_Comm)comm); 689 690 M->n = n; 691 M->bs = 1; 692 M->max_block_size = 1; 693 694 M->mnls = (int*)malloc(sizeof(int)*size); 695 M->start_indices = (int*)malloc(sizeof(int)*size); 696 M->pe = (int*)malloc(sizeof(int)*n); 697 M->block_sizes = (int*)malloc(sizeof(int)*n); 698 for (i=0; i<n; i++) M->block_sizes[i] = 1; 699 700 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 701 702 M->start_indices[0] = 0; 703 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 704 705 M->mnl = M->mnls[M->myid]; 706 M->my_start_index = M->start_indices[M->myid]; 707 708 for (i=0; i<size; i++) { 709 start_indx = M->start_indices[i]; 710 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 711 } 712 713 if (AT) { 714 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 715 } else { 716 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 717 } 718 719 rows = M->lines; 720 721 /* Determine the mapping from global indices to pointers */ 722 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 723 pe = 0; 724 local_indx = 0; 725 for (i=0; i<M->n; i++) { 726 if (local_indx >= M->mnls[pe]) { 727 pe++; 728 local_indx = 0; 729 } 730 mapping[i] = local_indx + M->start_indices[pe]; 731 local_indx++; 732 } 733 734 735 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 736 737 /*********************************************************/ 738 /************** Set up the row structure *****************/ 739 /*********************************************************/ 740 741 /* count number of nonzeros in every row */ 742 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 743 for (i=rstart; i<rend; i++) { 744 ierr = MatGetRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 745 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 746 } 747 748 /* allocate buffers */ 749 len = 0; 750 for (i=0; i<mnl; i++) { 751 if (len < num_ptr[i]) len = num_ptr[i]; 752 } 753 754 for (i=rstart; i<rend; i++) { 755 row_indx = i-rstart; 756 len = num_ptr[row_indx]; 757 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 758 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 759 } 760 761 /* copy the matrix */ 762 for (i=rstart; i<rend; i++) { 763 row_indx = i - rstart; 764 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 765 for (j=0; j<nz; j++) { 766 col = cols[j]; 767 len = rows->len[row_indx]++; 768 769 rows->ptrs[row_indx][len] = mapping[col]; 770 rows->A[row_indx][len] = vals[j]; 771 } 772 rows->slen[row_indx] = rows->len[row_indx]; 773 774 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 775 } 776 777 778 /************************************************************/ 779 /************** Set up the column structure *****************/ 780 /*********************************************************/ 781 782 if (AT) { 783 784 /* count number of nonzeros in every column */ 785 for (i=rstart; i<rend; i++) { 786 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 787 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 788 } 789 790 /* allocate buffers */ 791 len = 0; 792 for (i=0; i<mnl; i++) { 793 if (len < num_ptr[i]) len = num_ptr[i]; 794 } 795 796 for (i=rstart; i<rend; i++) { 797 row_indx = i-rstart; 798 len = num_ptr[row_indx]; 799 800 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 801 } 802 803 /* copy the matrix (i.e., the structure) */ 804 for (i=rstart; i<rend; i++) { 805 row_indx = i - rstart; 806 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 807 for (j=0; j<nz; j++) { 808 col = cols[j]; 809 len = rows->rlen[row_indx]++; 810 811 rows->rptrs[row_indx][len] = mapping[col]; 812 } 813 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 814 } 815 } 816 817 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 818 ierr = PetscFree(mapping);CHKERRQ(ierr); 819 820 order_pointers(M); 821 M->maxnz = calc_maxnz(M); 822 *B = M; 823 PetscFunctionReturn(0); 824 } 825 826 /**********************************************************************/ 827 828 /* 829 Converts from an SPAI matrix B to a PETSc matrix PB. 830 This assumes that the the SPAI matrix B is stored in 831 COMPRESSED-ROW format. 832 */ 833 #undef __FUNCT__ 834 #define __FUNCT__ "ConvertMatrixToMat" 835 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 836 { 837 PetscMPIInt size,rank; 838 PetscErrorCode ierr; 839 int m,n,M,N; 840 int d_nz,o_nz; 841 int *d_nnz,*o_nnz; 842 int i,k,global_row,global_col,first_diag_col,last_diag_col; 843 PetscScalar val; 844 845 PetscFunctionBegin; 846 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 847 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 848 849 m = n = B->mnls[rank]; 850 d_nz = o_nz = 0; 851 852 /* Determine preallocation for MatCreateMPIAIJ */ 853 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 854 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 855 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 856 first_diag_col = B->start_indices[rank]; 857 last_diag_col = first_diag_col + B->mnls[rank]; 858 for (i=0; i<B->mnls[rank]; i++) { 859 for (k=0; k<B->lines->len[i]; k++) { 860 global_col = B->lines->ptrs[i][k]; 861 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 862 else o_nnz[i]++; 863 } 864 } 865 866 M = N = B->n; 867 /* Here we only know how to create AIJ format */ 868 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 869 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 870 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 871 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 872 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 873 874 for (i=0; i<B->mnls[rank]; i++) { 875 global_row = B->start_indices[rank]+i; 876 for (k=0; k<B->lines->len[i]; k++) { 877 global_col = B->lines->ptrs[i][k]; 878 879 val = B->lines->A[i][k]; 880 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 881 } 882 } 883 884 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 885 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 886 887 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 /**********************************************************************/ 893 894 /* 895 Converts from an SPAI vector v to a PETSc vec Pv. 896 */ 897 #undef __FUNCT__ 898 #define __FUNCT__ "ConvertVectorToVec" 899 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 900 { 901 PetscErrorCode ierr; 902 PetscMPIInt size,rank; 903 int m,M,i,*mnls,*start_indices,*global_indices; 904 905 PetscFunctionBegin; 906 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 907 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 908 909 m = v->mnl; 910 M = v->n; 911 912 913 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 914 915 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 916 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 917 918 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 919 920 start_indices[0] = 0; 921 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 922 923 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 924 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 925 926 ierr = PetscFree(mnls);CHKERRQ(ierr); 927 ierr = PetscFree(start_indices);CHKERRQ(ierr); 928 929 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 930 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 931 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 932 933 ierr = PetscFree(global_indices);CHKERRQ(ierr); 934 PetscFunctionReturn(0); 935 } 936 937 938 939 940