1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 21 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include <spai.h> 30 #include <matrix.h> 31 EXTERN_C_END 32 33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(&AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. */ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 93 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 94 verbose == 1 prints timing and matrix statistics */ 95 96 ierr = bspai(ispai->B,&ispai->M, 97 stdout, 98 ispai->epsilon, 99 ispai->nbsteps, 100 ispai->max, 101 ispai->maxnew, 102 ispai->block_size, 103 ispai->cache_size, 104 ispai->verbose);CHKERRQ(ierr); 105 106 ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr); 107 108 /* free the SPAI matrices */ 109 sp_free_matrix(ispai->B); 110 sp_free_matrix(ispai->M); 111 PetscFunctionReturn(0); 112 } 113 114 /**********************************************************************/ 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCApply_SPAI" 118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 119 { 120 PC_SPAI *ispai = (PC_SPAI*)pc->data; 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 /* Now using PETSc's multiply */ 125 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 /**********************************************************************/ 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "PCDestroy_SPAI" 133 static PetscErrorCode PCDestroy_SPAI(PC pc) 134 { 135 PetscErrorCode ierr; 136 PC_SPAI *ispai = (PC_SPAI*)pc->data; 137 138 PetscFunctionBegin; 139 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 140 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 141 ierr = PetscFree(pc->data);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 /**********************************************************************/ 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCView_SPAI" 149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 150 { 151 PC_SPAI *ispai = (PC_SPAI*)pc->data; 152 PetscErrorCode ierr; 153 PetscBool iascii; 154 155 PetscFunctionBegin; 156 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 157 if (iascii) { 158 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 159 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 160 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 173 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 174 { 175 PC_SPAI *ispai = (PC_SPAI*)pc->data; 176 177 PetscFunctionBegin; 178 ispai->epsilon = epsilon1; 179 PetscFunctionReturn(0); 180 } 181 182 /**********************************************************************/ 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 186 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 187 { 188 PC_SPAI *ispai = (PC_SPAI*)pc->data; 189 190 PetscFunctionBegin; 191 ispai->nbsteps = nbsteps1; 192 PetscFunctionReturn(0); 193 } 194 195 /**********************************************************************/ 196 197 /* added 1/7/99 g.h. */ 198 #undef __FUNCT__ 199 #define __FUNCT__ "PCSPAISetMax_SPAI" 200 static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 201 { 202 PC_SPAI *ispai = (PC_SPAI*)pc->data; 203 204 PetscFunctionBegin; 205 ispai->max = max1; 206 PetscFunctionReturn(0); 207 } 208 209 /**********************************************************************/ 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 213 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 214 { 215 PC_SPAI *ispai = (PC_SPAI*)pc->data; 216 217 PetscFunctionBegin; 218 ispai->maxnew = maxnew1; 219 PetscFunctionReturn(0); 220 } 221 222 /**********************************************************************/ 223 224 #undef __FUNCT__ 225 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 226 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 227 { 228 PC_SPAI *ispai = (PC_SPAI*)pc->data; 229 230 PetscFunctionBegin; 231 ispai->block_size = block_size1; 232 PetscFunctionReturn(0); 233 } 234 235 /**********************************************************************/ 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 239 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 240 { 241 PC_SPAI *ispai = (PC_SPAI*)pc->data; 242 243 PetscFunctionBegin; 244 ispai->cache_size = cache_size; 245 PetscFunctionReturn(0); 246 } 247 248 /**********************************************************************/ 249 250 #undef __FUNCT__ 251 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 252 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 253 { 254 PC_SPAI *ispai = (PC_SPAI*)pc->data; 255 256 PetscFunctionBegin; 257 ispai->verbose = verbose; 258 PetscFunctionReturn(0); 259 } 260 261 /**********************************************************************/ 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "PCSPAISetSp_SPAI" 265 static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 266 { 267 PC_SPAI *ispai = (PC_SPAI*)pc->data; 268 269 PetscFunctionBegin; 270 ispai->sp = sp; 271 PetscFunctionReturn(0); 272 } 273 274 /* -------------------------------------------------------------------*/ 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "PCSPAISetEpsilon" 278 /*@ 279 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 280 281 Input Parameters: 282 + pc - the preconditioner 283 - eps - epsilon (default .4) 284 285 Notes: Espilon must be between 0 and 1. It controls the 286 quality of the approximation of M to the inverse of 287 A. Higher values of epsilon lead to more work, more 288 fill, and usually better preconditioners. In many 289 cases the best choice of epsilon is the one that 290 divides the total solution time equally between the 291 preconditioner and the solver. 292 293 Level: intermediate 294 295 .seealso: PCSPAI, PCSetType() 296 @*/ 297 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 298 { 299 PetscErrorCode ierr; 300 301 PetscFunctionBegin; 302 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 /**********************************************************************/ 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "PCSPAISetNBSteps" 310 /*@ 311 PCSPAISetNBSteps - set maximum number of improvement steps per row in 312 the SPAI preconditioner 313 314 Input Parameters: 315 + pc - the preconditioner 316 - n - number of steps (default 5) 317 318 Notes: SPAI constructs to approximation to every column of 319 the exact inverse of A in a series of improvement 320 steps. The quality of the approximation is determined 321 by epsilon. If an approximation achieving an accuracy 322 of epsilon is not obtained after ns steps, SPAI simply 323 uses the best approximation constructed so far. 324 325 Level: intermediate 326 327 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 328 @*/ 329 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 330 { 331 PetscErrorCode ierr; 332 333 PetscFunctionBegin; 334 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 /**********************************************************************/ 339 340 /* added 1/7/99 g.h. */ 341 #undef __FUNCT__ 342 #define __FUNCT__ "PCSPAISetMax" 343 /*@ 344 PCSPAISetMax - set the size of various working buffers in 345 the SPAI preconditioner 346 347 Input Parameters: 348 + pc - the preconditioner 349 - n - size (default is 5000) 350 351 Level: intermediate 352 353 .seealso: PCSPAI, PCSetType() 354 @*/ 355 PetscErrorCode PCSPAISetMax(PC pc,int max1) 356 { 357 PetscErrorCode ierr; 358 359 PetscFunctionBegin; 360 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 /**********************************************************************/ 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "PCSPAISetMaxNew" 368 /*@ 369 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 370 in SPAI preconditioner 371 372 Input Parameters: 373 + pc - the preconditioner 374 - n - maximum number (default 5) 375 376 Level: intermediate 377 378 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 379 @*/ 380 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 381 { 382 PetscErrorCode ierr; 383 384 PetscFunctionBegin; 385 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 /**********************************************************************/ 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCSPAISetBlockSize" 393 /*@ 394 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 395 396 Input Parameters: 397 + pc - the preconditioner 398 - n - block size (default 1) 399 400 Notes: A block 401 size of 1 treats A as a matrix of scalar elements. A 402 block size of s > 1 treats A as a matrix of sxs 403 blocks. A block size of 0 treats A as a matrix with 404 variable sized blocks, which are determined by 405 searching for dense square diagonal blocks in A. 406 This can be very effective for finite-element 407 matrices. 408 409 SPAI will convert A to block form, use a block 410 version of the preconditioner algorithm, and then 411 convert the result back to scalar form. 412 413 In many cases the a block-size parameter other than 1 414 can lead to very significant improvement in 415 performance. 416 417 418 Level: intermediate 419 420 .seealso: PCSPAI, PCSetType() 421 @*/ 422 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 423 { 424 PetscErrorCode ierr; 425 426 PetscFunctionBegin; 427 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 /**********************************************************************/ 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "PCSPAISetCacheSize" 435 /*@ 436 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 437 438 Input Parameters: 439 + pc - the preconditioner 440 - n - cache size {0,1,2,3,4,5} (default 5) 441 442 Notes: SPAI uses a hash table to cache messages and avoid 443 redundant communication. If suggest always using 444 5. This parameter is irrelevant in the serial 445 version. 446 447 Level: intermediate 448 449 .seealso: PCSPAI, PCSetType() 450 @*/ 451 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 452 { 453 PetscErrorCode ierr; 454 455 PetscFunctionBegin; 456 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 /**********************************************************************/ 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "PCSPAISetVerbose" 464 /*@ 465 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 466 467 Input Parameters: 468 + pc - the preconditioner 469 - n - level (default 1) 470 471 Notes: print parameters, timings and matrix statistics 472 473 Level: intermediate 474 475 .seealso: PCSPAI, PCSetType() 476 @*/ 477 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 478 { 479 PetscErrorCode ierr; 480 481 PetscFunctionBegin; 482 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 /**********************************************************************/ 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "PCSPAISetSp" 490 /*@ 491 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 492 493 Input Parameters: 494 + pc - the preconditioner 495 - n - 0 or 1 496 497 Notes: If A has a symmetric nonzero pattern use -sp 1 to 498 improve performance by eliminating some communication 499 in the parallel version. Even if A does not have a 500 symmetric nonzero pattern -sp 1 may well lead to good 501 results, but the code will not follow the published 502 SPAI algorithm exactly. 503 504 505 Level: intermediate 506 507 .seealso: PCSPAI, PCSetType() 508 @*/ 509 PetscErrorCode PCSPAISetSp(PC pc,int sp) 510 { 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 /**********************************************************************/ 519 520 /**********************************************************************/ 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "PCSetFromOptions_SPAI" 524 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 525 { 526 PC_SPAI *ispai = (PC_SPAI*)pc->data; 527 PetscErrorCode ierr; 528 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 529 double epsilon1; 530 PetscBool flg; 531 532 PetscFunctionBegin; 533 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 534 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 535 if (flg) { 536 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 537 } 538 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 539 if (flg) { 540 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 541 } 542 /* added 1/7/99 g.h. */ 543 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 544 if (flg) { 545 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 546 } 547 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 548 if (flg) { 549 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 550 } 551 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 552 if (flg) { 553 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 554 } 555 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 556 if (flg) { 557 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 558 } 559 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 560 if (flg) { 561 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 562 } 563 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 564 if (flg) { 565 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 566 } 567 ierr = PetscOptionsTail();CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 /**********************************************************************/ 572 573 /*MC 574 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 575 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 576 577 Options Database Keys: 578 + -pc_spai_epsilon <eps> - set tolerance 579 . -pc_spai_nbstep <n> - set nbsteps 580 . -pc_spai_max <m> - set max 581 . -pc_spai_max_new <m> - set maxnew 582 . -pc_spai_block_size <n> - set block size 583 . -pc_spai_cache_size <n> - set cache size 584 . -pc_spai_sp <m> - set sp 585 - -pc_spai_set_verbose <true,false> - verbose output 586 587 Notes: This only works with AIJ matrices. 588 589 Level: beginner 590 591 Concepts: approximate inverse 592 593 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 594 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 595 PCSPAISetVerbose(), PCSPAISetSp() 596 M*/ 597 598 EXTERN_C_BEGIN 599 #undef __FUNCT__ 600 #define __FUNCT__ "PCCreate_SPAI" 601 PetscErrorCode PCCreate_SPAI(PC pc) 602 { 603 PC_SPAI *ispai; 604 PetscErrorCode ierr; 605 606 PetscFunctionBegin; 607 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 608 pc->data = ispai; 609 610 pc->ops->destroy = PCDestroy_SPAI; 611 pc->ops->apply = PCApply_SPAI; 612 pc->ops->applyrichardson = 0; 613 pc->ops->setup = PCSetUp_SPAI; 614 pc->ops->view = PCView_SPAI; 615 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 616 617 ispai->epsilon = .4; 618 ispai->nbsteps = 5; 619 ispai->max = 5000; 620 ispai->maxnew = 5; 621 ispai->block_size = 1; 622 ispai->cache_size = 5; 623 ispai->verbose = 0; 624 625 ispai->sp = 1; 626 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr); 627 628 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C","PCSPAISetEpsilon_SPAI",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C","PCSPAISetNBSteps_SPAI",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 630 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C","PCSPAISetMax_SPAI",PCSPAISetMax_SPAI);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C","PCSPAISetMaxNew_SPAI",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 632 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C","PCSPAISetBlockSize_SPAI",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C","PCSPAISetCacheSize_SPAI",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 634 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C","PCSPAISetVerbose_SPAI",PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 635 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C","PCSPAISetSp_SPAI",PCSPAISetSp_SPAI);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 EXTERN_C_END 639 640 /**********************************************************************/ 641 642 /* 643 Converts from a PETSc matrix to an SPAI matrix 644 */ 645 #undef __FUNCT__ 646 #define __FUNCT__ "ConvertMatToMatrix" 647 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 648 { 649 matrix *M; 650 int i,j,col; 651 int row_indx; 652 int len,pe,local_indx,start_indx; 653 int *mapping; 654 PetscErrorCode ierr; 655 const int *cols; 656 const double *vals; 657 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 658 PetscMPIInt size,rank; 659 struct compressed_lines *rows; 660 661 PetscFunctionBegin; 662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 664 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 665 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 666 667 /* 668 not sure why a barrier is required. commenting out 669 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 670 */ 671 672 M = new_matrix((SPAI_Comm)comm); 673 674 M->n = n; 675 M->bs = 1; 676 M->max_block_size = 1; 677 678 M->mnls = (int*)malloc(sizeof(int)*size); 679 M->start_indices = (int*)malloc(sizeof(int)*size); 680 M->pe = (int*)malloc(sizeof(int)*n); 681 M->block_sizes = (int*)malloc(sizeof(int)*n); 682 for (i=0; i<n; i++) M->block_sizes[i] = 1; 683 684 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 685 686 M->start_indices[0] = 0; 687 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 688 689 M->mnl = M->mnls[M->myid]; 690 M->my_start_index = M->start_indices[M->myid]; 691 692 for (i=0; i<size; i++) { 693 start_indx = M->start_indices[i]; 694 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 695 } 696 697 if (AT) { 698 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 699 } else { 700 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 701 } 702 703 rows = M->lines; 704 705 /* Determine the mapping from global indices to pointers */ 706 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 707 pe = 0; 708 local_indx = 0; 709 for (i=0; i<M->n; i++) { 710 if (local_indx >= M->mnls[pe]) { 711 pe++; 712 local_indx = 0; 713 } 714 mapping[i] = local_indx + M->start_indices[pe]; 715 local_indx++; 716 } 717 718 719 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 720 721 /*********************************************************/ 722 /************** Set up the row structure *****************/ 723 /*********************************************************/ 724 725 /* count number of nonzeros in every row */ 726 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 727 for (i=rstart; i<rend; i++) { 728 ierr = MatGetRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 729 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 730 } 731 732 /* allocate buffers */ 733 len = 0; 734 for (i=0; i<mnl; i++) { 735 if (len < num_ptr[i]) len = num_ptr[i]; 736 } 737 738 for (i=rstart; i<rend; i++) { 739 row_indx = i-rstart; 740 len = num_ptr[row_indx]; 741 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 742 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 743 } 744 745 /* copy the matrix */ 746 for (i=rstart; i<rend; i++) { 747 row_indx = i - rstart; 748 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 749 for (j=0; j<nz; j++) { 750 col = cols[j]; 751 len = rows->len[row_indx]++; 752 753 rows->ptrs[row_indx][len] = mapping[col]; 754 rows->A[row_indx][len] = vals[j]; 755 } 756 rows->slen[row_indx] = rows->len[row_indx]; 757 758 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 759 } 760 761 762 /************************************************************/ 763 /************** Set up the column structure *****************/ 764 /*********************************************************/ 765 766 if (AT) { 767 768 /* count number of nonzeros in every column */ 769 for (i=rstart; i<rend; i++) { 770 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 771 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr); 772 } 773 774 /* allocate buffers */ 775 len = 0; 776 for (i=0; i<mnl; i++) { 777 if (len < num_ptr[i]) len = num_ptr[i]; 778 } 779 780 for (i=rstart; i<rend; i++) { 781 row_indx = i-rstart; 782 len = num_ptr[row_indx]; 783 784 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 785 } 786 787 /* copy the matrix (i.e., the structure) */ 788 for (i=rstart; i<rend; i++) { 789 row_indx = i - rstart; 790 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 791 for (j=0; j<nz; j++) { 792 col = cols[j]; 793 len = rows->rlen[row_indx]++; 794 795 rows->rptrs[row_indx][len] = mapping[col]; 796 } 797 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 798 } 799 } 800 801 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 802 ierr = PetscFree(mapping);CHKERRQ(ierr); 803 804 order_pointers(M); 805 M->maxnz = calc_maxnz(M); 806 *B = M; 807 PetscFunctionReturn(0); 808 } 809 810 /**********************************************************************/ 811 812 /* 813 Converts from an SPAI matrix B to a PETSc matrix PB. 814 This assumes that the the SPAI matrix B is stored in 815 COMPRESSED-ROW format. 816 */ 817 #undef __FUNCT__ 818 #define __FUNCT__ "ConvertMatrixToMat" 819 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 820 { 821 PetscMPIInt size,rank; 822 PetscErrorCode ierr; 823 int m,n,M,N; 824 int d_nz,o_nz; 825 int *d_nnz,*o_nnz; 826 int i,k,global_row,global_col,first_diag_col,last_diag_col; 827 PetscScalar val; 828 829 PetscFunctionBegin; 830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 831 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 832 833 m = n = B->mnls[rank]; 834 d_nz = o_nz = 0; 835 836 /* Determine preallocation for MatCreateMPIAIJ */ 837 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 838 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 839 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 840 first_diag_col = B->start_indices[rank]; 841 last_diag_col = first_diag_col + B->mnls[rank]; 842 for (i=0; i<B->mnls[rank]; i++) { 843 for (k=0; k<B->lines->len[i]; k++) { 844 global_col = B->lines->ptrs[i][k]; 845 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 846 else o_nnz[i]++; 847 } 848 } 849 850 M = N = B->n; 851 /* Here we only know how to create AIJ format */ 852 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 853 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 854 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 855 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 856 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 857 858 for (i=0; i<B->mnls[rank]; i++) { 859 global_row = B->start_indices[rank]+i; 860 for (k=0; k<B->lines->len[i]; k++) { 861 global_col = B->lines->ptrs[i][k]; 862 863 val = B->lines->A[i][k]; 864 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 865 } 866 } 867 868 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 869 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 870 871 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 872 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 /**********************************************************************/ 877 878 /* 879 Converts from an SPAI vector v to a PETSc vec Pv. 880 */ 881 #undef __FUNCT__ 882 #define __FUNCT__ "ConvertVectorToVec" 883 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 884 { 885 PetscErrorCode ierr; 886 PetscMPIInt size,rank; 887 int m,M,i,*mnls,*start_indices,*global_indices; 888 889 PetscFunctionBegin; 890 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 891 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 892 893 m = v->mnl; 894 M = v->n; 895 896 897 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 898 899 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 900 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 901 902 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 903 904 start_indices[0] = 0; 905 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 906 907 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 908 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 909 910 ierr = PetscFree(mnls);CHKERRQ(ierr); 911 ierr = PetscFree(start_indices);CHKERRQ(ierr); 912 913 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 914 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 915 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 916 917 ierr = PetscFree(global_indices);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 922 923 924