/*
   3/99 Modified by Stephen Barnard to support SPAI version 3.0
*/

/*
   Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
   code written by Stephen Barnard.

   Note: there are still some BAD memory leaks below!

   This code needs work

   1) get rid of all memory leaks
   2) fix the PETSc interface so that it determines whether the matrix is symmetric
      from the matrix itself rather than relying on the sp flag in PC_SPAI
   3) set the block size based on the matrix block size

*/
#define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */

#include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
#include <../src/ksp/pc/impls/spai/petscspai.h>

/*
    These are the SPAI include files
*/
EXTERN_C_BEGIN
#define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
#include <spai.h>
#include <matrix.h>
EXTERN_C_END

extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector*,Vec*);
extern PetscErrorCode MM_to_PETSC(char*,char*,char*);

typedef struct {

  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse in PETSc format */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating the size of the cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;

/**********************************************************************/

static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  Mat      AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    PetscCall(ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B));
  } else {
    /* Use the transpose to get the column nonzero structure. */
    PetscCall(MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT));
    PetscCall(ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B));
    PetscCall(MatDestroy(&AT));
  }

  /* construct SPAI preconditioner */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elements
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicating the size of the cache;
                              cache_size == 0 indicates no caching */
  /* int verbose */        /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  /* The same parameters can be set at runtime; see the note after PCSPAISetEpsilon_SPAI() below. */
  PetscCall(bspai(ispai->B,&ispai->M,
                  stdout,
                  ispai->epsilon,
                  ispai->nbsteps,
                  ispai->max,
                  ispai->maxnew,
                  ispai->block_size,
                  ispai->cache_size,
                  ispai->verbose));

  PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM));

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMult(ispai->PM,xx,y));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMatApply_SPAI(PC pc,Mat X,Mat Y)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMatMult(ispai->PM,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCDestroy_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ispai->PM));
  PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
  PetscCall(PetscFree(pc->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",NULL));
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
{
  PC_SPAI  *ispai = (PC_SPAI*)pc->data;
  PetscBool iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   ispai->epsilon));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose));
    PetscCall(PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,PetscReal epsilon1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->epsilon = (double)epsilon1;
  PetscFunctionReturn(0);
}

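/*
   Illustrative note (an addition, not part of the original implementation): the bspai()
   arguments set in PCSetUp_SPAI() above correspond one-to-one to the runtime options
   registered in PCSetFromOptions_SPAI() below, so the same configuration can be reached
   from the command line. A minimal sketch, assuming an application "./app" that calls
   KSPSetFromOptions(); the executable name and parameter values are hypothetical, the
   option names are the ones defined in this file:

     ./app -pc_type spai -pc_spai_epsilon 0.3 -pc_spai_nbsteps 10
           -pc_spai_block_size 3 -pc_spai_cache_size 5 -pc_spai_verbose 1
*/
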
/**********************************************************************/

static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,PetscInt nbsteps1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->nbsteps = (int)nbsteps1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h. */
static PetscErrorCode PCSPAISetMax_SPAI(PC pc,PetscInt max1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->max = (int)max1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,PetscInt maxnew1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->maxnew = (int)maxnew1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,PetscInt block_size1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->block_size = (int)block_size1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,PetscInt cache_size)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->cache_size = (int)cache_size;
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,PetscInt verbose)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->verbose = (int)verbose;
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCSPAISetSp_SPAI(PC pc,PetscInt sp)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->sp = (int)sp;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/

/*@
   PCSPAISetEpsilon - Set the tolerance for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  eps - epsilon (default .4)

   Notes:
   Epsilon must be between 0 and 1. It controls the
   quality of the approximation of M to the inverse of
   A. Smaller values of epsilon lead to more work, more
   fill, and usually better preconditioners. In many
   cases the best choice of epsilon is the one that
   divides the total solution time equally between the
   preconditioner and the solver.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
  @*/
PetscErrorCode PCSPAISetEpsilon(PC pc,PetscReal epsilon1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,PetscReal),(pc,epsilon1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetNBSteps - set the maximum number of improvement steps per row in
   the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - number of steps (default 5)

   Notes:
   SPAI constructs an approximation to every column of
   the exact inverse of A in a series of improvement
   steps. The quality of the approximation is determined
   by epsilon. If an approximation achieving an accuracy
   of epsilon is not obtained after nbsteps steps, SPAI simply
   uses the best approximation constructed so far.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc,PetscInt nbsteps1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,PetscInt),(pc,nbsteps1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h. */
/*@
   PCSPAISetMax - set the size of various working buffers in
   the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - size (default is 5000)

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc,PetscInt max1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetMax_C",(PC,PetscInt),(pc,max1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetMaxNew - set the maximum number of new nonzero candidates per step
   in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - maximum number (default 5)

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc,PetscInt maxnew1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,PetscInt),(pc,maxnew1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - block size (default 1)

   Notes:
   A block size of 1 treats A as a matrix of scalar elements. A
   block size of s > 1 treats A as a matrix of sxs
   blocks. A block size of 0 treats A as a matrix with
   variable sized blocks, which are determined by
   searching for dense square diagonal blocks in A.
   This can be very effective for finite-element
   matrices.

   SPAI will convert A to block form, use a block
   version of the preconditioner algorithm, and then
   convert the result back to scalar form.

   In many cases a block-size parameter other than 1
   can lead to very significant improvement in
   performance.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc,PetscInt block_size1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,PetscInt),(pc,block_size1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetCacheSize - specify the cache size in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - cache size {0,1,2,3,4,5} (default 5)

   Notes:
   SPAI uses a hash table to cache messages and avoid
   redundant communication. We suggest always using
   5. This parameter is irrelevant in the serial
   version.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc,PetscInt cache_size)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,PetscInt),(pc,cache_size));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetVerbose - set the verbosity level for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - level (default 1)

   Notes:
   Prints parameters, timings, and matrix statistics.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc,PetscInt verbose)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,PetscInt),(pc,verbose));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - 0 or 1

   Notes:
   If A has a symmetric nonzero pattern, use -pc_spai_sp 1 to
   improve performance by eliminating some communication
   in the parallel version. Even if A does not have a
   symmetric nonzero pattern, -pc_spai_sp 1 may well lead to good
   results, but the code will not follow the published
   SPAI algorithm exactly.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetSp(PC pc,PetscInt sp)
{
  PetscFunctionBegin;
  PetscTryMethod(pc,"PCSPAISetSp_C",(PC,PetscInt),(pc,sp));
  PetscFunctionReturn(0);
}

/**********************************************************************/

static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_SPAI  *ispai = (PC_SPAI*)pc->data;
  int       nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
  double    epsilon1;
  PetscBool flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"SPAI options");
  PetscCall(PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg));
  if (flg) {
    PetscCall(PCSPAISetEpsilon(pc,epsilon1));
  }
  PetscCall(PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg));
  if (flg) {
    PetscCall(PCSPAISetNBSteps(pc,nbsteps1));
  }
  /* added 1/7/99 g.h. */
  PetscCall(PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg));
  if (flg) {
    PetscCall(PCSPAISetMax(pc,max1));
  }
  PetscCall(PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg));
  if (flg) {
    PetscCall(PCSPAISetMaxNew(pc,maxnew1));
  }
  PetscCall(PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg));
  if (flg) {
    PetscCall(PCSPAISetBlockSize(pc,block_size1));
  }
  PetscCall(PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg));
  if (flg) {
    PetscCall(PCSPAISetCacheSize(pc,cache_size));
  }
  PetscCall(PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg));
  if (flg) {
    PetscCall(PCSPAISetVerbose(pc,verbose));
  }
  PetscCall(PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg));
  if (flg) {
    PetscCall(PCSPAISetSp(pc,sp));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*MC
   PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
   as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

   Options Database Keys:
+  -pc_spai_epsilon <eps> - set the tolerance
.  -pc_spai_nbsteps <n> - set the maximum number of improvement steps per row
.  -pc_spai_max <m> - set the size of the working buffers
.  -pc_spai_maxnew <m> - set the maximum number of new nonzero candidates per step
.  -pc_spai_block_size <n> - set the block size
.  -pc_spai_cache_size <n> - set the cache size
.  -pc_spai_sp <m> - set the symmetric nonzero pattern flag
-  -pc_spai_verbose <n> - set the verbosity level

   Notes:
   This only works with AIJ matrices.

   An illustrative usage sketch follows PCCreate_SPAI() below.

   Level: beginner

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
          `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
          `PCSPAISetVerbose()`, `PCSPAISetSp()`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI *ispai;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(pc,&ispai));
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->matapply        = PCMatApply_SPAI;
  pc->ops->applyrichardson = 0;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai)));

  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI));
  PetscFunctionReturn(0);
}

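/*
   Illustrative usage sketch (an addition, not part of the implementation): selecting PCSPAI
   on a KSP and tuning it through the setters defined above. The Mat A, Vec b, Vec x, and
   the chosen parameter values are hypothetical; error checking and cleanup are elided.

     #include <petscksp.h>

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp));
     PetscCall(KSPSetOperators(ksp,A,A));
     PetscCall(KSPGetPC(ksp,&pc));
     PetscCall(PCSetType(pc,PCSPAI));
     PetscCall(PCSPAISetEpsilon(pc,0.3));
     PetscCall(PCSPAISetBlockSize(pc,3));
     PetscCall(KSPSetFromOptions(ksp));
     PetscCall(KSPSolve(ksp,b,x));

   The same configuration is reachable from the command line with
   -pc_type spai -pc_spai_epsilon 0.3 -pc_spai_block_size 3
*/
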
/**********************************************************************/

/*
   Converts from a PETSc matrix to an SPAI matrix
*/
PetscErrorCode ConvertMatToMatrix(MPI_Comm comm,Mat A,Mat AT,matrix **B)
{
  matrix                  *M;
  int                      i,j,col;
  int                      row_indx;
  int                      len,pe,local_indx,start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n,mnl,nnl,nz,rstart,rend;
  PetscMPIInt              size,rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCall(MatGetSize(A,&n,&n));
  PetscCall(MatGetLocalSize(A,&mnl,&nnl));

  /*
    not sure why a barrier is required. commenting out
    PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  M->mnls          = (int*)malloc(sizeof(int)*size);
  M->start_indices = (int*)malloc(sizeof(int)*size);
  M->pe            = (int*)malloc(sizeof(int)*n);
  M->block_sizes   = (int*)malloc(sizeof(int)*n);
  for (i=0; i<n; i++) M->block_sizes[i] = 1;

  PetscCallMPI(MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm));

  M->start_indices[0] = 0;
  for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  for (i=0; i<size; i++) {
    start_indx = M->start_indices[i];
    for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
  }

  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank],1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank],0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n,&mapping));
  pe         = 0;
  local_indx = 0;
  for (i=0; i<M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /*********************************************************/
  /************** Set up the row structure *****************/
  /*********************************************************/

  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
  for (i=rstart; i<rend; i++) {
    row_indx = i - rstart;
    PetscCall(MatGetRow(A,i,&nz,&cols,&vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int));
    rows->A[row_indx]    = (double*)malloc(nz*sizeof(double));
    /* copy the matrix */
    for (j=0; j<nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A,i,&nz,&cols,&vals));
  }

  /*********************************************************/
  /************** Set up the column structure **************/
  /*********************************************************/

  if (AT) {
    for (i=rstart; i<rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT,i,&nz,&cols,&vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j=0; j<nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT,i,&nz,&cols,&vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   Converts from an SPAI matrix B to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.
*/
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
{
  PetscMPIInt size,rank;
  int         m,n,M,N;
  int         d_nz,o_nz;
  int         *d_nnz,*o_nnz;
  int         i,k,global_row,global_col,first_diag_col,last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  m    = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateAIJ */
  PetscCall(PetscMalloc1(m,&d_nnz));
  PetscCall(PetscMalloc1(m,&o_nnz));
  for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i=0; i<B->mnls[rank]; i++) {
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  PetscCall(MatCreate(comm,PB));
  PetscCall(MatSetSizes(*PB,m,n,M,N));
  PetscCall(MatSetType(*PB,MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz));

  for (i=0; i<B->mnls[rank]; i++) {
    global_row = B->start_indices[rank]+i;
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES));
    }
  }

  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   Converts from an SPAI vector v to a PETSc vec Pv.
*/
PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
{
  PetscMPIInt size,rank;
  int         m,M,i,*mnls,*start_indices,*global_indices;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  m = v->mnl;
  M = v->n;

  PetscCall(VecCreateMPI(comm,m,M,Pv));

  PetscCall(PetscMalloc1(size,&mnls));
  PetscCallMPI(MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm));

  PetscCall(PetscMalloc1(size,&start_indices));

  start_indices[0] = 0;
  for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] + mnls[i-1];

  PetscCall(PetscMalloc1(v->mnl,&global_indices));
  for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;

  PetscCall(PetscFree(mnls));
  PetscCall(PetscFree(start_indices));

  PetscCall(VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES));
  PetscCall(VecAssemblyBegin(*Pv));
  PetscCall(VecAssemblyEnd(*Pv));

  PetscCall(PetscFree(global_indices));
  PetscFunctionReturn(0);
}