1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 42 PetscTruth CleanUpSuperLU_Dist; 43 } Mat_SuperLU_DIST; 44 45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *); 47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *); 51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 55 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 56 { 57 PetscErrorCode ierr; 58 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 59 60 PetscFunctionBegin; 61 if (lu->CleanUpSuperLU_Dist) { 62 /* Deallocate SuperLU_DIST storage */ 63 if (lu->MatInputMode == GLOBAL) { 64 Destroy_CompCol_Matrix_dist(&lu->A_sup); 65 } else { 66 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 67 if ( lu->options.SolveInitialized ) { 68 #if defined(PETSC_USE_COMPLEX) 69 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 70 #else 71 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 72 #endif 73 } 74 } 75 Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct); 76 ScalePermstructFree(&lu->ScalePermstruct); 77 LUstructFree(&lu->LUstruct); 78 79 /* Release the SuperLU_DIST process grid. */ 80 superlu_gridexit(&lu->grid); 81 82 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 83 } 84 85 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "MatSolve_SuperLU_DIST" 91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 92 { 93 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 94 PetscErrorCode ierr; 95 PetscMPIInt size; 96 PetscInt m=A->rmap->N, N=A->cmap->N; 97 SuperLUStat_t stat; 98 double berr[1]; 99 PetscScalar *bptr; 100 PetscInt info, nrhs=1; 101 Vec x_seq; 102 IS iden; 103 VecScatter scat; 104 105 PetscFunctionBegin; 106 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 107 if (size > 1) { 108 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 110 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 111 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 112 ierr = ISDestroy(iden);CHKERRQ(ierr); 113 114 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 115 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 117 } else { /* distributed mat input */ 118 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 119 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 120 } 121 } else { /* size == 1 */ 122 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 123 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 124 } 125 126 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 127 128 PStatInit(&stat); /* Initialize the statistics variables. */ 129 if (lu->MatInputMode == GLOBAL) { 130 #if defined(PETSC_USE_COMPLEX) 131 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 132 &lu->grid, &lu->LUstruct, berr, &stat, &info); 133 #else 134 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 135 &lu->grid, &lu->LUstruct, berr, &stat, &info); 136 #endif 137 } else { /* distributed mat input */ 138 #if defined(PETSC_USE_COMPLEX) 139 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid, 140 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 141 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 142 #else 143 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid, 144 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 145 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 146 #endif 147 } 148 if (lu->options.PrintStat) { 149 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 150 } 151 PStatFree(&stat); 152 153 if (size > 1) { 154 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 155 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 156 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 157 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 158 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 159 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 160 } else { 161 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 162 } 163 } else { 164 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 165 } 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 172 { 173 Mat *tseq,A_seq = PETSC_NULL; 174 Mat_SeqAIJ *aa,*bb; 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr; 176 PetscErrorCode ierr; 177 PetscInt M=A->rmap->N,N=A->cmap->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 178 m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 179 PetscMPIInt size,rank; 180 SuperLUStat_t stat; 181 double *berr=0; 182 IS isrow; 183 PetscLogDouble time0,time,time_min,time_max; 184 Mat F_diag=PETSC_NULL; 185 #if defined(PETSC_USE_COMPLEX) 186 doublecomplex *av, *bv; 187 #else 188 double *av, *bv; 189 #endif 190 191 PetscFunctionBegin; 192 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 193 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 194 195 if (lu->options.PrintStat) { /* collect time for mat conversion */ 196 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 197 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 198 } 199 200 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 201 if (size > 1) { /* convert mpi A to seq mat A */ 202 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 203 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 204 ierr = ISDestroy(isrow);CHKERRQ(ierr); 205 206 A_seq = *tseq; 207 ierr = PetscFree(tseq);CHKERRQ(ierr); 208 aa = (Mat_SeqAIJ*)A_seq->data; 209 } else { 210 aa = (Mat_SeqAIJ*)A->data; 211 } 212 213 /* Convert Petsc NR matrix to SuperLU_DIST NC. 214 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 215 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 216 if (lu->FactPattern == SamePattern_SameRowPerm){ 217 Destroy_CompCol_Matrix_dist(&lu->A_sup); 218 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 219 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 220 } else { 221 Destroy_CompCol_Matrix_dist(&lu->A_sup); 222 Destroy_LU(N, &lu->grid, &lu->LUstruct); 223 lu->options.Fact = SamePattern; 224 } 225 } 226 #if defined(PETSC_USE_COMPLEX) 227 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 228 #else 229 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 230 #endif 231 232 /* Create compressed column matrix A_sup. */ 233 #if defined(PETSC_USE_COMPLEX) 234 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 235 #else 236 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 237 #endif 238 } else { /* distributed mat input */ 239 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 240 aa=(Mat_SeqAIJ*)(mat->A)->data; 241 bb=(Mat_SeqAIJ*)(mat->B)->data; 242 ai=aa->i; aj=aa->j; 243 bi=bb->i; bj=bb->j; 244 #if defined(PETSC_USE_COMPLEX) 245 av=(doublecomplex*)aa->a; 246 bv=(doublecomplex*)bb->a; 247 #else 248 av=aa->a; 249 bv=bb->a; 250 #endif 251 rstart = A->rmap->rstart; 252 nz = aa->nz + bb->nz; 253 garray = mat->garray; 254 255 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 256 #if defined(PETSC_USE_COMPLEX) 257 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 258 #else 259 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 260 #endif 261 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 262 if (lu->FactPattern == SamePattern_SameRowPerm){ 263 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 264 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 265 } else { 266 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 267 lu->options.Fact = SamePattern; 268 } 269 } 270 nz = 0; irow = rstart; 271 for ( i=0; i<m; i++ ) { 272 lu->row[i] = nz; 273 countA = ai[i+1] - ai[i]; 274 countB = bi[i+1] - bi[i]; 275 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 276 bjj = bj + bi[i]; 277 278 /* B part, smaller col index */ 279 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 280 jB = 0; 281 for (j=0; j<countB; j++){ 282 jcol = garray[bjj[j]]; 283 if (jcol > colA_start) { 284 jB = j; 285 break; 286 } 287 lu->col[nz] = jcol; 288 lu->val[nz++] = *bv++; 289 if (j==countB-1) jB = countB; 290 } 291 292 /* A part */ 293 for (j=0; j<countA; j++){ 294 lu->col[nz] = rstart + ajj[j]; 295 lu->val[nz++] = *av++; 296 } 297 298 /* B part, larger col index */ 299 for (j=jB; j<countB; j++){ 300 lu->col[nz] = garray[bjj[j]]; 301 lu->val[nz++] = *bv++; 302 } 303 } 304 lu->row[m] = nz; 305 #if defined(PETSC_USE_COMPLEX) 306 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 307 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 308 #else 309 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 310 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 311 #endif 312 } 313 if (lu->options.PrintStat) { 314 ierr = PetscGetTime(&time);CHKERRQ(ierr); 315 time0 = time - time0; 316 } 317 318 /* Factor the matrix. */ 319 PStatInit(&stat); /* Initialize the statistics variables. */ 320 321 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 322 #if defined(PETSC_USE_COMPLEX) 323 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 324 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 325 #else 326 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 327 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 328 #endif 329 } else { /* distributed mat input */ 330 #if defined(PETSC_USE_COMPLEX) 331 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 332 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 333 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 334 #else 335 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 336 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 337 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 338 #endif 339 } 340 341 if (lu->MatInputMode == GLOBAL && size > 1){ 342 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 343 } 344 345 if (lu->options.PrintStat) { 346 if (size > 1){ 347 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 348 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 349 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 350 time = time/size; /* average time */ 351 if (!rank) { 352 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 353 } 354 } else { 355 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 356 } 357 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 358 } 359 PStatFree(&stat); 360 if (size > 1){ 361 F_diag = ((Mat_MPIAIJ *)(F)->data)->A; 362 F_diag->assembled = PETSC_TRUE; 363 } 364 (F)->assembled = PETSC_TRUE; 365 (F)->preallocated = PETSC_TRUE; 366 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 367 PetscFunctionReturn(0); 368 } 369 370 /* Note the Petsc r and c permutations are ignored */ 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 373 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 374 { 375 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (F)->spptr; 376 PetscInt M=A->rmap->N,N=A->cmap->N; 377 378 PetscFunctionBegin; 379 /* Initialize the SuperLU process grid. */ 380 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 381 382 /* Initialize ScalePermstruct and LUstruct. */ 383 ScalePermstructInit(M, N, &lu->ScalePermstruct); 384 LUstructInit(M, N, &lu->LUstruct); 385 (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 386 (F)->ops->solve = MatSolve_SuperLU_DIST; 387 PetscFunctionReturn(0); 388 } 389 390 EXTERN_C_BEGIN 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist" 393 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 394 { 395 Mat B; 396 Mat_SuperLU_DIST *lu; 397 PetscErrorCode ierr; 398 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 399 PetscMPIInt size; 400 superlu_options_t options; 401 PetscTruth flg; 402 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 403 const char *prtype[] = {"LargeDiag","NATURAL"}; 404 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 405 406 PetscFunctionBegin; 407 /* Create the factorization matrix */ 408 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 409 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 410 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 411 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 412 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 413 414 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 415 B->ops->view = MatView_SuperLU_DIST; 416 B->factor = MAT_FACTOR_LU; 417 418 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 419 B->spptr = lu; 420 421 /* Set the default input options: 422 options.Fact = DOFACT; 423 options.Equil = YES; 424 options.ParSymbFact = NO; 425 options.ColPerm = MMD_AT_PLUS_A; 426 options.RowPerm = LargeDiag; 427 options.ReplaceTinyPivot = YES; 428 options.IterRefine = DOUBLE; 429 options.Trans = NOTRANS; 430 options.SolveInitialized = NO; 431 options.RefineInitialized = NO; 432 options.PrintStat = YES; 433 */ 434 set_default_options_dist(&options); 435 436 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 437 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 438 /* Default num of process columns and rows */ 439 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 440 if (!lu->npcol) lu->npcol = 1; 441 while (lu->npcol > 0) { 442 lu->nprow = PetscMPIIntCast(size/lu->npcol); 443 if (size == lu->nprow * lu->npcol) break; 444 lu->npcol --; 445 } 446 447 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 449 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 450 if (size != lu->nprow * lu->npcol) 451 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 452 453 lu->MatInputMode = DISTRIBUTED; 454 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 455 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 456 457 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 458 if (!flg) { 459 options.Equil = NO; 460 } 461 462 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 463 if (flg) { 464 switch (indx) { 465 case 0: 466 options.RowPerm = LargeDiag; 467 break; 468 case 1: 469 options.RowPerm = NOROWPERM; 470 break; 471 } 472 } 473 474 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 475 if (flg) { 476 switch (indx) { 477 case 0: 478 options.ColPerm = MMD_AT_PLUS_A; 479 break; 480 case 1: 481 options.ColPerm = NATURAL; 482 break; 483 case 2: 484 options.ColPerm = MMD_ATA; 485 break; 486 case 3: 487 options.ColPerm = PARMETIS; 488 break; 489 } 490 } 491 492 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 493 if (!flg) { 494 options.ReplaceTinyPivot = NO; 495 } 496 497 options.ParSymbFact = NO; 498 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 499 if (flg){ 500 #ifdef PETSC_HAVE_PARMETIS 501 options.ParSymbFact = YES; 502 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 503 #else 504 printf("parsymbfact needs PARMETIS"); 505 #endif 506 } 507 508 lu->FactPattern = SamePattern; 509 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 510 if (flg) { 511 switch (indx) { 512 case 0: 513 lu->FactPattern = SamePattern; 514 break; 515 case 1: 516 lu->FactPattern = SamePattern_SameRowPerm; 517 break; 518 } 519 } 520 521 options.IterRefine = NOREFINE; 522 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 523 if (flg) { 524 options.IterRefine = DOUBLE; 525 } 526 527 if (PetscLogPrintInfo) { 528 options.PrintStat = YES; 529 } else { 530 options.PrintStat = NO; 531 } 532 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 533 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 534 PetscOptionsEnd(); 535 536 lu->options = options; 537 lu->options.Fact = DOFACT; 538 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 539 *F = B; 540 PetscFunctionReturn(0); 541 } 542 EXTERN_C_END 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 546 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 547 { 548 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 549 superlu_options_t options; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 /* check if matrix is superlu_dist type */ 554 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 555 556 options = lu->options; 557 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 558 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 559 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 563 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 564 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 565 if (options.ColPerm == NATURAL) { 566 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 567 } else if (options.ColPerm == MMD_AT_PLUS_A) { 568 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 569 } else if (options.ColPerm == MMD_ATA) { 570 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 571 } else if (options.ColPerm == PARMETIS) { 572 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 573 } else { 574 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 575 } 576 577 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 578 579 if (lu->FactPattern == SamePattern){ 580 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 581 } else { 582 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 583 } 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatView_SuperLU_DIST" 589 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 590 { 591 PetscErrorCode ierr; 592 PetscTruth iascii; 593 PetscViewerFormat format; 594 595 PetscFunctionBegin; 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 597 if (iascii) { 598 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 599 if (format == PETSC_VIEWER_ASCII_INFO) { 600 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 601 } 602 } 603 PetscFunctionReturn(0); 604 } 605 606 607 /*MC 608 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 609 via the external package SuperLU_DIST. 610 611 If SuperLU_DIST is installed (see the manual for 612 instructions on how to declare the existence of external packages), 613 a matrix type can be constructed which invokes SuperLU_DIST solvers. 614 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 615 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 616 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 617 618 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 619 and from MATMPIAIJ otherwise. As a result, for single process communicators, 620 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 621 for communicators controlling multiple processes. It is recommended that you call both of 622 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 623 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 624 without data copy; but this MUST be called AFTER the matrix values are set. 625 626 627 628 Options Database Keys: 629 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 630 . -mat_superlu_dist_r <n> - number of rows in processor partition 631 . -mat_superlu_dist_c <n> - number of columns in processor partition 632 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 633 . -mat_superlu_dist_equil - equilibrate the matrix 634 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 635 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 636 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 637 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 638 . -mat_superlu_dist_iterrefine - use iterative refinement 639 - -mat_superlu_dist_statprint - print factorization information 640 641 Level: beginner 642 643 .seealso: PCLU 644 M*/ 645 646