1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 42 PetscTruth CleanUpSuperLU_Dist; 43 } Mat_SuperLU_DIST; 44 45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 55 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 56 { 57 PetscErrorCode ierr; 58 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 59 60 PetscFunctionBegin; 61 if (lu->CleanUpSuperLU_Dist) { 62 /* Deallocate SuperLU_DIST storage */ 63 if (lu->MatInputMode == GLOBAL) { 64 Destroy_CompCol_Matrix_dist(&lu->A_sup); 65 } else { 66 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 67 if ( lu->options.SolveInitialized ) { 68 #if defined(PETSC_USE_COMPLEX) 69 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 70 #else 71 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 72 #endif 73 } 74 } 75 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 76 ScalePermstructFree(&lu->ScalePermstruct); 77 LUstructFree(&lu->LUstruct); 78 79 /* Release the SuperLU_DIST process grid. */ 80 superlu_gridexit(&lu->grid); 81 82 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 83 } 84 85 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "MatSolve_SuperLU_DIST" 91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 92 { 93 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 94 PetscErrorCode ierr; 95 PetscMPIInt size; 96 PetscInt m=A->rmap.N, N=A->cmap.N; 97 SuperLUStat_t stat; 98 double berr[1]; 99 PetscScalar *bptr; 100 PetscInt info, nrhs=1; 101 Vec x_seq; 102 IS iden; 103 VecScatter scat; 104 105 PetscFunctionBegin; 106 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 107 if (size > 1) { 108 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 110 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 111 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 112 ierr = ISDestroy(iden);CHKERRQ(ierr); 113 114 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 115 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 117 } else { /* distributed mat input */ 118 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 119 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 120 } 121 } else { /* size == 1 */ 122 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 123 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 124 } 125 126 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 127 128 PStatInit(&stat); /* Initialize the statistics variables. */ 129 if (lu->MatInputMode == GLOBAL) { 130 #if defined(PETSC_USE_COMPLEX) 131 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 132 &lu->grid, &lu->LUstruct, berr, &stat, &info); 133 #else 134 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 135 &lu->grid, &lu->LUstruct, berr, &stat, &info); 136 #endif 137 } else { /* distributed mat input */ 138 #if defined(PETSC_USE_COMPLEX) 139 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 140 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 141 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 142 #else 143 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 144 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 145 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 146 #endif 147 } 148 if (lu->options.PrintStat) { 149 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 150 } 151 PStatFree(&stat); 152 153 if (size > 1) { 154 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 155 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 156 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 157 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 158 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 159 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 160 } else { 161 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 162 } 163 } else { 164 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 165 } 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 172 { 173 Mat *tseq,A_seq = PETSC_NULL; 174 Mat_SeqAIJ *aa,*bb; 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 176 PetscErrorCode ierr; 177 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 178 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 179 PetscMPIInt size,rank; 180 SuperLUStat_t stat; 181 double *berr=0; 182 IS isrow; 183 PetscLogDouble time0,time,time_min,time_max; 184 Mat F_diag=PETSC_NULL; 185 #if defined(PETSC_USE_COMPLEX) 186 doublecomplex *av, *bv; 187 #else 188 double *av, *bv; 189 #endif 190 191 PetscFunctionBegin; 192 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 193 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 194 195 if (lu->options.PrintStat) { /* collect time for mat conversion */ 196 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 197 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 198 } 199 200 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 201 if (size > 1) { /* convert mpi A to seq mat A */ 202 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 203 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 204 ierr = ISDestroy(isrow);CHKERRQ(ierr); 205 206 A_seq = *tseq; 207 ierr = PetscFree(tseq);CHKERRQ(ierr); 208 aa = (Mat_SeqAIJ*)A_seq->data; 209 } else { 210 aa = (Mat_SeqAIJ*)A->data; 211 } 212 213 /* Convert Petsc NR matrix to SuperLU_DIST NC. 214 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 215 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 216 if (lu->FactPattern == SamePattern_SameRowPerm){ 217 Destroy_CompCol_Matrix_dist(&lu->A_sup); 218 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 219 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 220 } else { 221 Destroy_CompCol_Matrix_dist(&lu->A_sup); 222 Destroy_LU(N, &lu->grid, &lu->LUstruct); 223 lu->options.Fact = SamePattern; 224 } 225 } 226 #if defined(PETSC_USE_COMPLEX) 227 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 228 #else 229 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 230 #endif 231 232 /* Create compressed column matrix A_sup. */ 233 #if defined(PETSC_USE_COMPLEX) 234 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 235 #else 236 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 237 #endif 238 } else { /* distributed mat input */ 239 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 240 aa=(Mat_SeqAIJ*)(mat->A)->data; 241 bb=(Mat_SeqAIJ*)(mat->B)->data; 242 ai=aa->i; aj=aa->j; 243 bi=bb->i; bj=bb->j; 244 #if defined(PETSC_USE_COMPLEX) 245 av=(doublecomplex*)aa->a; 246 bv=(doublecomplex*)bb->a; 247 #else 248 av=aa->a; 249 bv=bb->a; 250 #endif 251 rstart = A->rmap.rstart; 252 nz = aa->nz + bb->nz; 253 garray = mat->garray; 254 255 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 256 #if defined(PETSC_USE_COMPLEX) 257 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 258 #else 259 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 260 #endif 261 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 262 if (lu->FactPattern == SamePattern_SameRowPerm){ 263 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 264 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 265 } else { 266 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 267 lu->options.Fact = SamePattern; 268 } 269 } 270 nz = 0; irow = rstart; 271 for ( i=0; i<m; i++ ) { 272 lu->row[i] = nz; 273 countA = ai[i+1] - ai[i]; 274 countB = bi[i+1] - bi[i]; 275 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 276 bjj = bj + bi[i]; 277 278 /* B part, smaller col index */ 279 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 280 jB = 0; 281 for (j=0; j<countB; j++){ 282 jcol = garray[bjj[j]]; 283 if (jcol > colA_start) { 284 jB = j; 285 break; 286 } 287 lu->col[nz] = jcol; 288 lu->val[nz++] = *bv++; 289 if (j==countB-1) jB = countB; 290 } 291 292 /* A part */ 293 for (j=0; j<countA; j++){ 294 lu->col[nz] = rstart + ajj[j]; 295 lu->val[nz++] = *av++; 296 } 297 298 /* B part, larger col index */ 299 for (j=jB; j<countB; j++){ 300 lu->col[nz] = garray[bjj[j]]; 301 lu->val[nz++] = *bv++; 302 } 303 } 304 lu->row[m] = nz; 305 #if defined(PETSC_USE_COMPLEX) 306 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 307 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 308 #else 309 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 310 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 311 #endif 312 } 313 if (lu->options.PrintStat) { 314 ierr = PetscGetTime(&time);CHKERRQ(ierr); 315 time0 = time - time0; 316 } 317 318 /* Factor the matrix. */ 319 PStatInit(&stat); /* Initialize the statistics variables. */ 320 321 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 322 #if defined(PETSC_USE_COMPLEX) 323 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 324 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 325 #else 326 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 327 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 328 #endif 329 } else { /* distributed mat input */ 330 #if defined(PETSC_USE_COMPLEX) 331 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 332 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 333 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 334 #else 335 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 336 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 337 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 338 #endif 339 } 340 341 if (lu->MatInputMode == GLOBAL && size > 1){ 342 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 343 } 344 345 if (lu->options.PrintStat) { 346 if (size > 1){ 347 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 348 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 349 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 350 time = time/size; /* average time */ 351 if (!rank) { 352 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 353 } 354 } else { 355 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 356 } 357 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 358 } 359 PStatFree(&stat); 360 if (size > 1){ 361 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 362 F_diag->assembled = PETSC_TRUE; 363 } 364 (*F)->assembled = PETSC_TRUE; 365 (*F)->preallocated = PETSC_TRUE; 366 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 367 PetscFunctionReturn(0); 368 } 369 370 /* Note the Petsc r and c permutations are ignored */ 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 373 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 374 { 375 Mat B; 376 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (*F)->spptr; 377 PetscErrorCode ierr; 378 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 379 380 PetscFunctionBegin; 381 /* Initialize the SuperLU process grid. */ 382 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 383 384 /* Initialize ScalePermstruct and LUstruct. */ 385 ScalePermstructInit(M, N, &lu->ScalePermstruct); 386 LUstructInit(M, N, &lu->LUstruct); 387 PetscFunctionReturn(0); 388 } 389 390 EXTERN_C_BEGIN 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist" 393 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 394 { 395 Mat B; 396 Mat_SuperLU_DIST *lu; 397 PetscErrorCode ierr; 398 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 399 PetscMPIInt size; 400 superlu_options_t options; 401 PetscTruth flg; 402 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 403 const char *prtype[] = {"LargeDiag","NATURAL"}; 404 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 405 406 PetscFunctionBegin; 407 /* Create the factorization matrix */ 408 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 409 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 410 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 411 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 412 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 413 414 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 415 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 416 B->ops->solve = MatSolve_SuperLU_DIST; 417 B->factor = MAT_FACTOR_LU; 418 419 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 420 B->spptr = lu; 421 422 /* Set the default input options: 423 options.Fact = DOFACT; 424 options.Equil = YES; 425 options.ParSymbFact = NO; 426 options.ColPerm = MMD_AT_PLUS_A; 427 options.RowPerm = LargeDiag; 428 options.ReplaceTinyPivot = YES; 429 options.IterRefine = DOUBLE; 430 options.Trans = NOTRANS; 431 options.SolveInitialized = NO; 432 options.RefineInitialized = NO; 433 options.PrintStat = YES; 434 */ 435 set_default_options_dist(&options); 436 437 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 438 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 439 /* Default num of process columns and rows */ 440 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 441 if (!lu->npcol) lu->npcol = 1; 442 while (lu->npcol > 0) { 443 lu->nprow = PetscMPIIntCast(size/lu->npcol); 444 if (size == lu->nprow * lu->npcol) break; 445 lu->npcol --; 446 } 447 448 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 449 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 450 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 451 if (size != lu->nprow * lu->npcol) 452 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 453 454 lu->MatInputMode = DISTRIBUTED; 455 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 456 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 457 458 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 459 if (!flg) { 460 options.Equil = NO; 461 } 462 463 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 464 if (flg) { 465 switch (indx) { 466 case 0: 467 options.RowPerm = LargeDiag; 468 break; 469 case 1: 470 options.RowPerm = NOROWPERM; 471 break; 472 } 473 } 474 475 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 476 if (flg) { 477 switch (indx) { 478 case 0: 479 options.ColPerm = MMD_AT_PLUS_A; 480 break; 481 case 1: 482 options.ColPerm = NATURAL; 483 break; 484 case 2: 485 options.ColPerm = MMD_ATA; 486 break; 487 case 3: 488 options.ColPerm = PARMETIS; 489 break; 490 } 491 } 492 493 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 494 if (!flg) { 495 options.ReplaceTinyPivot = NO; 496 } 497 498 options.ParSymbFact = NO; 499 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 500 if (flg){ 501 #ifdef PETSC_HAVE_PARMETIS 502 options.ParSymbFact = YES; 503 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 504 #else 505 printf("parsymbfact needs PARMETIS"); 506 #endif 507 } 508 509 lu->FactPattern = SamePattern; 510 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 511 if (flg) { 512 switch (indx) { 513 case 0: 514 lu->FactPattern = SamePattern; 515 break; 516 case 1: 517 lu->FactPattern = SamePattern_SameRowPerm; 518 break; 519 } 520 } 521 522 options.IterRefine = NOREFINE; 523 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 524 if (flg) { 525 options.IterRefine = DOUBLE; 526 } 527 528 if (PetscLogPrintInfo) { 529 options.PrintStat = YES; 530 } else { 531 options.PrintStat = NO; 532 } 533 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 534 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 535 PetscOptionsEnd(); 536 537 lu->options = options; 538 lu->options.Fact = DOFACT; 539 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 540 *F = B; 541 PetscFunctionReturn(0); 542 } 543 EXTERN_C_END 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 547 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 548 { 549 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 550 superlu_options_t options; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 /* check if matrix is superlu_dist type */ 555 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 556 557 options = lu->options; 558 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 559 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 563 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 564 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 565 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 566 if (options.ColPerm == NATURAL) { 567 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 568 } else if (options.ColPerm == MMD_AT_PLUS_A) { 569 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 570 } else if (options.ColPerm == MMD_ATA) { 571 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 572 } else if (options.ColPerm == PARMETIS) { 573 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 574 } else { 575 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 576 } 577 578 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 579 580 if (lu->FactPattern == SamePattern){ 581 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 582 } else { 583 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 584 } 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_SuperLU_DIST" 590 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 591 { 592 PetscErrorCode ierr; 593 PetscTruth iascii; 594 PetscViewerFormat format; 595 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 596 597 PetscFunctionBegin; 598 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 600 if (iascii) { 601 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 602 if (format == PETSC_VIEWER_ASCII_INFO) { 603 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 604 } 605 } 606 PetscFunctionReturn(0); 607 } 608 609 610 /*MC 611 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 612 via the external package SuperLU_DIST. 613 614 If SuperLU_DIST is installed (see the manual for 615 instructions on how to declare the existence of external packages), 616 a matrix type can be constructed which invokes SuperLU_DIST solvers. 617 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 618 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 619 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 620 621 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 622 and from MATMPIAIJ otherwise. As a result, for single process communicators, 623 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 624 for communicators controlling multiple processes. It is recommended that you call both of 625 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 626 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 627 without data copy; but this MUST be called AFTER the matrix values are set. 628 629 630 631 Options Database Keys: 632 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 633 . -mat_superlu_dist_r <n> - number of rows in processor partition 634 . -mat_superlu_dist_c <n> - number of columns in processor partition 635 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 636 . -mat_superlu_dist_equil - equilibrate the matrix 637 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 638 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 639 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 640 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 641 . -mat_superlu_dist_iterrefine - use iterative refinement 642 - -mat_superlu_dist_statprint - print factorization information 643 644 Level: beginner 645 646 .seealso: PCLU 647 M*/ 648 649