1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 42 PetscTruth CleanUpSuperLU_Dist; 43 } Mat_SuperLU_DIST; 44 45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 55 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 56 { 57 PetscErrorCode ierr; 58 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 59 60 PetscFunctionBegin; 61 if (lu->CleanUpSuperLU_Dist) { 62 /* Deallocate SuperLU_DIST storage */ 63 if (lu->MatInputMode == GLOBAL) { 64 Destroy_CompCol_Matrix_dist(&lu->A_sup); 65 } else { 66 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 67 if ( lu->options.SolveInitialized ) { 68 #if defined(PETSC_USE_COMPLEX) 69 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 70 #else 71 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 72 #endif 73 } 74 } 75 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 76 ScalePermstructFree(&lu->ScalePermstruct); 77 LUstructFree(&lu->LUstruct); 78 79 /* Release the SuperLU_DIST process grid. */ 80 superlu_gridexit(&lu->grid); 81 82 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 83 } 84 85 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "MatSolve_SuperLU_DIST" 91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 92 { 93 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 94 PetscErrorCode ierr; 95 PetscMPIInt size; 96 PetscInt m=A->rmap.N, N=A->cmap.N; 97 SuperLUStat_t stat; 98 double berr[1]; 99 PetscScalar *bptr; 100 PetscInt info, nrhs=1; 101 Vec x_seq; 102 IS iden; 103 VecScatter scat; 104 105 PetscFunctionBegin; 106 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 107 if (size > 1) { 108 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 110 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 111 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 112 ierr = ISDestroy(iden);CHKERRQ(ierr); 113 114 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 115 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 117 } else { /* distributed mat input */ 118 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 119 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 120 } 121 } else { /* size == 1 */ 122 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 123 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 124 } 125 126 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 127 128 PStatInit(&stat); /* Initialize the statistics variables. */ 129 if (lu->MatInputMode == GLOBAL) { 130 #if defined(PETSC_USE_COMPLEX) 131 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 132 &lu->grid, &lu->LUstruct, berr, &stat, &info); 133 #else 134 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 135 &lu->grid, &lu->LUstruct, berr, &stat, &info); 136 #endif 137 } else { /* distributed mat input */ 138 #if defined(PETSC_USE_COMPLEX) 139 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 140 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 141 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 142 #else 143 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 144 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 145 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 146 #endif 147 } 148 if (lu->options.PrintStat) { 149 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 150 } 151 PStatFree(&stat); 152 153 if (size > 1) { 154 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 155 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 156 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 157 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 158 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 159 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 160 } else { 161 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 162 } 163 } else { 164 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 165 } 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 172 { 173 Mat *tseq,A_seq = PETSC_NULL; 174 Mat_SeqAIJ *aa,*bb; 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 176 PetscErrorCode ierr; 177 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 178 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 179 PetscMPIInt size,rank; 180 SuperLUStat_t stat; 181 double *berr=0; 182 IS isrow; 183 PetscLogDouble time0,time,time_min,time_max; 184 Mat F_diag=PETSC_NULL; 185 #if defined(PETSC_USE_COMPLEX) 186 doublecomplex *av, *bv; 187 #else 188 double *av, *bv; 189 #endif 190 191 PetscFunctionBegin; 192 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 193 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 194 195 if (lu->options.PrintStat) { /* collect time for mat conversion */ 196 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 197 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 198 } 199 200 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 201 if (size > 1) { /* convert mpi A to seq mat A */ 202 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 203 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 204 ierr = ISDestroy(isrow);CHKERRQ(ierr); 205 206 A_seq = *tseq; 207 ierr = PetscFree(tseq);CHKERRQ(ierr); 208 aa = (Mat_SeqAIJ*)A_seq->data; 209 } else { 210 aa = (Mat_SeqAIJ*)A->data; 211 } 212 213 /* Convert Petsc NR matrix to SuperLU_DIST NC. 214 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 215 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 216 if (lu->FactPattern == SamePattern_SameRowPerm){ 217 Destroy_CompCol_Matrix_dist(&lu->A_sup); 218 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 219 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 220 } else { 221 Destroy_CompCol_Matrix_dist(&lu->A_sup); 222 Destroy_LU(N, &lu->grid, &lu->LUstruct); 223 lu->options.Fact = SamePattern; 224 } 225 } 226 #if defined(PETSC_USE_COMPLEX) 227 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 228 #else 229 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 230 #endif 231 232 /* Create compressed column matrix A_sup. */ 233 #if defined(PETSC_USE_COMPLEX) 234 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 235 #else 236 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 237 #endif 238 } else { /* distributed mat input */ 239 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 240 aa=(Mat_SeqAIJ*)(mat->A)->data; 241 bb=(Mat_SeqAIJ*)(mat->B)->data; 242 ai=aa->i; aj=aa->j; 243 bi=bb->i; bj=bb->j; 244 #if defined(PETSC_USE_COMPLEX) 245 av=(doublecomplex*)aa->a; 246 bv=(doublecomplex*)bb->a; 247 #else 248 av=aa->a; 249 bv=bb->a; 250 #endif 251 rstart = A->rmap.rstart; 252 nz = aa->nz + bb->nz; 253 garray = mat->garray; 254 255 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 256 #if defined(PETSC_USE_COMPLEX) 257 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 258 #else 259 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 260 #endif 261 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 262 if (lu->FactPattern == SamePattern_SameRowPerm){ 263 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 264 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 265 } else { 266 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 267 lu->options.Fact = SamePattern; 268 } 269 } 270 nz = 0; irow = rstart; 271 for ( i=0; i<m; i++ ) { 272 lu->row[i] = nz; 273 countA = ai[i+1] - ai[i]; 274 countB = bi[i+1] - bi[i]; 275 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 276 bjj = bj + bi[i]; 277 278 /* B part, smaller col index */ 279 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 280 jB = 0; 281 for (j=0; j<countB; j++){ 282 jcol = garray[bjj[j]]; 283 if (jcol > colA_start) { 284 jB = j; 285 break; 286 } 287 lu->col[nz] = jcol; 288 lu->val[nz++] = *bv++; 289 if (j==countB-1) jB = countB; 290 } 291 292 /* A part */ 293 for (j=0; j<countA; j++){ 294 lu->col[nz] = rstart + ajj[j]; 295 lu->val[nz++] = *av++; 296 } 297 298 /* B part, larger col index */ 299 for (j=jB; j<countB; j++){ 300 lu->col[nz] = garray[bjj[j]]; 301 lu->val[nz++] = *bv++; 302 } 303 } 304 lu->row[m] = nz; 305 #if defined(PETSC_USE_COMPLEX) 306 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 307 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 308 #else 309 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 310 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 311 #endif 312 } 313 if (lu->options.PrintStat) { 314 ierr = PetscGetTime(&time);CHKERRQ(ierr); 315 time0 = time - time0; 316 } 317 318 /* Factor the matrix. */ 319 PStatInit(&stat); /* Initialize the statistics variables. */ 320 321 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 322 #if defined(PETSC_USE_COMPLEX) 323 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 324 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 325 #else 326 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 327 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 328 #endif 329 } else { /* distributed mat input */ 330 #if defined(PETSC_USE_COMPLEX) 331 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 332 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 333 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 334 #else 335 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 336 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 337 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 338 #endif 339 } 340 341 if (lu->MatInputMode == GLOBAL && size > 1){ 342 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 343 } 344 345 if (lu->options.PrintStat) { 346 if (size > 1){ 347 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 348 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 349 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 350 time = time/size; /* average time */ 351 if (!rank) { 352 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 353 } 354 } else { 355 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 356 } 357 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 358 } 359 PStatFree(&stat); 360 if (size > 1){ 361 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 362 F_diag->assembled = PETSC_TRUE; 363 } 364 (*F)->assembled = PETSC_TRUE; 365 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 366 PetscFunctionReturn(0); 367 } 368 369 /* Note the Petsc r and c permutations are ignored */ 370 #undef __FUNCT__ 371 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 372 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 373 { 374 Mat B; 375 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (*F)->spptr; 376 PetscErrorCode ierr; 377 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 378 379 PetscFunctionBegin; 380 /* Initialize the SuperLU process grid. */ 381 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 382 383 /* Initialize ScalePermstruct and LUstruct. */ 384 ScalePermstructInit(M, N, &lu->ScalePermstruct); 385 LUstructInit(M, N, &lu->LUstruct); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist" 391 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 392 { 393 Mat B; 394 Mat_SuperLU_DIST *lu; 395 PetscErrorCode ierr; 396 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 397 PetscMPIInt size; 398 superlu_options_t options; 399 PetscTruth flg; 400 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 401 const char *prtype[] = {"LargeDiag","NATURAL"}; 402 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 403 404 PetscFunctionBegin; 405 /* Create the factorization matrix */ 406 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 407 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 408 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 409 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 410 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 411 412 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 413 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 414 B->ops->solve = MatSolve_SuperLU_DIST; 415 B->factor = FACTOR_LU; 416 417 lu = (Mat_SuperLU_DIST*)(B->spptr); 418 419 /* Set the default input options: 420 options.Fact = DOFACT; 421 options.Equil = YES; 422 options.ParSymbFact = NO; 423 options.ColPerm = MMD_AT_PLUS_A; 424 options.RowPerm = LargeDiag; 425 options.ReplaceTinyPivot = YES; 426 options.IterRefine = DOUBLE; 427 options.Trans = NOTRANS; 428 options.SolveInitialized = NO; 429 options.RefineInitialized = NO; 430 options.PrintStat = YES; 431 */ 432 set_default_options_dist(&options); 433 434 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 435 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 436 /* Default num of process columns and rows */ 437 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 438 if (!lu->npcol) lu->npcol = 1; 439 while (lu->npcol > 0) { 440 lu->nprow = PetscMPIIntCast(size/lu->npcol); 441 if (size == lu->nprow * lu->npcol) break; 442 lu->npcol --; 443 } 444 445 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 446 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 447 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 448 if (size != lu->nprow * lu->npcol) 449 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 450 451 lu->MatInputMode = DISTRIBUTED; 452 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 453 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 454 455 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 456 if (!flg) { 457 options.Equil = NO; 458 } 459 460 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 461 if (flg) { 462 switch (indx) { 463 case 0: 464 options.RowPerm = LargeDiag; 465 break; 466 case 1: 467 options.RowPerm = NOROWPERM; 468 break; 469 } 470 } 471 472 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 473 if (flg) { 474 switch (indx) { 475 case 0: 476 options.ColPerm = MMD_AT_PLUS_A; 477 break; 478 case 1: 479 options.ColPerm = NATURAL; 480 break; 481 case 2: 482 options.ColPerm = MMD_ATA; 483 break; 484 case 3: 485 options.ColPerm = PARMETIS; 486 break; 487 } 488 } 489 490 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 491 if (!flg) { 492 options.ReplaceTinyPivot = NO; 493 } 494 495 options.ParSymbFact = NO; 496 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 497 if (flg){ 498 #ifdef PETSC_HAVE_PARMETIS 499 options.ParSymbFact = YES; 500 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 501 #else 502 printf("parsymbfact needs PARMETIS"); 503 #endif 504 } 505 506 lu->FactPattern = SamePattern; 507 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 508 if (flg) { 509 switch (indx) { 510 case 0: 511 lu->FactPattern = SamePattern; 512 break; 513 case 1: 514 lu->FactPattern = SamePattern_SameRowPerm; 515 break; 516 } 517 } 518 519 options.IterRefine = NOREFINE; 520 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 521 if (flg) { 522 options.IterRefine = DOUBLE; 523 } 524 525 if (PetscLogPrintInfo) { 526 options.PrintStat = YES; 527 } else { 528 options.PrintStat = NO; 529 } 530 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 531 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 532 PetscOptionsEnd(); 533 534 lu->options = options; 535 lu->options.Fact = DOFACT; 536 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 537 *F = B; 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 543 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 544 { 545 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 546 superlu_options_t options; 547 PetscErrorCode ierr; 548 549 PetscFunctionBegin; 550 /* check if matrix is superlu_dist type */ 551 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 552 553 options = lu->options; 554 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 555 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 556 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 557 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 558 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 559 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 562 if (options.ColPerm == NATURAL) { 563 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 564 } else if (options.ColPerm == MMD_AT_PLUS_A) { 565 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 566 } else if (options.ColPerm == MMD_ATA) { 567 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 568 } else if (options.ColPerm == PARMETIS) { 569 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 570 } else { 571 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 572 } 573 574 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 575 576 if (lu->FactPattern == SamePattern){ 577 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 578 } else { 579 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 580 } 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatView_SuperLU_DIST" 586 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 587 { 588 PetscErrorCode ierr; 589 PetscTruth iascii; 590 PetscViewerFormat format; 591 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 592 593 PetscFunctionBegin; 594 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 596 if (iascii) { 597 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 598 if (format == PETSC_VIEWER_ASCII_INFO) { 599 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 600 } 601 } 602 PetscFunctionReturn(0); 603 } 604 605 606 /*MC 607 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 608 via the external package SuperLU_DIST. 609 610 If SuperLU_DIST is installed (see the manual for 611 instructions on how to declare the existence of external packages), 612 a matrix type can be constructed which invokes SuperLU_DIST solvers. 613 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 614 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 615 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 616 617 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 618 and from MATMPIAIJ otherwise. As a result, for single process communicators, 619 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 620 for communicators controlling multiple processes. It is recommended that you call both of 621 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 622 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 623 without data copy; but this MUST be called AFTER the matrix values are set. 624 625 626 627 Options Database Keys: 628 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 629 . -mat_superlu_dist_r <n> - number of rows in processor partition 630 . -mat_superlu_dist_c <n> - number of columns in processor partition 631 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 632 . -mat_superlu_dist_equil - equilibrate the matrix 633 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 634 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 635 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 636 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 637 . -mat_superlu_dist_iterrefine - use iterative refinement 638 - -mat_superlu_dist_statprint - print factorization information 639 640 Level: beginner 641 642 .seealso: PCLU 643 M*/ 644 645