1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 #if defined(PETSC_USE_64BIT_INDICES) 14 /* ugly SuperLU_Dist variable telling it to use long long int */ 15 #define _LONGINT 16 #endif 17 18 EXTERN_C_BEGIN 19 #if defined(PETSC_USE_COMPLEX) 20 #include "superlu_zdefs.h" 21 #else 22 #include "superlu_ddefs.h" 23 #endif 24 EXTERN_C_END 25 26 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 27 const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0}; 28 29 typedef struct { 30 int_t nprow,npcol,*row,*col; 31 gridinfo_t grid; 32 superlu_options_t options; 33 SuperMatrix A_sup; 34 ScalePermstruct_t ScalePermstruct; 35 LUstruct_t LUstruct; 36 int StatPrint; 37 SuperLU_MatInputMode MatInputMode; 38 SOLVEstruct_t SOLVEstruct; 39 fact_t FactPattern; 40 MPI_Comm comm_superlu; 41 #if defined(PETSC_USE_COMPLEX) 42 doublecomplex *val; 43 #else 44 double *val; 45 #endif 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 52 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *); 53 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 54 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 55 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 56 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *); 57 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 61 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 62 { 63 PetscErrorCode ierr; 64 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 65 66 PetscFunctionBegin; 67 if (lu->CleanUpSuperLU_Dist) { 68 /* Deallocate SuperLU_DIST storage */ 69 if (lu->MatInputMode == GLOBAL) { 70 Destroy_CompCol_Matrix_dist(&lu->A_sup); 71 } else { 72 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 73 if ( lu->options.SolveInitialized ) { 74 #if defined(PETSC_USE_COMPLEX) 75 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 76 #else 77 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 78 #endif 79 } 80 } 81 Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct); 82 ScalePermstructFree(&lu->ScalePermstruct); 83 LUstructFree(&lu->LUstruct); 84 85 /* Release the SuperLU_DIST process grid. */ 86 superlu_gridexit(&lu->grid); 87 88 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 89 } 90 91 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatSolve_SuperLU_DIST" 97 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 98 { 99 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 100 PetscErrorCode ierr; 101 PetscMPIInt size; 102 PetscInt m=A->rmap->N, N=A->cmap->N; 103 SuperLUStat_t stat; 104 double berr[1]; 105 PetscScalar *bptr; 106 PetscInt nrhs=1; 107 Vec x_seq; 108 IS iden; 109 VecScatter scat; 110 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 111 112 PetscFunctionBegin; 113 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 114 if (size > 1) { 115 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 116 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 117 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 118 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 119 ierr = ISDestroy(iden);CHKERRQ(ierr); 120 121 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 123 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 124 } else { /* distributed mat input */ 125 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 126 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 127 } 128 } else { /* size == 1 */ 129 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 130 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 131 } 132 133 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 134 135 PStatInit(&stat); /* Initialize the statistics variables. */ 136 if (lu->MatInputMode == GLOBAL) { 137 #if defined(PETSC_USE_COMPLEX) 138 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 139 &lu->grid, &lu->LUstruct, berr, &stat, &info); 140 #else 141 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 142 &lu->grid, &lu->LUstruct, berr, &stat, &info); 143 #endif 144 } else { /* distributed mat input */ 145 #if defined(PETSC_USE_COMPLEX) 146 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid, 147 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 148 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 149 #else 150 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid, 151 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 152 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 153 #endif 154 } 155 if (lu->options.PrintStat) { 156 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 157 } 158 PStatFree(&stat); 159 160 if (size > 1) { 161 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 162 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 163 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 164 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 165 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 166 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 167 } else { 168 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 169 } 170 } else { 171 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 178 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 179 { 180 Mat *tseq,A_seq = PETSC_NULL; 181 Mat_SeqAIJ *aa,*bb; 182 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr; 183 PetscErrorCode ierr; 184 PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 185 m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 186 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 187 PetscMPIInt size,rank; 188 SuperLUStat_t stat; 189 double *berr=0; 190 IS isrow; 191 PetscLogDouble time0,time,time_min,time_max; 192 Mat F_diag=PETSC_NULL; 193 #if defined(PETSC_USE_COMPLEX) 194 doublecomplex *av, *bv; 195 #else 196 double *av, *bv; 197 #endif 198 199 PetscFunctionBegin; 200 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 201 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 202 203 if (lu->options.PrintStat) { /* collect time for mat conversion */ 204 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 205 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 206 } 207 208 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 209 if (size > 1) { /* convert mpi A to seq mat A */ 210 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 211 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 212 ierr = ISDestroy(isrow);CHKERRQ(ierr); 213 214 A_seq = *tseq; 215 ierr = PetscFree(tseq);CHKERRQ(ierr); 216 aa = (Mat_SeqAIJ*)A_seq->data; 217 } else { 218 aa = (Mat_SeqAIJ*)A->data; 219 } 220 221 /* Convert Petsc NR matrix to SuperLU_DIST NC. 222 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 223 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 224 if (lu->FactPattern == SamePattern_SameRowPerm){ 225 Destroy_CompCol_Matrix_dist(&lu->A_sup); 226 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 227 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 228 } else { 229 Destroy_CompCol_Matrix_dist(&lu->A_sup); 230 Destroy_LU(N, &lu->grid, &lu->LUstruct); 231 lu->options.Fact = SamePattern; 232 } 233 } 234 #if defined(PETSC_USE_COMPLEX) 235 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 236 #else 237 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 238 #endif 239 240 /* Create compressed column matrix A_sup. */ 241 #if defined(PETSC_USE_COMPLEX) 242 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 243 #else 244 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 245 #endif 246 } else { /* distributed mat input */ 247 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 248 aa=(Mat_SeqAIJ*)(mat->A)->data; 249 bb=(Mat_SeqAIJ*)(mat->B)->data; 250 ai=aa->i; aj=aa->j; 251 bi=bb->i; bj=bb->j; 252 #if defined(PETSC_USE_COMPLEX) 253 av=(doublecomplex*)aa->a; 254 bv=(doublecomplex*)bb->a; 255 #else 256 av=aa->a; 257 bv=bb->a; 258 #endif 259 rstart = A->rmap->rstart; 260 nz = aa->nz + bb->nz; 261 garray = mat->garray; 262 263 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 264 #if defined(PETSC_USE_COMPLEX) 265 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 266 #else 267 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 268 #endif 269 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 270 if (lu->FactPattern == SamePattern_SameRowPerm){ 271 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 272 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 273 } else { 274 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 275 lu->options.Fact = SamePattern; 276 } 277 } 278 nz = 0; irow = rstart; 279 for ( i=0; i<m; i++ ) { 280 lu->row[i] = nz; 281 countA = ai[i+1] - ai[i]; 282 countB = bi[i+1] - bi[i]; 283 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 284 bjj = bj + bi[i]; 285 286 /* B part, smaller col index */ 287 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 288 jB = 0; 289 for (j=0; j<countB; j++){ 290 jcol = garray[bjj[j]]; 291 if (jcol > colA_start) { 292 jB = j; 293 break; 294 } 295 lu->col[nz] = jcol; 296 lu->val[nz++] = *bv++; 297 if (j==countB-1) jB = countB; 298 } 299 300 /* A part */ 301 for (j=0; j<countA; j++){ 302 lu->col[nz] = rstart + ajj[j]; 303 lu->val[nz++] = *av++; 304 } 305 306 /* B part, larger col index */ 307 for (j=jB; j<countB; j++){ 308 lu->col[nz] = garray[bjj[j]]; 309 lu->val[nz++] = *bv++; 310 } 311 } 312 lu->row[m] = nz; 313 #if defined(PETSC_USE_COMPLEX) 314 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 315 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 316 #else 317 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 318 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 319 #endif 320 } 321 if (lu->options.PrintStat) { 322 ierr = PetscGetTime(&time);CHKERRQ(ierr); 323 time0 = time - time0; 324 } 325 326 /* Factor the matrix. */ 327 PStatInit(&stat); /* Initialize the statistics variables. */ 328 CHKMEMQ; 329 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 330 #if defined(PETSC_USE_COMPLEX) 331 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 332 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 333 #else 334 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 335 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 336 #endif 337 } else { /* distributed mat input */ 338 #if defined(PETSC_USE_COMPLEX) 339 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 340 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 341 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 342 #else 343 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 344 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 345 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 346 #endif 347 } 348 349 if (lu->MatInputMode == GLOBAL && size > 1){ 350 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 351 } 352 353 if (lu->options.PrintStat) { 354 if (size > 1){ 355 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 356 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 357 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 358 time = time/size; /* average time */ 359 if (!rank) { 360 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 361 } 362 } else { 363 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 364 } 365 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 366 } 367 PStatFree(&stat); 368 if (size > 1){ 369 F_diag = ((Mat_MPIAIJ *)(F)->data)->A; 370 F_diag->assembled = PETSC_TRUE; 371 } 372 (F)->assembled = PETSC_TRUE; 373 (F)->preallocated = PETSC_TRUE; 374 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 375 PetscFunctionReturn(0); 376 } 377 378 /* Note the Petsc r and c permutations are ignored */ 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 381 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 382 { 383 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (F)->spptr; 384 PetscInt M=A->rmap->N,N=A->cmap->N; 385 386 PetscFunctionBegin; 387 /* Initialize the SuperLU process grid. */ 388 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 389 390 /* Initialize ScalePermstruct and LUstruct. */ 391 ScalePermstructInit(M, N, &lu->ScalePermstruct); 392 LUstructInit(M, N, &lu->LUstruct); 393 (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 394 (F)->ops->solve = MatSolve_SuperLU_DIST; 395 PetscFunctionReturn(0); 396 } 397 398 #undef __FUNCT__ 399 #define __FUNCT__ "MatGetFactor_aij_superlu_dist" 400 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 401 { 402 Mat B; 403 Mat_SuperLU_DIST *lu; 404 PetscErrorCode ierr; 405 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 406 PetscMPIInt size; 407 superlu_options_t options; 408 PetscTruth flg; 409 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 410 const char *prtype[] = {"LargeDiag","NATURAL"}; 411 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 412 413 PetscFunctionBegin; 414 /* Create the factorization matrix */ 415 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 416 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 417 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 418 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 419 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 420 421 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 422 B->ops->view = MatView_SuperLU_DIST; 423 B->factor = MAT_FACTOR_LU; 424 425 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 426 B->spptr = lu; 427 428 /* Set the default input options: 429 options.Fact = DOFACT; 430 options.Equil = YES; 431 options.ParSymbFact = NO; 432 options.ColPerm = MMD_AT_PLUS_A; 433 options.RowPerm = LargeDiag; 434 options.ReplaceTinyPivot = YES; 435 options.IterRefine = DOUBLE; 436 options.Trans = NOTRANS; 437 options.SolveInitialized = NO; 438 options.RefineInitialized = NO; 439 options.PrintStat = YES; 440 */ 441 set_default_options_dist(&options); 442 443 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 444 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 445 /* Default num of process columns and rows */ 446 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 447 if (!lu->npcol) lu->npcol = 1; 448 while (lu->npcol > 0) { 449 lu->nprow = PetscMPIIntCast(size/lu->npcol); 450 if (size == lu->nprow * lu->npcol) break; 451 lu->npcol --; 452 } 453 454 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 455 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 456 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 457 if (size != lu->nprow * lu->npcol) 458 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 459 460 lu->MatInputMode = DISTRIBUTED; 461 ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 462 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 463 464 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 465 if (!flg) { 466 options.Equil = NO; 467 } 468 469 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 470 if (flg) { 471 switch (indx) { 472 case 0: 473 options.RowPerm = LargeDiag; 474 break; 475 case 1: 476 options.RowPerm = NOROWPERM; 477 break; 478 } 479 } 480 481 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 482 if (flg) { 483 switch (indx) { 484 case 0: 485 options.ColPerm = MMD_AT_PLUS_A; 486 break; 487 case 1: 488 options.ColPerm = NATURAL; 489 break; 490 case 2: 491 options.ColPerm = MMD_ATA; 492 break; 493 case 3: 494 options.ColPerm = PARMETIS; 495 break; 496 } 497 } 498 499 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 500 if (!flg) { 501 options.ReplaceTinyPivot = NO; 502 } 503 504 options.ParSymbFact = NO; 505 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 506 if (flg){ 507 #ifdef PETSC_HAVE_PARMETIS 508 options.ParSymbFact = YES; 509 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 510 #else 511 printf("parsymbfact needs PARMETIS"); 512 #endif 513 } 514 515 lu->FactPattern = SamePattern; 516 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 517 if (flg) { 518 switch (indx) { 519 case 0: 520 lu->FactPattern = SamePattern; 521 break; 522 case 1: 523 lu->FactPattern = SamePattern_SameRowPerm; 524 break; 525 } 526 } 527 528 options.IterRefine = NOREFINE; 529 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 530 if (flg) { 531 options.IterRefine = DOUBLE; 532 } 533 534 if (PetscLogPrintInfo) { 535 options.PrintStat = YES; 536 } else { 537 options.PrintStat = NO; 538 } 539 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 540 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 541 PetscOptionsEnd(); 542 543 lu->options = options; 544 lu->options.Fact = DOFACT; 545 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 546 *F = B; 547 PetscFunctionReturn(0); 548 } 549 550 EXTERN_C_BEGIN 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatGetFactor_seqaij_superlu_dist" 553 PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 554 { 555 PetscErrorCode ierr; 556 557 PetscFunctionBegin; 558 ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561 EXTERN_C_END 562 563 EXTERN_C_BEGIN 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist" 566 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 567 { 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 EXTERN_C_END 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 578 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 579 { 580 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 581 superlu_options_t options; 582 PetscErrorCode ierr; 583 584 PetscFunctionBegin; 585 /* check if matrix is superlu_dist type */ 586 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 587 588 options = lu->options; 589 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 590 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 591 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 592 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 593 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 594 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 596 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 597 if (options.ColPerm == NATURAL) { 598 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 599 } else if (options.ColPerm == MMD_AT_PLUS_A) { 600 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 601 } else if (options.ColPerm == MMD_ATA) { 602 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 603 } else if (options.ColPerm == PARMETIS) { 604 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 605 } else { 606 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 607 } 608 609 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 610 611 if (lu->FactPattern == SamePattern){ 612 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 613 } else { 614 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 615 } 616 PetscFunctionReturn(0); 617 } 618 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatView_SuperLU_DIST" 621 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 622 { 623 PetscErrorCode ierr; 624 PetscTruth iascii; 625 PetscViewerFormat format; 626 627 PetscFunctionBegin; 628 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 629 if (iascii) { 630 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 631 if (format == PETSC_VIEWER_ASCII_INFO) { 632 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 633 } 634 } 635 PetscFunctionReturn(0); 636 } 637 638 639 /*MC 640 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 641 via the external package SuperLU_DIST. 642 643 If SuperLU_DIST is installed (see the manual for 644 instructions on how to declare the existence of external packages), 645 a matrix type can be constructed which invokes SuperLU_DIST solvers. 646 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 647 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 648 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 649 650 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 651 and from MATMPIAIJ otherwise. As a result, for single process communicators, 652 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 653 for communicators controlling multiple processes. It is recommended that you call both of 654 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 655 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 656 without data copy; but this MUST be called AFTER the matrix values are set. 657 658 659 660 Options Database Keys: 661 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 662 . -mat_superlu_dist_r <n> - number of rows in processor partition 663 . -mat_superlu_dist_c <n> - number of columns in processor partition 664 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 665 . -mat_superlu_dist_equil - equilibrate the matrix 666 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 667 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 668 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 669 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 670 . -mat_superlu_dist_iterrefine - use iterative refinement 671 - -mat_superlu_dist_statprint - print factorization information 672 673 Level: beginner 674 675 .seealso: PCLU 676 M*/ 677 678