1 /* 2 Provides an interface to the SuperLU_DIST_2.0 sparse solver 3 */ 4 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 8 #include "stdlib.h" 9 #endif 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "superlu_zdefs.h" 14 #else 15 #include "superlu_ddefs.h" 16 #endif 17 EXTERN_C_END 18 19 typedef enum { GLOBAL,DISTRIBUTED 20 } SuperLU_MatInputMode; 21 22 typedef struct { 23 int_t nprow,npcol,*row,*col; 24 gridinfo_t grid; 25 superlu_options_t options; 26 SuperMatrix A_sup; 27 ScalePermstruct_t ScalePermstruct; 28 LUstruct_t LUstruct; 29 int StatPrint; 30 int MatInputMode; 31 SOLVEstruct_t SOLVEstruct; 32 MatStructure flg; 33 MPI_Comm comm_superlu; 34 #if defined(PETSC_USE_COMPLEX) 35 doublecomplex *val; 36 #else 37 double *val; 38 #endif 39 40 /* A few function pointers for inheritance */ 41 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 PetscErrorCode (*MatView)(Mat,PetscViewer); 43 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 52 53 EXTERN_C_BEGIN 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) 57 { 58 PetscErrorCode ierr; 59 Mat B=*newmat; 60 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 61 62 PetscFunctionBegin; 63 if (B != A) { 64 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 65 } 66 /* Reset the original function pointers */ 67 B->ops->duplicate = lu->MatDuplicate; 68 B->ops->view = lu->MatView; 69 B->ops->assemblyend = lu->MatAssemblyEnd; 70 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 71 B->ops->destroy = lu->MatDestroy; 72 73 ierr = PetscFree(lu);CHKERRQ(ierr); 74 75 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 76 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 80 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 81 *newmat = B; 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 89 { 90 PetscErrorCode ierr; 91 int size; 92 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 93 94 PetscFunctionBegin; 95 if (lu->CleanUpSuperLU_Dist) { 96 /* Deallocate SuperLU_DIST storage */ 97 if (lu->MatInputMode == GLOBAL) { 98 Destroy_CompCol_Matrix_dist(&lu->A_sup); 99 } else { 100 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 101 if ( lu->options.SolveInitialized ) { 102 #if defined(PETSC_USE_COMPLEX) 103 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 104 #else 105 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #endif 107 } 108 } 109 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 110 ScalePermstructFree(&lu->ScalePermstruct); 111 LUstructFree(&lu->LUstruct); 112 113 /* Release the SuperLU_DIST process grid. */ 114 superlu_gridexit(&lu->grid); 115 116 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 117 } 118 119 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 120 if (size == 1) { 121 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 122 } else { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 124 } 125 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 126 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatSolve_SuperLU_DIST" 132 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 133 { 134 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 135 PetscErrorCode ierr; 136 int size; 137 int m=A->M, N=A->N; 138 SuperLUStat_t stat; 139 double berr[1]; 140 PetscScalar *bptr; 141 int info, nrhs=1; 142 Vec x_seq; 143 IS iden; 144 VecScatter scat; 145 146 PetscFunctionBegin; 147 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 148 if (size > 1) { 149 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 150 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 151 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 152 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 153 ierr = ISDestroy(iden);CHKERRQ(ierr); 154 155 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 156 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 157 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 158 } else { /* distributed mat input */ 159 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 160 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 161 } 162 } else { /* size == 1 */ 163 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 164 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 165 } 166 167 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 168 169 PStatInit(&stat); /* Initialize the statistics variables. */ 170 171 if (lu->MatInputMode == GLOBAL) { 172 #if defined(PETSC_USE_COMPLEX) 173 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #else 176 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 177 &lu->grid, &lu->LUstruct, berr, &stat, &info); 178 #endif 179 } else { /* distributed mat input */ 180 #if defined(PETSC_USE_COMPLEX) 181 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 182 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 183 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 184 #else 185 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 186 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 187 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 188 #endif 189 } 190 if (lu->StatPrint) { 191 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 192 } 193 PStatFree(&stat); 194 195 if (size > 1) { 196 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 197 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 198 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 199 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 201 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 202 } else { 203 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 204 } 205 } else { 206 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 207 } 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 214 { 215 Mat *tseq,A_seq = PETSC_NULL; 216 Mat_SeqAIJ *aa,*bb; 217 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 218 PetscErrorCode ierr; 219 int M=A->M,N=A->N,info,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 220 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 221 SuperLUStat_t stat; 222 double *berr=0; 223 IS isrow; 224 PetscLogDouble time0,time,time_min,time_max; 225 #if defined(PETSC_USE_COMPLEX) 226 doublecomplex *av, *bv; 227 #else 228 double *av, *bv; 229 #endif 230 231 PetscFunctionBegin; 232 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 233 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 234 235 if (lu->StatPrint) { /* collect time for mat conversion */ 236 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 237 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 238 } 239 240 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 241 if (size > 1) { /* convert mpi A to seq mat A */ 242 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 243 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 244 ierr = ISDestroy(isrow);CHKERRQ(ierr); 245 246 A_seq = *tseq; 247 ierr = PetscFree(tseq);CHKERRQ(ierr); 248 aa = (Mat_SeqAIJ*)A_seq->data; 249 } else { 250 aa = (Mat_SeqAIJ*)A->data; 251 } 252 253 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 254 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 255 #if defined(PETSC_USE_COMPLEX) 256 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 257 #else 258 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 259 #endif 260 } else { /* successive numeric factorization, sparsity pattern is reused. */ 261 Destroy_CompCol_Matrix_dist(&lu->A_sup); 262 Destroy_LU(N, &lu->grid, &lu->LUstruct); 263 lu->options.Fact = SamePattern; 264 } 265 #if defined(PETSC_USE_COMPLEX) 266 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 267 #else 268 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 269 #endif 270 271 /* Create compressed column matrix A_sup. */ 272 #if defined(PETSC_USE_COMPLEX) 273 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 274 #else 275 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 276 #endif 277 } else { /* distributed mat input */ 278 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 aa=(Mat_SeqAIJ*)(mat->A)->data; 280 bb=(Mat_SeqAIJ*)(mat->B)->data; 281 ai=aa->i; aj=aa->j; 282 bi=bb->i; bj=bb->j; 283 #if defined(PETSC_USE_COMPLEX) 284 av=(doublecomplex*)aa->a; 285 bv=(doublecomplex*)bb->a; 286 #else 287 av=aa->a; 288 bv=bb->a; 289 #endif 290 rstart = mat->rstart; 291 nz = aa->nz + bb->nz; 292 garray = mat->garray; 293 rstart = mat->rstart; 294 295 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 296 #if defined(PETSC_USE_COMPLEX) 297 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 298 #else 299 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 300 #endif 301 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 302 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 303 Destroy_LU(N, &lu->grid, &lu->LUstruct); 304 lu->options.Fact = SamePattern; 305 } 306 nz = 0; irow = mat->rstart; 307 for ( i=0; i<m; i++ ) { 308 lu->row[i] = nz; 309 countA = ai[i+1] - ai[i]; 310 countB = bi[i+1] - bi[i]; 311 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 312 bjj = bj + bi[i]; 313 314 /* B part, smaller col index */ 315 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 316 jB = 0; 317 for (j=0; j<countB; j++){ 318 jcol = garray[bjj[j]]; 319 if (jcol > colA_start) { 320 jB = j; 321 break; 322 } 323 lu->col[nz] = jcol; 324 lu->val[nz++] = *bv++; 325 if (j==countB-1) jB = countB; 326 } 327 328 /* A part */ 329 for (j=0; j<countA; j++){ 330 lu->col[nz] = mat->rstart + ajj[j]; 331 lu->val[nz++] = *av++; 332 } 333 334 /* B part, larger col index */ 335 for (j=jB; j<countB; j++){ 336 lu->col[nz] = garray[bjj[j]]; 337 lu->val[nz++] = *bv++; 338 } 339 } 340 lu->row[m] = nz; 341 #if defined(PETSC_USE_COMPLEX) 342 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 343 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 344 #else 345 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 347 #endif 348 } 349 if (lu->StatPrint) { 350 ierr = PetscGetTime(&time);CHKERRQ(ierr); 351 time0 = time - time0; 352 } 353 354 /* Factor the matrix. */ 355 PStatInit(&stat); /* Initialize the statistics variables. */ 356 357 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 358 #if defined(PETSC_USE_COMPLEX) 359 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 360 &lu->grid, &lu->LUstruct, berr, &stat, &info); 361 #else 362 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 363 &lu->grid, &lu->LUstruct, berr, &stat, &info); 364 #endif 365 } else { /* distributed mat input */ 366 #if defined(PETSC_USE_COMPLEX) 367 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 368 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 369 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 370 #else 371 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 372 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 373 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 374 #endif 375 } 376 377 if (lu->MatInputMode == GLOBAL && size > 1){ 378 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 379 } 380 381 if (lu->StatPrint) { 382 if (size > 1){ 383 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 384 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 385 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 386 time = time/size; /* average time */ 387 if (!rank) 388 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 389 %g / %g / %g\n",time_max,time_min,time); 390 } else { 391 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 392 %g\n",time0); 393 } 394 395 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 396 } 397 PStatFree(&stat); 398 (*F)->assembled = PETSC_TRUE; 399 lu->flg = SAME_NONZERO_PATTERN; 400 PetscFunctionReturn(0); 401 } 402 403 /* Note the Petsc r and c permutations are ignored */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 406 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 407 { 408 Mat B; 409 Mat_SuperLU_DIST *lu; 410 PetscErrorCode ierr; 411 int M=A->M,N=A->N,size,indx; 412 superlu_options_t options; 413 PetscTruth flg; 414 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 415 const char *prtype[] = {"LargeDiag","NATURAL"}; 416 417 PetscFunctionBegin; 418 /* Create the factorization matrix */ 419 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 420 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 421 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 422 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 423 424 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 425 B->ops->solve = MatSolve_SuperLU_DIST; 426 B->factor = FACTOR_LU; 427 428 lu = (Mat_SuperLU_DIST*)(B->spptr); 429 430 /* Set the input options */ 431 set_default_options_dist(&options); 432 lu->MatInputMode = GLOBAL; 433 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 434 435 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 436 lu->nprow = size/2; /* Default process rows. */ 437 if (!lu->nprow) lu->nprow = 1; 438 lu->npcol = size/lu->nprow; /* Default process columns. */ 439 440 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 441 442 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 443 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 444 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 445 446 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 447 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 448 449 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 450 if (!flg) { 451 options.Equil = NO; 452 } 453 454 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 455 if (flg) { 456 switch (indx) { 457 case 0: 458 options.RowPerm = LargeDiag; 459 break; 460 case 1: 461 options.RowPerm = NOROWPERM; 462 break; 463 } 464 } 465 466 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 467 if (flg) { 468 switch (indx) { 469 case 0: 470 options.ColPerm = MMD_AT_PLUS_A; 471 break; 472 case 1: 473 options.ColPerm = NATURAL; 474 break; 475 case 2: 476 options.ColPerm = MMD_ATA; 477 break; 478 case 3: 479 options.ColPerm = COLAMD; 480 break; 481 } 482 } 483 484 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 485 if (!flg) { 486 options.ReplaceTinyPivot = NO; 487 } 488 489 options.IterRefine = NOREFINE; 490 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 491 if (flg) { 492 options.IterRefine = DOUBLE; 493 } 494 495 if (PetscLogPrintInfo) { 496 lu->StatPrint = (int)PETSC_TRUE; 497 } else { 498 lu->StatPrint = (int)PETSC_FALSE; 499 } 500 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 501 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 502 PetscOptionsEnd(); 503 504 /* Initialize the SuperLU process grid. */ 505 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 506 507 /* Initialize ScalePermstruct and LUstruct. */ 508 ScalePermstructInit(M, N, &lu->ScalePermstruct); 509 LUstructInit(M, N, &lu->LUstruct); 510 511 lu->options = options; 512 lu->flg = DIFFERENT_NONZERO_PATTERN; 513 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 514 *F = B; 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 520 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 521 PetscErrorCode ierr; 522 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 523 524 PetscFunctionBegin; 525 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 526 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 527 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 533 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 534 { 535 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 536 superlu_options_t options; 537 PetscErrorCode ierr; 538 char *colperm; 539 540 PetscFunctionBegin; 541 /* check if matrix is superlu_dist type */ 542 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 543 544 options = lu->options; 545 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 546 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 547 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 552 if (options.ColPerm == NATURAL) { 553 colperm = "NATURAL"; 554 } else if (options.ColPerm == MMD_AT_PLUS_A) { 555 colperm = "MMD_AT_PLUS_A"; 556 } else if (options.ColPerm == MMD_ATA) { 557 colperm = "MMD_ATA"; 558 } else if (options.ColPerm == COLAMD) { 559 colperm = "COLAMD"; 560 } else { 561 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 562 } 563 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatView_SuperLU_DIST" 569 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 570 { 571 PetscErrorCode ierr; 572 PetscTruth iascii; 573 PetscViewerFormat format; 574 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 575 576 PetscFunctionBegin; 577 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 578 579 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 580 if (iascii) { 581 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 582 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 583 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 590 EXTERN_C_BEGIN 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 593 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) 594 { 595 /* This routine is only called to convert to MATSUPERLU_DIST */ 596 /* from MATSEQAIJ if A has a single process communicator */ 597 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 598 PetscErrorCode ierr; 599 PetscMPIInt size; 600 MPI_Comm comm; 601 Mat B=*newmat; 602 Mat_SuperLU_DIST *lu; 603 604 PetscFunctionBegin; 605 if (B != A) { 606 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 607 } 608 609 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 610 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 611 612 lu->MatDuplicate = A->ops->duplicate; 613 lu->MatView = A->ops->view; 614 lu->MatAssemblyEnd = A->ops->assemblyend; 615 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 616 lu->MatDestroy = A->ops->destroy; 617 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 618 619 B->spptr = (void*)lu; 620 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 621 B->ops->view = MatView_SuperLU_DIST; 622 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 623 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 624 B->ops->destroy = MatDestroy_SuperLU_DIST; 625 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 626 if (size == 1) { 627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 628 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 630 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 631 } else { 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 633 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 635 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 636 } 637 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 638 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 639 *newmat = B; 640 PetscFunctionReturn(0); 641 } 642 EXTERN_C_END 643 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 646 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 647 PetscErrorCode ierr; 648 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 649 650 PetscFunctionBegin; 651 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 652 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 /*MC 657 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 658 via the external package SuperLU_DIST. 659 660 If SuperLU_DIST is installed (see the manual for 661 instructions on how to declare the existence of external packages), 662 a matrix type can be constructed which invokes SuperLU_DIST solvers. 663 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 664 665 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 666 and from MATMPIAIJ otherwise. As a result, for single process communicators, 667 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 668 for communicators controlling multiple processes. It is recommended that you call both of 669 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 670 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 671 without data copy. 672 673 Options Database Keys: 674 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 675 . -mat_superlu_dist_r <n> - number of rows in processor partition 676 . -mat_superlu_dist_c <n> - number of columns in processor partition 677 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 678 . -mat_superlu_dist_equil - equilibrate the matrix 679 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 680 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 681 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 682 . -mat_superlu_dist_iterrefine - use iterative refinement 683 - -mat_superlu_dist_statprint - print factorization information 684 685 Level: beginner 686 687 .seealso: PCLU 688 M*/ 689 690 EXTERN_C_BEGIN 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatCreate_SuperLU_DIST" 693 PetscErrorCode MatCreate_SuperLU_DIST(Mat A) 694 { 695 PetscErrorCode ierr; 696 int size; 697 Mat A_diag; 698 699 PetscFunctionBegin; 700 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 701 /* and SuperLU_DIST types */ 702 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 703 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 704 if (size == 1) { 705 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 706 } else { 707 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 708 A_diag = ((Mat_MPIAIJ *)A->data)->A; 709 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr); 710 } 711 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 EXTERN_C_END 715 716