1 /* 2 Provides an interface to the SuperLU_DIST_2.0 sparse solver 3 */ 4 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 8 #include "stdlib.h" 9 #endif 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "superlu_zdefs.h" 14 #else 15 #include "superlu_ddefs.h" 16 #endif 17 EXTERN_C_END 18 19 typedef enum { GLOBAL,DISTRIBUTED 20 } SuperLU_MatInputMode; 21 22 typedef struct { 23 int_t nprow,npcol,*row,*col; 24 gridinfo_t grid; 25 superlu_options_t options; 26 SuperMatrix A_sup; 27 ScalePermstruct_t ScalePermstruct; 28 LUstruct_t LUstruct; 29 int StatPrint; 30 int MatInputMode; 31 SOLVEstruct_t SOLVEstruct; 32 MatStructure flg; 33 MPI_Comm comm_superlu; 34 #if defined(PETSC_USE_COMPLEX) 35 doublecomplex *val; 36 #else 37 double *val; 38 #endif 39 40 /* A few function pointers for inheritance */ 41 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 PetscErrorCode (*MatView)(Mat,PetscViewer); 43 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 52 53 EXTERN_C_BEGIN 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 57 { 58 PetscErrorCode ierr; 59 Mat B=*newmat; 60 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 61 62 PetscFunctionBegin; 63 if (reuse == MAT_INITIAL_MATRIX) { 64 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 65 } 66 /* Reset the original function pointers */ 67 B->ops->duplicate = lu->MatDuplicate; 68 B->ops->view = lu->MatView; 69 B->ops->assemblyend = lu->MatAssemblyEnd; 70 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 71 B->ops->destroy = lu->MatDestroy; 72 73 ierr = PetscFree(lu);CHKERRQ(ierr); 74 75 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 76 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 80 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 81 *newmat = B; 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 89 { 90 PetscErrorCode ierr; 91 PetscMPIInt size; 92 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 93 94 PetscFunctionBegin; 95 if (lu->CleanUpSuperLU_Dist) { 96 /* Deallocate SuperLU_DIST storage */ 97 if (lu->MatInputMode == GLOBAL) { 98 Destroy_CompCol_Matrix_dist(&lu->A_sup); 99 } else { 100 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 101 if ( lu->options.SolveInitialized ) { 102 #if defined(PETSC_USE_COMPLEX) 103 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 104 #else 105 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #endif 107 } 108 } 109 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 110 ScalePermstructFree(&lu->ScalePermstruct); 111 LUstructFree(&lu->LUstruct); 112 113 /* Release the SuperLU_DIST process grid. */ 114 superlu_gridexit(&lu->grid); 115 116 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 117 } 118 119 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 120 if (size == 1) { 121 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 122 } else { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } 125 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 126 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatSolve_SuperLU_DIST" 132 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 133 { 134 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 135 PetscErrorCode ierr; 136 PetscMPIInt size; 137 PetscInt m=A->M, N=A->N; 138 SuperLUStat_t stat; 139 double berr[1]; 140 PetscScalar *bptr; 141 PetscInt info, nrhs=1; 142 Vec x_seq; 143 IS iden; 144 VecScatter scat; 145 146 PetscFunctionBegin; 147 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 148 if (size > 1) { 149 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 150 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 151 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 152 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 153 ierr = ISDestroy(iden);CHKERRQ(ierr); 154 155 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 156 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 157 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 158 } else { /* distributed mat input */ 159 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 160 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 161 } 162 } else { /* size == 1 */ 163 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 164 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 165 } 166 167 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 168 169 PStatInit(&stat); /* Initialize the statistics variables. */ 170 171 if (lu->MatInputMode == GLOBAL) { 172 #if defined(PETSC_USE_COMPLEX) 173 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #else 176 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 177 &lu->grid, &lu->LUstruct, berr, &stat, &info); 178 #endif 179 } else { /* distributed mat input */ 180 #if defined(PETSC_USE_COMPLEX) 181 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 182 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 183 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 184 #else 185 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 186 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 187 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 188 #endif 189 } 190 if (lu->StatPrint) { 191 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 192 } 193 PStatFree(&stat); 194 195 if (size > 1) { 196 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 197 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 198 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 199 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 201 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 202 } else { 203 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 204 } 205 } else { 206 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 207 } 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 214 { 215 Mat *tseq,A_seq = PETSC_NULL; 216 Mat_SeqAIJ *aa,*bb; 217 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 218 PetscErrorCode ierr; 219 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 220 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 221 PetscMPIInt size,rank; 222 SuperLUStat_t stat; 223 double *berr=0; 224 IS isrow; 225 PetscLogDouble time0,time,time_min,time_max; 226 #if defined(PETSC_USE_COMPLEX) 227 doublecomplex *av, *bv; 228 #else 229 double *av, *bv; 230 #endif 231 232 PetscFunctionBegin; 233 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 234 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 235 236 if (lu->StatPrint) { /* collect time for mat conversion */ 237 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 238 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 239 } 240 241 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 242 if (size > 1) { /* convert mpi A to seq mat A */ 243 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 244 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 245 ierr = ISDestroy(isrow);CHKERRQ(ierr); 246 247 A_seq = *tseq; 248 ierr = PetscFree(tseq);CHKERRQ(ierr); 249 aa = (Mat_SeqAIJ*)A_seq->data; 250 } else { 251 aa = (Mat_SeqAIJ*)A->data; 252 } 253 254 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 255 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 256 #if defined(PETSC_USE_COMPLEX) 257 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 258 #else 259 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #endif 261 } else { /* successive numeric factorization, sparsity pattern is reused. */ 262 Destroy_CompCol_Matrix_dist(&lu->A_sup); 263 Destroy_LU(N, &lu->grid, &lu->LUstruct); 264 lu->options.Fact = SamePattern; 265 } 266 #if defined(PETSC_USE_COMPLEX) 267 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 268 #else 269 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 270 #endif 271 272 /* Create compressed column matrix A_sup. */ 273 #if defined(PETSC_USE_COMPLEX) 274 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 275 #else 276 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 277 #endif 278 } else { /* distributed mat input */ 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 aa=(Mat_SeqAIJ*)(mat->A)->data; 281 bb=(Mat_SeqAIJ*)(mat->B)->data; 282 ai=aa->i; aj=aa->j; 283 bi=bb->i; bj=bb->j; 284 #if defined(PETSC_USE_COMPLEX) 285 av=(doublecomplex*)aa->a; 286 bv=(doublecomplex*)bb->a; 287 #else 288 av=aa->a; 289 bv=bb->a; 290 #endif 291 rstart = mat->rstart; 292 nz = aa->nz + bb->nz; 293 garray = mat->garray; 294 rstart = mat->rstart; 295 296 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 297 #if defined(PETSC_USE_COMPLEX) 298 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 299 #else 300 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #endif 302 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 303 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 304 Destroy_LU(N, &lu->grid, &lu->LUstruct); 305 lu->options.Fact = SamePattern; 306 } 307 nz = 0; irow = mat->rstart; 308 for ( i=0; i<m; i++ ) { 309 lu->row[i] = nz; 310 countA = ai[i+1] - ai[i]; 311 countB = bi[i+1] - bi[i]; 312 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 313 bjj = bj + bi[i]; 314 315 /* B part, smaller col index */ 316 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 317 jB = 0; 318 for (j=0; j<countB; j++){ 319 jcol = garray[bjj[j]]; 320 if (jcol > colA_start) { 321 jB = j; 322 break; 323 } 324 lu->col[nz] = jcol; 325 lu->val[nz++] = *bv++; 326 if (j==countB-1) jB = countB; 327 } 328 329 /* A part */ 330 for (j=0; j<countA; j++){ 331 lu->col[nz] = mat->rstart + ajj[j]; 332 lu->val[nz++] = *av++; 333 } 334 335 /* B part, larger col index */ 336 for (j=jB; j<countB; j++){ 337 lu->col[nz] = garray[bjj[j]]; 338 lu->val[nz++] = *bv++; 339 } 340 } 341 lu->row[m] = nz; 342 #if defined(PETSC_USE_COMPLEX) 343 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 344 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 345 #else 346 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 347 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 348 #endif 349 } 350 if (lu->StatPrint) { 351 ierr = PetscGetTime(&time);CHKERRQ(ierr); 352 time0 = time - time0; 353 } 354 355 /* Factor the matrix. */ 356 PStatInit(&stat); /* Initialize the statistics variables. */ 357 358 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 359 #if defined(PETSC_USE_COMPLEX) 360 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 361 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 362 #else 363 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 364 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 365 #endif 366 } else { /* distributed mat input */ 367 #if defined(PETSC_USE_COMPLEX) 368 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 369 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 370 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 371 #else 372 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 373 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 374 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 375 #endif 376 } 377 378 if (lu->MatInputMode == GLOBAL && size > 1){ 379 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 380 } 381 382 if (lu->StatPrint) { 383 if (size > 1){ 384 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 385 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 386 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 387 time = time/size; /* average time */ 388 if (!rank) 389 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 390 %g / %g / %g\n",time_max,time_min,time); 391 } else { 392 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 393 %g\n",time0); 394 } 395 396 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 397 } 398 PStatFree(&stat); 399 (*F)->assembled = PETSC_TRUE; 400 lu->flg = SAME_NONZERO_PATTERN; 401 PetscFunctionReturn(0); 402 } 403 404 /* Note the Petsc r and c permutations are ignored */ 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 407 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 408 { 409 Mat B; 410 Mat_SuperLU_DIST *lu; 411 PetscErrorCode ierr; 412 PetscInt M=A->M,N=A->N,indx; 413 PetscMPIInt size; 414 superlu_options_t options; 415 PetscTruth flg; 416 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 417 const char *prtype[] = {"LargeDiag","NATURAL"}; 418 419 PetscFunctionBegin; 420 /* Create the factorization matrix */ 421 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 422 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 423 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 424 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 425 426 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 427 B->ops->solve = MatSolve_SuperLU_DIST; 428 B->factor = FACTOR_LU; 429 430 lu = (Mat_SuperLU_DIST*)(B->spptr); 431 432 /* Set the input options */ 433 set_default_options_dist(&options); 434 lu->MatInputMode = GLOBAL; 435 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 436 437 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 438 lu->nprow = size/2; /* Default process rows. */ 439 if (!lu->nprow) lu->nprow = 1; 440 lu->npcol = size/lu->nprow; /* Default process columns. */ 441 442 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 443 444 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 445 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 446 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 447 448 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 449 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 450 451 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 452 if (!flg) { 453 options.Equil = NO; 454 } 455 456 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 457 if (flg) { 458 switch (indx) { 459 case 0: 460 options.RowPerm = LargeDiag; 461 break; 462 case 1: 463 options.RowPerm = NOROWPERM; 464 break; 465 } 466 } 467 468 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 469 if (flg) { 470 switch (indx) { 471 case 0: 472 options.ColPerm = MMD_AT_PLUS_A; 473 break; 474 case 1: 475 options.ColPerm = NATURAL; 476 break; 477 case 2: 478 options.ColPerm = MMD_ATA; 479 break; 480 case 3: 481 options.ColPerm = COLAMD; 482 break; 483 } 484 } 485 486 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 487 if (!flg) { 488 options.ReplaceTinyPivot = NO; 489 } 490 491 options.IterRefine = NOREFINE; 492 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 493 if (flg) { 494 options.IterRefine = DOUBLE; 495 } 496 497 if (PetscLogPrintInfo) { 498 lu->StatPrint = (PetscInt)PETSC_TRUE; 499 } else { 500 lu->StatPrint = (PetscInt)PETSC_FALSE; 501 } 502 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 503 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 504 PetscOptionsEnd(); 505 506 /* Initialize the SuperLU process grid. */ 507 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 508 509 /* Initialize ScalePermstruct and LUstruct. */ 510 ScalePermstructInit(M, N, &lu->ScalePermstruct); 511 LUstructInit(M, N, &lu->LUstruct); 512 513 lu->options = options; 514 lu->flg = DIFFERENT_NONZERO_PATTERN; 515 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 516 *F = B; 517 PetscFunctionReturn(0); 518 } 519 520 #undef __FUNCT__ 521 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 522 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 523 PetscErrorCode ierr; 524 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 525 526 PetscFunctionBegin; 527 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 528 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 529 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 530 PetscFunctionReturn(0); 531 } 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 535 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 536 { 537 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 538 superlu_options_t options; 539 PetscErrorCode ierr; 540 541 PetscFunctionBegin; 542 /* check if matrix is superlu_dist type */ 543 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 544 545 options = lu->options; 546 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 547 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 553 if (options.ColPerm == NATURAL) { 554 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 555 } else if (options.ColPerm == MMD_AT_PLUS_A) { 556 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 557 } else if (options.ColPerm == MMD_ATA) { 558 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 559 } else if (options.ColPerm == COLAMD) { 560 ierr = PetscViewerASCIIPrintf(viewer," Column permutation COLAMD\n");CHKERRQ(ierr); 561 } else { 562 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 563 } 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatView_SuperLU_DIST" 569 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 570 { 571 PetscErrorCode ierr; 572 PetscTruth iascii; 573 PetscViewerFormat format; 574 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 575 576 PetscFunctionBegin; 577 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 578 579 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 580 if (iascii) { 581 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 582 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 583 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 590 EXTERN_C_BEGIN 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 593 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 594 { 595 /* This routine is only called to convert to MATSUPERLU_DIST */ 596 /* from MATSEQAIJ if A has a single process communicator */ 597 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 598 PetscErrorCode ierr; 599 PetscMPIInt size; 600 MPI_Comm comm; 601 Mat B=*newmat; 602 Mat_SuperLU_DIST *lu; 603 604 PetscFunctionBegin; 605 if (reuse == MAT_INITIAL_MATRIX) { 606 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 607 } 608 609 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 610 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 611 612 lu->MatDuplicate = A->ops->duplicate; 613 lu->MatView = A->ops->view; 614 lu->MatAssemblyEnd = A->ops->assemblyend; 615 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 616 lu->MatDestroy = A->ops->destroy; 617 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 618 619 B->spptr = (void*)lu; 620 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 621 B->ops->view = MatView_SuperLU_DIST; 622 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 623 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 624 B->ops->destroy = MatDestroy_SuperLU_DIST; 625 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 626 if (size == 1) { 627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 628 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 630 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 631 } else { 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 633 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 635 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 636 } 637 ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 638 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 639 *newmat = B; 640 PetscFunctionReturn(0); 641 } 642 EXTERN_C_END 643 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 646 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 647 PetscErrorCode ierr; 648 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 649 650 PetscFunctionBegin; 651 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 652 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 /*MC 657 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 658 via the external package SuperLU_DIST. 659 660 If SuperLU_DIST is installed (see the manual for 661 instructions on how to declare the existence of external packages), 662 a matrix type can be constructed which invokes SuperLU_DIST solvers. 663 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 664 665 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 666 and from MATMPIAIJ otherwise. As a result, for single process communicators, 667 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 668 for communicators controlling multiple processes. It is recommended that you call both of 669 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 670 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 671 without data copy. 672 673 Options Database Keys: 674 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 675 . -mat_superlu_dist_r <n> - number of rows in processor partition 676 . -mat_superlu_dist_c <n> - number of columns in processor partition 677 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 678 . -mat_superlu_dist_equil - equilibrate the matrix 679 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 680 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 681 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 682 . -mat_superlu_dist_iterrefine - use iterative refinement 683 - -mat_superlu_dist_statprint - print factorization information 684 685 Level: beginner 686 687 .seealso: PCLU 688 M*/ 689 690 EXTERN_C_BEGIN 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatCreate_SuperLU_DIST" 693 PetscErrorCode MatCreate_SuperLU_DIST(Mat A) 694 { 695 PetscErrorCode ierr; 696 PetscMPIInt size; 697 Mat A_diag; 698 699 PetscFunctionBegin; 700 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 701 /* and SuperLU_DIST types */ 702 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 703 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 704 if (size == 1) { 705 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 706 } else { 707 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 708 A_diag = ((Mat_MPIAIJ *)A->data)->A; 709 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 710 } 711 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 EXTERN_C_END 715 716