1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->M, N=A->N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 173 if (lu->MatInputMode == GLOBAL) { 174 #if defined(PETSC_USE_COMPLEX) 175 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 176 &lu->grid, &lu->LUstruct, berr, &stat, &info); 177 #else 178 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 179 &lu->grid, &lu->LUstruct, berr, &stat, &info); 180 #endif 181 } else { /* distributed mat input */ 182 #if defined(PETSC_USE_COMPLEX) 183 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 184 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 185 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 186 #else 187 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 188 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 189 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 190 #endif 191 } 192 if (lu->StatPrint) { 193 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 194 } 195 PStatFree(&stat); 196 197 if (size > 1) { 198 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 199 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 200 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 201 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 202 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 203 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 204 } else { 205 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 206 } 207 } else { 208 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 209 } 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 215 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 216 { 217 Mat *tseq,A_seq = PETSC_NULL; 218 Mat_SeqAIJ *aa,*bb; 219 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 220 PetscErrorCode ierr; 221 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 222 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 223 PetscMPIInt size,rank; 224 SuperLUStat_t stat; 225 double *berr=0; 226 IS isrow; 227 PetscLogDouble time0,time,time_min,time_max; 228 #if defined(PETSC_USE_COMPLEX) 229 doublecomplex *av, *bv; 230 #else 231 double *av, *bv; 232 #endif 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 237 238 if (lu->StatPrint) { /* collect time for mat conversion */ 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 jB = 0; 320 for (j=0; j<countB; j++){ 321 jcol = garray[bjj[j]]; 322 if (jcol > colA_start) { 323 jB = j; 324 break; 325 } 326 lu->col[nz] = jcol; 327 lu->val[nz++] = *bv++; 328 if (j==countB-1) jB = countB; 329 } 330 331 /* A part */ 332 for (j=0; j<countA; j++){ 333 lu->col[nz] = mat->rstart + ajj[j]; 334 lu->val[nz++] = *av++; 335 } 336 337 /* B part, larger col index */ 338 for (j=jB; j<countB; j++){ 339 lu->col[nz] = garray[bjj[j]]; 340 lu->val[nz++] = *bv++; 341 } 342 } 343 lu->row[m] = nz; 344 #if defined(PETSC_USE_COMPLEX) 345 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 347 #else 348 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 349 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 350 #endif 351 } 352 if (lu->StatPrint) { 353 ierr = PetscGetTime(&time);CHKERRQ(ierr); 354 time0 = time - time0; 355 } 356 357 /* Factor the matrix. */ 358 PStatInit(&stat); /* Initialize the statistics variables. */ 359 360 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 361 #if defined(PETSC_USE_COMPLEX) 362 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 363 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 364 #else 365 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 366 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 367 #endif 368 } else { /* distributed mat input */ 369 #if defined(PETSC_USE_COMPLEX) 370 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 371 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 372 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 373 #else 374 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 376 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 377 #endif 378 } 379 380 if (lu->MatInputMode == GLOBAL && size > 1){ 381 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 382 } 383 384 if (lu->StatPrint) { 385 if (size > 1){ 386 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 387 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 388 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 389 time = time/size; /* average time */ 390 if (!rank) 391 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 392 %g / %g / %g\n",time_max,time_min,time); 393 } else { 394 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 395 %g\n",time0); 396 } 397 398 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 399 } 400 PStatFree(&stat); 401 (*F)->assembled = PETSC_TRUE; 402 lu->flg = SAME_NONZERO_PATTERN; 403 PetscFunctionReturn(0); 404 } 405 406 /* Note the Petsc r and c permutations are ignored */ 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 409 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 410 { 411 Mat B; 412 Mat_SuperLU_DIST *lu; 413 PetscErrorCode ierr; 414 PetscInt M=A->M,N=A->N,indx; 415 PetscMPIInt size; 416 superlu_options_t options; 417 PetscTruth flg; 418 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 419 const char *prtype[] = {"LargeDiag","NATURAL"}; 420 421 PetscFunctionBegin; 422 /* Create the factorization matrix */ 423 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 424 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 425 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 426 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 427 428 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 429 B->ops->solve = MatSolve_SuperLU_DIST; 430 B->factor = FACTOR_LU; 431 432 lu = (Mat_SuperLU_DIST*)(B->spptr); 433 434 /* Set the input options */ 435 set_default_options_dist(&options); 436 lu->MatInputMode = GLOBAL; 437 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 438 439 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 440 lu->nprow = size/2; /* Default process rows. */ 441 if (!lu->nprow) lu->nprow = 1; 442 lu->npcol = size/lu->nprow; /* Default process columns. */ 443 444 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 445 446 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 447 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 448 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 449 450 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 451 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 452 453 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 454 if (!flg) { 455 options.Equil = NO; 456 } 457 458 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 459 if (flg) { 460 switch (indx) { 461 case 0: 462 options.RowPerm = LargeDiag; 463 break; 464 case 1: 465 options.RowPerm = NOROWPERM; 466 break; 467 } 468 } 469 470 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 471 if (flg) { 472 switch (indx) { 473 case 0: 474 options.ColPerm = MMD_AT_PLUS_A; 475 break; 476 case 1: 477 options.ColPerm = NATURAL; 478 break; 479 case 2: 480 options.ColPerm = MMD_ATA; 481 break; 482 case 3: 483 options.ColPerm = COLAMD; 484 break; 485 } 486 } 487 488 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 489 if (!flg) { 490 options.ReplaceTinyPivot = NO; 491 } 492 493 options.IterRefine = NOREFINE; 494 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 495 if (flg) { 496 options.IterRefine = DOUBLE; 497 } 498 499 if (PetscLogPrintInfo) { 500 lu->StatPrint = (PetscInt)PETSC_TRUE; 501 } else { 502 lu->StatPrint = (PetscInt)PETSC_FALSE; 503 } 504 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 505 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 506 PetscOptionsEnd(); 507 508 /* Initialize the SuperLU process grid. */ 509 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 510 511 /* Initialize ScalePermstruct and LUstruct. */ 512 ScalePermstructInit(M, N, &lu->ScalePermstruct); 513 LUstructInit(M, N, &lu->LUstruct); 514 515 lu->options = options; 516 lu->flg = DIFFERENT_NONZERO_PATTERN; 517 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 518 *F = B; 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 524 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 525 PetscErrorCode ierr; 526 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 527 528 PetscFunctionBegin; 529 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 530 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 531 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 532 PetscFunctionReturn(0); 533 } 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 537 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 538 { 539 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 540 superlu_options_t options; 541 PetscErrorCode ierr; 542 543 PetscFunctionBegin; 544 /* check if matrix is superlu_dist type */ 545 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 546 547 options = lu->options; 548 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot] != NO);CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 553 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 554 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 555 if (options.ColPerm == NATURAL) { 556 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 557 } else if (options.ColPerm == MMD_AT_PLUS_A) { 558 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 559 } else if (options.ColPerm == MMD_ATA) { 560 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 561 } else if (options.ColPerm == COLAMD) { 562 ierr = PetscViewerASCIIPrintf(viewer," Column permutation COLAMD\n");CHKERRQ(ierr); 563 } else { 564 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 565 } 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatView_SuperLU_DIST" 571 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 572 { 573 PetscErrorCode ierr; 574 PetscTruth iascii; 575 PetscViewerFormat format; 576 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 577 578 PetscFunctionBegin; 579 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 580 581 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 582 if (iascii) { 583 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 584 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 585 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 EXTERN_C_BEGIN 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 595 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 596 { 597 /* This routine is only called to convert to MATSUPERLU_DIST */ 598 /* from MATSEQAIJ if A has a single process communicator */ 599 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 600 PetscErrorCode ierr; 601 PetscMPIInt size; 602 MPI_Comm comm; 603 Mat B=*newmat; 604 Mat_SuperLU_DIST *lu; 605 606 PetscFunctionBegin; 607 if (reuse == MAT_INITIAL_MATRIX) { 608 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 609 } 610 611 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 612 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 613 614 lu->MatDuplicate = A->ops->duplicate; 615 lu->MatView = A->ops->view; 616 lu->MatAssemblyEnd = A->ops->assemblyend; 617 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 618 lu->MatDestroy = A->ops->destroy; 619 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 620 621 B->spptr = (void*)lu; 622 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 623 B->ops->view = MatView_SuperLU_DIST; 624 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 625 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 626 B->ops->destroy = MatDestroy_SuperLU_DIST; 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 628 if (size == 1) { 629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 630 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 632 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 633 } else { 634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 635 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 637 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 638 } 639 ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 640 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 641 *newmat = B; 642 PetscFunctionReturn(0); 643 } 644 EXTERN_C_END 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 648 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 649 PetscErrorCode ierr; 650 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 651 652 PetscFunctionBegin; 653 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 654 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 /*MC 659 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 660 via the external package SuperLU_DIST. 661 662 If SuperLU_DIST is installed (see the manual for 663 instructions on how to declare the existence of external packages), 664 a matrix type can be constructed which invokes SuperLU_DIST solvers. 665 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 666 667 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 668 and from MATMPIAIJ otherwise. As a result, for single process communicators, 669 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 670 for communicators controlling multiple processes. It is recommended that you call both of 671 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 672 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 673 without data copy. 674 675 Options Database Keys: 676 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 677 . -mat_superlu_dist_r <n> - number of rows in processor partition 678 . -mat_superlu_dist_c <n> - number of columns in processor partition 679 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 680 . -mat_superlu_dist_equil - equilibrate the matrix 681 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 682 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 683 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 684 . -mat_superlu_dist_iterrefine - use iterative refinement 685 - -mat_superlu_dist_statprint - print factorization information 686 687 Level: beginner 688 689 .seealso: PCLU 690 M*/ 691 692 EXTERN_C_BEGIN 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatCreate_SuperLU_DIST" 695 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 696 { 697 PetscErrorCode ierr; 698 PetscMPIInt size; 699 Mat A_diag; 700 701 PetscFunctionBegin; 702 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 703 /* and SuperLU_DIST types */ 704 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 705 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 706 if (size == 1) { 707 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 708 } else { 709 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 710 A_diag = ((Mat_MPIAIJ *)A->data)->A; 711 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 712 } 713 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 EXTERN_C_END 717 718