1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->M, N=A->N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 173 if (lu->MatInputMode == GLOBAL) { 174 #if defined(PETSC_USE_COMPLEX) 175 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 176 &lu->grid, &lu->LUstruct, berr, &stat, &info); 177 #else 178 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 179 &lu->grid, &lu->LUstruct, berr, &stat, &info); 180 #endif 181 } else { /* distributed mat input */ 182 #if defined(PETSC_USE_COMPLEX) 183 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 184 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 185 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 186 #else 187 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 188 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 189 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 190 #endif 191 } 192 if (lu->StatPrint) { 193 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 194 } 195 PStatFree(&stat); 196 197 if (size > 1) { 198 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 199 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 200 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 201 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 202 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 203 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 204 } else { 205 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 206 } 207 } else { 208 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 209 } 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 215 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 216 { 217 Mat *tseq,A_seq = PETSC_NULL; 218 Mat_SeqAIJ *aa,*bb; 219 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 220 PetscErrorCode ierr; 221 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 222 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 223 PetscMPIInt size,rank; 224 SuperLUStat_t stat; 225 double *berr=0; 226 IS isrow; 227 PetscLogDouble time0,time,time_min,time_max; 228 #if defined(PETSC_USE_COMPLEX) 229 doublecomplex *av, *bv; 230 #else 231 double *av, *bv; 232 #endif 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 237 238 if (lu->StatPrint) { /* collect time for mat conversion */ 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 jB = 0; 320 for (j=0; j<countB; j++){ 321 jcol = garray[bjj[j]]; 322 if (jcol > colA_start) { 323 jB = j; 324 break; 325 } 326 lu->col[nz] = jcol; 327 lu->val[nz++] = *bv++; 328 if (j==countB-1) jB = countB; 329 } 330 331 /* A part */ 332 for (j=0; j<countA; j++){ 333 lu->col[nz] = mat->rstart + ajj[j]; 334 lu->val[nz++] = *av++; 335 } 336 337 /* B part, larger col index */ 338 for (j=jB; j<countB; j++){ 339 lu->col[nz] = garray[bjj[j]]; 340 lu->val[nz++] = *bv++; 341 } 342 } 343 lu->row[m] = nz; 344 #if defined(PETSC_USE_COMPLEX) 345 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 347 #else 348 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 349 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 350 #endif 351 } 352 if (lu->StatPrint) { 353 ierr = PetscGetTime(&time);CHKERRQ(ierr); 354 time0 = time - time0; 355 } 356 357 /* Factor the matrix. */ 358 PStatInit(&stat); /* Initialize the statistics variables. */ 359 360 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 361 #if defined(PETSC_USE_COMPLEX) 362 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 363 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 364 #else 365 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 366 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 367 #endif 368 } else { /* distributed mat input */ 369 #if defined(PETSC_USE_COMPLEX) 370 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 371 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 372 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 373 #else 374 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 376 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 377 #endif 378 } 379 380 if (lu->MatInputMode == GLOBAL && size > 1){ 381 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 382 } 383 384 if (lu->StatPrint) { 385 if (size > 1){ 386 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 387 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 388 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 389 time = time/size; /* average time */ 390 if (!rank) 391 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 392 %g / %g / %g\n",time_max,time_min,time); 393 } else { 394 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 395 %g\n",time0); 396 } 397 398 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 399 } 400 PStatFree(&stat); 401 (*F)->assembled = PETSC_TRUE; 402 lu->flg = SAME_NONZERO_PATTERN; 403 PetscFunctionReturn(0); 404 } 405 406 /* Note the Petsc r and c permutations are ignored */ 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 409 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 410 { 411 Mat B; 412 Mat_SuperLU_DIST *lu; 413 PetscErrorCode ierr; 414 PetscInt M=A->M,N=A->N,indx; 415 PetscMPIInt size; 416 superlu_options_t options; 417 PetscTruth flg; 418 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 419 const char *prtype[] = {"LargeDiag","NATURAL"}; 420 421 PetscFunctionBegin; 422 /* Create the factorization matrix */ 423 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 424 ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr); 425 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 426 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 427 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 428 429 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 430 B->ops->solve = MatSolve_SuperLU_DIST; 431 B->factor = FACTOR_LU; 432 433 lu = (Mat_SuperLU_DIST*)(B->spptr); 434 435 /* Set the input options */ 436 set_default_options_dist(&options); 437 lu->MatInputMode = GLOBAL; 438 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 439 440 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 441 lu->nprow = size/2; /* Default process rows. */ 442 if (!lu->nprow) lu->nprow = 1; 443 lu->npcol = size/lu->nprow; /* Default process columns. */ 444 445 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 446 447 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 449 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 450 451 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 452 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 453 454 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 455 if (!flg) { 456 options.Equil = NO; 457 } 458 459 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 460 if (flg) { 461 switch (indx) { 462 case 0: 463 options.RowPerm = LargeDiag; 464 break; 465 case 1: 466 options.RowPerm = NOROWPERM; 467 break; 468 } 469 } 470 471 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 472 if (flg) { 473 switch (indx) { 474 case 0: 475 options.ColPerm = MMD_AT_PLUS_A; 476 break; 477 case 1: 478 options.ColPerm = NATURAL; 479 break; 480 case 2: 481 options.ColPerm = MMD_ATA; 482 break; 483 case 3: 484 options.ColPerm = COLAMD; 485 break; 486 } 487 } 488 489 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 490 if (!flg) { 491 options.ReplaceTinyPivot = NO; 492 } 493 494 options.IterRefine = NOREFINE; 495 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 496 if (flg) { 497 options.IterRefine = DOUBLE; 498 } 499 500 if (PetscLogPrintInfo) { 501 lu->StatPrint = (PetscInt)PETSC_TRUE; 502 } else { 503 lu->StatPrint = (PetscInt)PETSC_FALSE; 504 } 505 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 506 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 507 PetscOptionsEnd(); 508 509 /* Initialize the SuperLU process grid. */ 510 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 511 512 /* Initialize ScalePermstruct and LUstruct. */ 513 ScalePermstructInit(M, N, &lu->ScalePermstruct); 514 LUstructInit(M, N, &lu->LUstruct); 515 516 lu->options = options; 517 lu->flg = DIFFERENT_NONZERO_PATTERN; 518 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 519 *F = B; 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 525 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 526 PetscErrorCode ierr; 527 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 528 529 PetscFunctionBegin; 530 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 531 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 532 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 533 PetscFunctionReturn(0); 534 } 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 538 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 539 { 540 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 541 superlu_options_t options; 542 PetscErrorCode ierr; 543 544 PetscFunctionBegin; 545 /* check if matrix is superlu_dist type */ 546 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 547 548 options = lu->options; 549 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot] != NO);CHKERRQ(ierr); 553 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 554 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 555 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 556 if (options.ColPerm == NATURAL) { 557 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 558 } else if (options.ColPerm == MMD_AT_PLUS_A) { 559 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 560 } else if (options.ColPerm == MMD_ATA) { 561 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 562 } else if (options.ColPerm == COLAMD) { 563 ierr = PetscViewerASCIIPrintf(viewer," Column permutation COLAMD\n");CHKERRQ(ierr); 564 } else { 565 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatView_SuperLU_DIST" 572 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 573 { 574 PetscErrorCode ierr; 575 PetscTruth iascii; 576 PetscViewerFormat format; 577 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 578 579 PetscFunctionBegin; 580 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 581 582 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 583 if (iascii) { 584 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 585 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 586 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 587 } 588 } 589 PetscFunctionReturn(0); 590 } 591 592 593 EXTERN_C_BEGIN 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 596 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 597 { 598 /* This routine is only called to convert to MATSUPERLU_DIST */ 599 /* from MATSEQAIJ if A has a single process communicator */ 600 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 601 PetscErrorCode ierr; 602 PetscMPIInt size; 603 MPI_Comm comm; 604 Mat B=*newmat; 605 Mat_SuperLU_DIST *lu; 606 607 PetscFunctionBegin; 608 if (reuse == MAT_INITIAL_MATRIX) { 609 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 610 } 611 612 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 613 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 614 615 lu->MatDuplicate = A->ops->duplicate; 616 lu->MatView = A->ops->view; 617 lu->MatAssemblyEnd = A->ops->assemblyend; 618 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 619 lu->MatDestroy = A->ops->destroy; 620 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 621 622 B->spptr = (void*)lu; 623 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 624 B->ops->view = MatView_SuperLU_DIST; 625 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 626 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 627 B->ops->destroy = MatDestroy_SuperLU_DIST; 628 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 629 if (size == 1) { 630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 631 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 633 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 634 } else { 635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 636 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 638 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 639 } 640 ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 641 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 642 *newmat = B; 643 PetscFunctionReturn(0); 644 } 645 EXTERN_C_END 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 649 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 650 PetscErrorCode ierr; 651 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 652 653 PetscFunctionBegin; 654 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 655 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 656 PetscFunctionReturn(0); 657 } 658 659 /*MC 660 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 661 via the external package SuperLU_DIST. 662 663 If SuperLU_DIST is installed (see the manual for 664 instructions on how to declare the existence of external packages), 665 a matrix type can be constructed which invokes SuperLU_DIST solvers. 666 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 667 668 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 669 and from MATMPIAIJ otherwise. As a result, for single process communicators, 670 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 671 for communicators controlling multiple processes. It is recommended that you call both of 672 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 673 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 674 without data copy. 675 676 Options Database Keys: 677 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 678 . -mat_superlu_dist_r <n> - number of rows in processor partition 679 . -mat_superlu_dist_c <n> - number of columns in processor partition 680 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 681 . -mat_superlu_dist_equil - equilibrate the matrix 682 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 683 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 684 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 685 . -mat_superlu_dist_iterrefine - use iterative refinement 686 - -mat_superlu_dist_statprint - print factorization information 687 688 Level: beginner 689 690 .seealso: PCLU 691 M*/ 692 693 EXTERN_C_BEGIN 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatCreate_SuperLU_DIST" 696 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 697 { 698 PetscErrorCode ierr; 699 PetscMPIInt size; 700 Mat A_diag; 701 702 PetscFunctionBegin; 703 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 704 /* and SuperLU_DIST types */ 705 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 706 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 707 if (size == 1) { 708 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 711 A_diag = ((Mat_MPIAIJ *)A->data)->A; 712 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 713 } 714 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 EXTERN_C_END 718 719