1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "MatSolve_SuperLU_DIST" 133 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 134 { 135 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 136 PetscErrorCode ierr; 137 PetscMPIInt size; 138 PetscInt m=A->M, N=A->N; 139 SuperLUStat_t stat; 140 double berr[1]; 141 PetscScalar *bptr; 142 PetscInt info, nrhs=1; 143 Vec x_seq; 144 IS iden; 145 VecScatter scat; 146 147 PetscFunctionBegin; 148 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 149 if (size > 1) { 150 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 151 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 152 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 153 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 154 ierr = ISDestroy(iden);CHKERRQ(ierr); 155 156 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 157 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 159 } else { /* distributed mat input */ 160 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 161 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 162 } 163 } else { /* size == 1 */ 164 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 165 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 166 } 167 168 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 169 170 PStatInit(&stat); /* Initialize the statistics variables. */ 171 if (lu->MatInputMode == GLOBAL) { 172 #if defined(PETSC_USE_COMPLEX) 173 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #else 176 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 177 &lu->grid, &lu->LUstruct, berr, &stat, &info); 178 #endif 179 } else { /* distributed mat input */ 180 #if defined(PETSC_USE_COMPLEX) 181 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 182 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 183 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 184 #else 185 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 186 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 187 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 188 #endif 189 } 190 if (lu->options.PrintStat) { 191 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 192 } 193 PStatFree(&stat); 194 195 if (size > 1) { 196 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 197 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 198 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 199 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 201 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 202 } else { 203 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 204 } 205 } else { 206 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 207 } 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 214 { 215 Mat *tseq,A_seq = PETSC_NULL; 216 Mat_SeqAIJ *aa,*bb; 217 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 218 PetscErrorCode ierr; 219 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 220 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 221 PetscMPIInt size,rank; 222 SuperLUStat_t stat; 223 double *berr=0; 224 IS isrow; 225 PetscLogDouble time0,time,time_min,time_max; 226 Mat F_diag=PETSC_NULL; 227 #if defined(PETSC_USE_COMPLEX) 228 doublecomplex *av, *bv; 229 #else 230 double *av, *bv; 231 #endif 232 233 PetscFunctionBegin; 234 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 235 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 236 237 if (lu->options.PrintStat) { /* collect time for mat conversion */ 238 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 239 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 240 } 241 242 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 243 if (size > 1) { /* convert mpi A to seq mat A */ 244 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 245 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 246 ierr = ISDestroy(isrow);CHKERRQ(ierr); 247 248 A_seq = *tseq; 249 ierr = PetscFree(tseq);CHKERRQ(ierr); 250 aa = (Mat_SeqAIJ*)A_seq->data; 251 } else { 252 aa = (Mat_SeqAIJ*)A->data; 253 } 254 255 /* Convert Petsc NR matrix to SuperLU_DIST NC. 256 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 257 if (lu->flg == SAME_NONZERO_PATTERN) {/* successive numeric factorization, sparsity pattern is reused. */ 258 Destroy_CompCol_Matrix_dist(&lu->A_sup); 259 Destroy_LU(N, &lu->grid, &lu->LUstruct); 260 lu->options.Fact = SamePattern; 261 } 262 #if defined(PETSC_USE_COMPLEX) 263 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 264 #else 265 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 266 #endif 267 268 /* Create compressed column matrix A_sup. */ 269 #if defined(PETSC_USE_COMPLEX) 270 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 271 #else 272 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 273 #endif 274 } else { /* distributed mat input */ 275 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 276 aa=(Mat_SeqAIJ*)(mat->A)->data; 277 bb=(Mat_SeqAIJ*)(mat->B)->data; 278 ai=aa->i; aj=aa->j; 279 bi=bb->i; bj=bb->j; 280 #if defined(PETSC_USE_COMPLEX) 281 av=(doublecomplex*)aa->a; 282 bv=(doublecomplex*)bb->a; 283 #else 284 av=aa->a; 285 bv=bb->a; 286 #endif 287 rstart = mat->rstart; 288 nz = aa->nz + bb->nz; 289 garray = mat->garray; 290 rstart = mat->rstart; 291 292 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 293 #if defined(PETSC_USE_COMPLEX) 294 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 295 #else 296 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 297 #endif 298 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 299 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 300 Destroy_LU(N, &lu->grid, &lu->LUstruct); 301 lu->options.Fact = SamePattern; 302 } 303 nz = 0; irow = mat->rstart; 304 for ( i=0; i<m; i++ ) { 305 lu->row[i] = nz; 306 countA = ai[i+1] - ai[i]; 307 countB = bi[i+1] - bi[i]; 308 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 309 bjj = bj + bi[i]; 310 311 /* B part, smaller col index */ 312 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 313 jB = 0; 314 for (j=0; j<countB; j++){ 315 jcol = garray[bjj[j]]; 316 if (jcol > colA_start) { 317 jB = j; 318 break; 319 } 320 lu->col[nz] = jcol; 321 lu->val[nz++] = *bv++; 322 if (j==countB-1) jB = countB; 323 } 324 325 /* A part */ 326 for (j=0; j<countA; j++){ 327 lu->col[nz] = mat->rstart + ajj[j]; 328 lu->val[nz++] = *av++; 329 } 330 331 /* B part, larger col index */ 332 for (j=jB; j<countB; j++){ 333 lu->col[nz] = garray[bjj[j]]; 334 lu->val[nz++] = *bv++; 335 } 336 } 337 lu->row[m] = nz; 338 #if defined(PETSC_USE_COMPLEX) 339 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 340 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 341 #else 342 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 343 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 344 #endif 345 } 346 if (lu->options.PrintStat) { 347 ierr = PetscGetTime(&time);CHKERRQ(ierr); 348 time0 = time - time0; 349 } 350 351 /* Factor the matrix. */ 352 PStatInit(&stat); /* Initialize the statistics variables. */ 353 354 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 355 #if defined(PETSC_USE_COMPLEX) 356 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 357 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 358 #else 359 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 360 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 361 #endif 362 } else { /* distributed mat input */ 363 #if defined(PETSC_USE_COMPLEX) 364 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 365 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 366 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 367 #else 368 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 369 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 370 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 371 #endif 372 } 373 374 if (lu->MatInputMode == GLOBAL && size > 1){ 375 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 376 } 377 378 if (lu->options.PrintStat) { 379 if (size > 1){ 380 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 381 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 382 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 383 time = time/size; /* average time */ 384 if (!rank) 385 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 386 %g / %g / %g\n",time_max,time_min,time); 387 } else { 388 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 389 %g\n",time0); 390 } 391 392 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 393 } 394 PStatFree(&stat); 395 if (size > 1){ 396 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 397 F_diag->assembled = PETSC_TRUE; 398 } 399 (*F)->assembled = PETSC_TRUE; 400 lu->flg = SAME_NONZERO_PATTERN; 401 PetscFunctionReturn(0); 402 } 403 404 /* Note the Petsc r and c permutations are ignored */ 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 407 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 408 { 409 Mat B; 410 Mat_SuperLU_DIST *lu; 411 PetscErrorCode ierr; 412 PetscInt M=A->M,N=A->N,indx; 413 PetscMPIInt size; 414 superlu_options_t options; 415 PetscTruth flg; 416 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 417 const char *prtype[] = {"LargeDiag","NATURAL"}; 418 419 PetscFunctionBegin; 420 /* Create the factorization matrix */ 421 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 422 ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr); 423 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 424 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 425 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 426 427 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 428 B->ops->solve = MatSolve_SuperLU_DIST; 429 B->factor = FACTOR_LU; 430 431 lu = (Mat_SuperLU_DIST*)(B->spptr); 432 433 /* Set the input options */ 434 set_default_options_dist(&options); 435 436 lu->MatInputMode = GLOBAL; 437 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 438 439 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 440 lu->nprow = size/2; /* Default process rows. */ 441 if (!lu->nprow) lu->nprow = 1; 442 lu->npcol = size/lu->nprow; /* Default process columns. */ 443 444 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 445 446 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 447 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 448 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 449 450 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 451 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 452 453 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 454 if (!flg) { 455 options.Equil = NO; 456 } 457 458 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 459 if (flg) { 460 switch (indx) { 461 case 0: 462 options.RowPerm = LargeDiag; 463 break; 464 case 1: 465 options.RowPerm = NOROWPERM; 466 break; 467 } 468 } 469 470 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 471 if (flg) { 472 switch (indx) { 473 case 0: 474 options.ColPerm = MMD_AT_PLUS_A; 475 break; 476 case 1: 477 options.ColPerm = NATURAL; 478 break; 479 case 2: 480 options.ColPerm = MMD_ATA; 481 break; 482 } 483 } 484 485 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 486 if (!flg) { 487 options.ReplaceTinyPivot = NO; 488 } 489 490 options.IterRefine = NOREFINE; 491 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 492 if (flg) { 493 options.IterRefine = DOUBLE; 494 } 495 496 if (PetscLogPrintInfo) { 497 options.PrintStat = YES; 498 } else { 499 options.PrintStat = NO; 500 } 501 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 502 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 503 PetscOptionsEnd(); 504 505 /* Initialize the SuperLU process grid. */ 506 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 507 508 /* Initialize ScalePermstruct and LUstruct. */ 509 ScalePermstructInit(M, N, &lu->ScalePermstruct); 510 LUstructInit(M, N, &lu->LUstruct); 511 512 lu->options = options; 513 lu->flg = DIFFERENT_NONZERO_PATTERN; 514 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 515 *F = B; 516 517 PetscFunctionReturn(0); 518 } 519 520 #undef __FUNCT__ 521 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 522 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 523 PetscErrorCode ierr; 524 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 525 526 PetscFunctionBegin; 527 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 528 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 529 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 530 PetscFunctionReturn(0); 531 } 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 535 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 536 { 537 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 538 superlu_options_t options; 539 PetscErrorCode ierr; 540 541 PetscFunctionBegin; 542 /* check if matrix is superlu_dist type */ 543 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 544 545 options = lu->options; 546 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 547 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 553 if (options.ColPerm == NATURAL) { 554 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 555 } else if (options.ColPerm == MMD_AT_PLUS_A) { 556 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 557 } else if (options.ColPerm == MMD_ATA) { 558 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 559 } else { 560 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatView_SuperLU_DIST" 567 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 568 { 569 PetscErrorCode ierr; 570 PetscTruth iascii; 571 PetscViewerFormat format; 572 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 573 574 PetscFunctionBegin; 575 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 576 577 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 578 if (iascii) { 579 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 580 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 581 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 582 } 583 } 584 PetscFunctionReturn(0); 585 } 586 587 588 EXTERN_C_BEGIN 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 591 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 592 { 593 /* This routine is only called to convert to MATSUPERLU_DIST */ 594 /* from MATSEQAIJ if A has a single process communicator */ 595 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 596 PetscErrorCode ierr; 597 PetscMPIInt size; 598 MPI_Comm comm; 599 Mat B=*newmat; 600 Mat_SuperLU_DIST *lu; 601 602 PetscFunctionBegin; 603 if (reuse == MAT_INITIAL_MATRIX) { 604 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 605 } 606 607 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 608 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 609 610 lu->MatDuplicate = A->ops->duplicate; 611 lu->MatView = A->ops->view; 612 lu->MatAssemblyEnd = A->ops->assemblyend; 613 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 614 lu->MatDestroy = A->ops->destroy; 615 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 616 617 B->spptr = (void*)lu; 618 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 619 B->ops->view = MatView_SuperLU_DIST; 620 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 621 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 622 B->ops->destroy = MatDestroy_SuperLU_DIST; 623 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 624 if (size == 1) { 625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 626 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 628 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 629 } else { 630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 631 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 633 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 634 } 635 ierr = PetscVerboseInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 636 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 637 *newmat = B; 638 PetscFunctionReturn(0); 639 } 640 EXTERN_C_END 641 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 644 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 645 PetscErrorCode ierr; 646 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 647 648 PetscFunctionBegin; 649 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 650 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /*MC 655 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 656 via the external package SuperLU_DIST. 657 658 If SuperLU_DIST is installed (see the manual for 659 instructions on how to declare the existence of external packages), 660 a matrix type can be constructed which invokes SuperLU_DIST solvers. 661 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 662 663 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 664 and from MATMPIAIJ otherwise. As a result, for single process communicators, 665 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 666 for communicators controlling multiple processes. It is recommended that you call both of 667 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 668 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 669 without data copy. 670 671 Options Database Keys: 672 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 673 . -mat_superlu_dist_r <n> - number of rows in processor partition 674 . -mat_superlu_dist_c <n> - number of columns in processor partition 675 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 676 . -mat_superlu_dist_equil - equilibrate the matrix 677 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 678 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 679 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 680 . -mat_superlu_dist_iterrefine - use iterative refinement 681 - -mat_superlu_dist_statprint - print factorization information 682 683 Level: beginner 684 685 .seealso: PCLU 686 M*/ 687 688 EXTERN_C_BEGIN 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatCreate_SuperLU_DIST" 691 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 692 { 693 PetscErrorCode ierr; 694 PetscMPIInt size; 695 Mat A_diag; 696 697 PetscFunctionBegin; 698 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 699 /* and SuperLU_DIST types */ 700 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 701 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 702 if (size == 1) { 703 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 704 } else { 705 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 706 A_diag = ((Mat_MPIAIJ *)A->data)->A; 707 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 708 } 709 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 EXTERN_C_END 713 714