1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->M, N=A->N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 if (lu->MatInputMode == GLOBAL) { 173 #if defined(PETSC_USE_COMPLEX) 174 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 175 &lu->grid, &lu->LUstruct, berr, &stat, &info); 176 #else 177 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 178 &lu->grid, &lu->LUstruct, berr, &stat, &info); 179 #endif 180 } else { /* distributed mat input */ 181 #if defined(PETSC_USE_COMPLEX) 182 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 185 #else 186 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 187 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 188 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 189 #endif 190 } 191 if (lu->options.PrintStat) { 192 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 193 } 194 PStatFree(&stat); 195 196 if (size > 1) { 197 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 198 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 199 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 201 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 202 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 } else { 207 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 215 { 216 Mat *tseq,A_seq = PETSC_NULL; 217 Mat_SeqAIJ *aa,*bb; 218 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 219 PetscErrorCode ierr; 220 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 221 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 222 PetscMPIInt size,rank; 223 SuperLUStat_t stat; 224 double *berr=0; 225 IS isrow; 226 PetscLogDouble time0,time,time_min,time_max; 227 Mat F_diag=PETSC_NULL; 228 #if defined(PETSC_USE_COMPLEX) 229 doublecomplex *av, *bv; 230 #else 231 double *av, *bv; 232 #endif 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 237 238 if (lu->options.PrintStat) { /* collect time for mat conversion */ 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 jB = 0; 320 for (j=0; j<countB; j++){ 321 jcol = garray[bjj[j]]; 322 if (jcol > colA_start) { 323 jB = j; 324 break; 325 } 326 lu->col[nz] = jcol; 327 lu->val[nz++] = *bv++; 328 if (j==countB-1) jB = countB; 329 } 330 331 /* A part */ 332 for (j=0; j<countA; j++){ 333 lu->col[nz] = mat->rstart + ajj[j]; 334 lu->val[nz++] = *av++; 335 } 336 337 /* B part, larger col index */ 338 for (j=jB; j<countB; j++){ 339 lu->col[nz] = garray[bjj[j]]; 340 lu->val[nz++] = *bv++; 341 } 342 } 343 lu->row[m] = nz; 344 #if defined(PETSC_USE_COMPLEX) 345 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 347 #else 348 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 349 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 350 #endif 351 } 352 if (lu->options.PrintStat) { 353 ierr = PetscGetTime(&time);CHKERRQ(ierr); 354 time0 = time - time0; 355 } 356 357 /* Factor the matrix. */ 358 PStatInit(&stat); /* Initialize the statistics variables. */ 359 360 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 361 #if defined(PETSC_USE_COMPLEX) 362 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 363 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 364 #else 365 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 366 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 367 #endif 368 } else { /* distributed mat input */ 369 #if defined(PETSC_USE_COMPLEX) 370 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 371 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 372 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 373 #else 374 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 376 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 377 #endif 378 } 379 380 if (lu->MatInputMode == GLOBAL && size > 1){ 381 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 382 } 383 384 if (lu->options.PrintStat) { 385 if (size > 1){ 386 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 387 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 388 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 389 time = time/size; /* average time */ 390 if (!rank) 391 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 392 %g / %g / %g\n",time_max,time_min,time); 393 } else { 394 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 395 %g\n",time0); 396 } 397 398 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 399 } 400 PStatFree(&stat); 401 if (size > 1){ 402 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 403 F_diag->assembled = PETSC_TRUE; 404 } 405 (*F)->assembled = PETSC_TRUE; 406 lu->flg = SAME_NONZERO_PATTERN; 407 PetscFunctionReturn(0); 408 } 409 410 /* Note the Petsc r and c permutations are ignored */ 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 413 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 414 { 415 Mat B; 416 Mat_SuperLU_DIST *lu; 417 PetscErrorCode ierr; 418 PetscInt M=A->M,N=A->N,indx; 419 PetscMPIInt size; 420 superlu_options_t options; 421 PetscTruth flg; 422 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 423 const char *prtype[] = {"LargeDiag","NATURAL"}; 424 425 PetscFunctionBegin; 426 /* Create the factorization matrix */ 427 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 428 ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr); 429 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 430 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 431 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 432 433 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 434 B->ops->solve = MatSolve_SuperLU_DIST; 435 B->factor = FACTOR_LU; 436 437 lu = (Mat_SuperLU_DIST*)(B->spptr); 438 439 /* Set the input options */ 440 set_default_options_dist(&options); 441 442 lu->MatInputMode = GLOBAL; 443 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 444 445 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 446 lu->nprow = size/2; /* Default process rows. */ 447 if (!lu->nprow) lu->nprow = 1; 448 lu->npcol = size/lu->nprow; /* Default process columns. */ 449 450 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 451 452 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 453 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 454 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 455 456 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 457 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 458 459 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 460 if (!flg) { 461 options.Equil = NO; 462 } 463 464 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 465 if (flg) { 466 switch (indx) { 467 case 0: 468 options.RowPerm = LargeDiag; 469 break; 470 case 1: 471 options.RowPerm = NOROWPERM; 472 break; 473 } 474 } 475 476 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 477 if (flg) { 478 switch (indx) { 479 case 0: 480 options.ColPerm = MMD_AT_PLUS_A; 481 break; 482 case 1: 483 options.ColPerm = NATURAL; 484 break; 485 case 2: 486 options.ColPerm = MMD_ATA; 487 break; 488 } 489 } 490 491 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 492 if (!flg) { 493 options.ReplaceTinyPivot = NO; 494 } 495 496 options.IterRefine = NOREFINE; 497 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 498 if (flg) { 499 options.IterRefine = DOUBLE; 500 } 501 502 if (PetscLogPrintInfo) { 503 options.PrintStat = YES; 504 } else { 505 options.PrintStat = NO; 506 } 507 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 508 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 509 PetscOptionsEnd(); 510 511 /* Initialize the SuperLU process grid. */ 512 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 513 514 /* Initialize ScalePermstruct and LUstruct. */ 515 ScalePermstructInit(M, N, &lu->ScalePermstruct); 516 LUstructInit(M, N, &lu->LUstruct); 517 518 lu->options = options; 519 lu->flg = DIFFERENT_NONZERO_PATTERN; 520 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 521 *F = B; 522 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 528 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 529 PetscErrorCode ierr; 530 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 531 532 PetscFunctionBegin; 533 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 534 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 535 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 541 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 542 { 543 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 544 superlu_options_t options; 545 PetscErrorCode ierr; 546 547 PetscFunctionBegin; 548 /* check if matrix is superlu_dist type */ 549 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 550 551 options = lu->options; 552 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 553 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 554 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 555 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 556 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 557 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 558 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 559 if (options.ColPerm == NATURAL) { 560 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 561 } else if (options.ColPerm == MMD_AT_PLUS_A) { 562 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 563 } else if (options.ColPerm == MMD_ATA) { 564 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 565 } else { 566 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 567 } 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatView_SuperLU_DIST" 573 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 574 { 575 PetscErrorCode ierr; 576 PetscTruth iascii; 577 PetscViewerFormat format; 578 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 579 580 PetscFunctionBegin; 581 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 582 583 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 584 if (iascii) { 585 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 586 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 587 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 588 } 589 } 590 PetscFunctionReturn(0); 591 } 592 593 594 EXTERN_C_BEGIN 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 597 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 598 { 599 /* This routine is only called to convert to MATSUPERLU_DIST */ 600 /* from MATSEQAIJ if A has a single process communicator */ 601 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 602 PetscErrorCode ierr; 603 PetscMPIInt size; 604 MPI_Comm comm; 605 Mat B=*newmat; 606 Mat_SuperLU_DIST *lu; 607 608 PetscFunctionBegin; 609 if (reuse == MAT_INITIAL_MATRIX) { 610 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 611 } 612 613 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 614 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 615 616 lu->MatDuplicate = A->ops->duplicate; 617 lu->MatView = A->ops->view; 618 lu->MatAssemblyEnd = A->ops->assemblyend; 619 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 620 lu->MatDestroy = A->ops->destroy; 621 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 622 623 B->spptr = (void*)lu; 624 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 625 B->ops->view = MatView_SuperLU_DIST; 626 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 627 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 628 B->ops->destroy = MatDestroy_SuperLU_DIST; 629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 630 if (size == 1) { 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 632 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 634 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 635 } else { 636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 637 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 639 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 640 } 641 ierr = PetscVerboseInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 642 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 643 *newmat = B; 644 PetscFunctionReturn(0); 645 } 646 EXTERN_C_END 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 650 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 651 PetscErrorCode ierr; 652 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 653 654 PetscFunctionBegin; 655 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 656 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 /*MC 661 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 662 via the external package SuperLU_DIST. 663 664 If SuperLU_DIST is installed (see the manual for 665 instructions on how to declare the existence of external packages), 666 a matrix type can be constructed which invokes SuperLU_DIST solvers. 667 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 668 669 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 670 and from MATMPIAIJ otherwise. As a result, for single process communicators, 671 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 672 for communicators controlling multiple processes. It is recommended that you call both of 673 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 674 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 675 without data copy. 676 677 Options Database Keys: 678 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 679 . -mat_superlu_dist_r <n> - number of rows in processor partition 680 . -mat_superlu_dist_c <n> - number of columns in processor partition 681 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 682 . -mat_superlu_dist_equil - equilibrate the matrix 683 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 684 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 685 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 686 . -mat_superlu_dist_iterrefine - use iterative refinement 687 - -mat_superlu_dist_statprint - print factorization information 688 689 Level: beginner 690 691 .seealso: PCLU 692 M*/ 693 694 EXTERN_C_BEGIN 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatCreate_SuperLU_DIST" 697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 698 { 699 PetscErrorCode ierr; 700 PetscMPIInt size; 701 Mat A_diag; 702 703 PetscFunctionBegin; 704 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 705 /* and SuperLU_DIST types */ 706 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 707 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 708 if (size == 1) { 709 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 710 } else { 711 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 712 A_diag = ((Mat_MPIAIJ *)A->data)->A; 713 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 714 } 715 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 EXTERN_C_END 719 720