1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->M, N=A->N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 if (lu->MatInputMode == GLOBAL) { 173 #if defined(PETSC_USE_COMPLEX) 174 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 175 &lu->grid, &lu->LUstruct, berr, &stat, &info); 176 #else 177 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 178 &lu->grid, &lu->LUstruct, berr, &stat, &info); 179 #endif 180 } else { /* distributed mat input */ 181 #if defined(PETSC_USE_COMPLEX) 182 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 185 #else 186 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 187 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 188 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 189 #endif 190 } 191 if (lu->options.PrintStat) { 192 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 193 } 194 PStatFree(&stat); 195 196 if (size > 1) { 197 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 198 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 199 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 201 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 202 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 } else { 207 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 215 { 216 Mat *tseq,A_seq = PETSC_NULL; 217 Mat_SeqAIJ *aa,*bb; 218 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 219 PetscErrorCode ierr; 220 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 221 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 222 PetscMPIInt size,rank; 223 SuperLUStat_t stat; 224 double *berr=0; 225 IS isrow; 226 PetscLogDouble time0,time,time_min,time_max; 227 #if defined(PETSC_USE_COMPLEX) 228 doublecomplex *av, *bv; 229 #else 230 double *av, *bv; 231 #endif 232 233 PetscFunctionBegin; 234 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 235 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 236 237 if (lu->options.PrintStat) { /* collect time for mat conversion */ 238 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 239 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 240 } 241 242 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 243 if (size > 1) { /* convert mpi A to seq mat A */ 244 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 245 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 246 ierr = ISDestroy(isrow);CHKERRQ(ierr); 247 248 A_seq = *tseq; 249 ierr = PetscFree(tseq);CHKERRQ(ierr); 250 aa = (Mat_SeqAIJ*)A_seq->data; 251 } else { 252 aa = (Mat_SeqAIJ*)A->data; 253 } 254 255 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 256 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 257 #if defined(PETSC_USE_COMPLEX) 258 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 259 #else 260 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 261 #endif 262 } else { /* successive numeric factorization, sparsity pattern is reused. */ 263 Destroy_CompCol_Matrix_dist(&lu->A_sup); 264 Destroy_LU(N, &lu->grid, &lu->LUstruct); 265 lu->options.Fact = SamePattern; 266 } 267 #if defined(PETSC_USE_COMPLEX) 268 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 269 #else 270 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 271 #endif 272 273 /* Create compressed column matrix A_sup. */ 274 #if defined(PETSC_USE_COMPLEX) 275 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 276 #else 277 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 278 #endif 279 } else { /* distributed mat input */ 280 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 281 aa=(Mat_SeqAIJ*)(mat->A)->data; 282 bb=(Mat_SeqAIJ*)(mat->B)->data; 283 ai=aa->i; aj=aa->j; 284 bi=bb->i; bj=bb->j; 285 #if defined(PETSC_USE_COMPLEX) 286 av=(doublecomplex*)aa->a; 287 bv=(doublecomplex*)bb->a; 288 #else 289 av=aa->a; 290 bv=bb->a; 291 #endif 292 rstart = mat->rstart; 293 nz = aa->nz + bb->nz; 294 garray = mat->garray; 295 rstart = mat->rstart; 296 297 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 298 #if defined(PETSC_USE_COMPLEX) 299 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 300 #else 301 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 302 #endif 303 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 304 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 305 Destroy_LU(N, &lu->grid, &lu->LUstruct); 306 lu->options.Fact = SamePattern; 307 } 308 nz = 0; irow = mat->rstart; 309 for ( i=0; i<m; i++ ) { 310 lu->row[i] = nz; 311 countA = ai[i+1] - ai[i]; 312 countB = bi[i+1] - bi[i]; 313 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 314 bjj = bj + bi[i]; 315 316 /* B part, smaller col index */ 317 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 318 jB = 0; 319 for (j=0; j<countB; j++){ 320 jcol = garray[bjj[j]]; 321 if (jcol > colA_start) { 322 jB = j; 323 break; 324 } 325 lu->col[nz] = jcol; 326 lu->val[nz++] = *bv++; 327 if (j==countB-1) jB = countB; 328 } 329 330 /* A part */ 331 for (j=0; j<countA; j++){ 332 lu->col[nz] = mat->rstart + ajj[j]; 333 lu->val[nz++] = *av++; 334 } 335 336 /* B part, larger col index */ 337 for (j=jB; j<countB; j++){ 338 lu->col[nz] = garray[bjj[j]]; 339 lu->val[nz++] = *bv++; 340 } 341 } 342 lu->row[m] = nz; 343 #if defined(PETSC_USE_COMPLEX) 344 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 345 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 346 #else 347 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 348 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 349 #endif 350 } 351 if (lu->options.PrintStat) { 352 ierr = PetscGetTime(&time);CHKERRQ(ierr); 353 time0 = time - time0; 354 } 355 356 /* Factor the matrix. */ 357 PStatInit(&stat); /* Initialize the statistics variables. */ 358 359 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 360 #if defined(PETSC_USE_COMPLEX) 361 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 362 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 363 #else 364 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 365 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 366 #endif 367 } else { /* distributed mat input */ 368 #if defined(PETSC_USE_COMPLEX) 369 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 370 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 371 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 372 #else 373 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 374 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 375 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 376 #endif 377 } 378 379 if (lu->MatInputMode == GLOBAL && size > 1){ 380 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 381 } 382 383 if (lu->options.PrintStat) { 384 if (size > 1){ 385 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 386 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 387 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 388 time = time/size; /* average time */ 389 if (!rank) 390 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 391 %g / %g / %g\n",time_max,time_min,time); 392 } else { 393 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 394 %g\n",time0); 395 } 396 397 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 398 } 399 PStatFree(&stat); 400 (*F)->assembled = PETSC_TRUE; 401 lu->flg = SAME_NONZERO_PATTERN; 402 PetscFunctionReturn(0); 403 } 404 405 /* Note the Petsc r and c permutations are ignored */ 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 408 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 409 { 410 Mat B; 411 Mat_SuperLU_DIST *lu; 412 PetscErrorCode ierr; 413 PetscInt M=A->M,N=A->N,indx; 414 PetscMPIInt size; 415 superlu_options_t options; 416 PetscTruth flg; 417 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 418 const char *prtype[] = {"LargeDiag","NATURAL"}; 419 420 PetscFunctionBegin; 421 /* Create the factorization matrix */ 422 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 423 ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr); 424 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 425 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 426 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 427 428 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 429 B->ops->solve = MatSolve_SuperLU_DIST; 430 B->factor = FACTOR_LU; 431 432 lu = (Mat_SuperLU_DIST*)(B->spptr); 433 434 /* Set the input options */ 435 set_default_options_dist(&options); 436 437 lu->MatInputMode = GLOBAL; 438 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 439 440 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 441 lu->nprow = size/2; /* Default process rows. */ 442 if (!lu->nprow) lu->nprow = 1; 443 lu->npcol = size/lu->nprow; /* Default process columns. */ 444 445 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 446 447 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 449 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 450 451 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 452 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 453 454 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 455 if (!flg) { 456 options.Equil = NO; 457 } 458 459 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 460 if (flg) { 461 switch (indx) { 462 case 0: 463 options.RowPerm = LargeDiag; 464 break; 465 case 1: 466 options.RowPerm = NOROWPERM; 467 break; 468 } 469 } 470 471 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 472 if (flg) { 473 switch (indx) { 474 case 0: 475 options.ColPerm = MMD_AT_PLUS_A; 476 break; 477 case 1: 478 options.ColPerm = NATURAL; 479 break; 480 case 2: 481 options.ColPerm = MMD_ATA; 482 break; 483 } 484 } 485 486 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 487 if (!flg) { 488 options.ReplaceTinyPivot = NO; 489 } 490 491 options.IterRefine = NOREFINE; 492 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 493 if (flg) { 494 options.IterRefine = DOUBLE; 495 } 496 497 if (PetscLogPrintInfo) { 498 options.PrintStat = YES; 499 } else { 500 options.PrintStat = NO; 501 } 502 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 503 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 504 PetscOptionsEnd(); 505 506 /* Initialize the SuperLU process grid. */ 507 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 508 509 /* Initialize ScalePermstruct and LUstruct. */ 510 ScalePermstructInit(M, N, &lu->ScalePermstruct); 511 LUstructInit(M, N, &lu->LUstruct); 512 513 lu->options = options; 514 lu->flg = DIFFERENT_NONZERO_PATTERN; 515 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 516 *F = B; 517 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 523 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 524 PetscErrorCode ierr; 525 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 526 527 PetscFunctionBegin; 528 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 529 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 530 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 531 PetscFunctionReturn(0); 532 } 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 536 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 537 { 538 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 539 superlu_options_t options; 540 PetscErrorCode ierr; 541 542 PetscFunctionBegin; 543 /* check if matrix is superlu_dist type */ 544 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 545 546 options = lu->options; 547 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 553 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 554 if (options.ColPerm == NATURAL) { 555 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 556 } else if (options.ColPerm == MMD_AT_PLUS_A) { 557 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 558 } else if (options.ColPerm == MMD_ATA) { 559 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 560 } else { 561 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SuperLU_DIST" 568 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 569 { 570 PetscErrorCode ierr; 571 PetscTruth iascii; 572 PetscViewerFormat format; 573 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 574 575 PetscFunctionBegin; 576 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 577 578 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 579 if (iascii) { 580 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 581 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 582 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 583 } 584 } 585 PetscFunctionReturn(0); 586 } 587 588 589 EXTERN_C_BEGIN 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 592 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 593 { 594 /* This routine is only called to convert to MATSUPERLU_DIST */ 595 /* from MATSEQAIJ if A has a single process communicator */ 596 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 597 PetscErrorCode ierr; 598 PetscMPIInt size; 599 MPI_Comm comm; 600 Mat B=*newmat; 601 Mat_SuperLU_DIST *lu; 602 603 PetscFunctionBegin; 604 if (reuse == MAT_INITIAL_MATRIX) { 605 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 606 } 607 608 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 609 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 610 611 lu->MatDuplicate = A->ops->duplicate; 612 lu->MatView = A->ops->view; 613 lu->MatAssemblyEnd = A->ops->assemblyend; 614 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 615 lu->MatDestroy = A->ops->destroy; 616 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 617 618 B->spptr = (void*)lu; 619 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 620 B->ops->view = MatView_SuperLU_DIST; 621 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 622 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 623 B->ops->destroy = MatDestroy_SuperLU_DIST; 624 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 625 if (size == 1) { 626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 627 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 629 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 630 } else { 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 632 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 634 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 635 } 636 ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 637 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 638 *newmat = B; 639 PetscFunctionReturn(0); 640 } 641 EXTERN_C_END 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 645 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 646 PetscErrorCode ierr; 647 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 648 649 PetscFunctionBegin; 650 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 651 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 /*MC 656 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 657 via the external package SuperLU_DIST. 658 659 If SuperLU_DIST is installed (see the manual for 660 instructions on how to declare the existence of external packages), 661 a matrix type can be constructed which invokes SuperLU_DIST solvers. 662 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 663 664 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 665 and from MATMPIAIJ otherwise. As a result, for single process communicators, 666 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 667 for communicators controlling multiple processes. It is recommended that you call both of 668 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 669 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 670 without data copy. 671 672 Options Database Keys: 673 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 674 . -mat_superlu_dist_r <n> - number of rows in processor partition 675 . -mat_superlu_dist_c <n> - number of columns in processor partition 676 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 677 . -mat_superlu_dist_equil - equilibrate the matrix 678 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 679 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 680 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 681 . -mat_superlu_dist_iterrefine - use iterative refinement 682 - -mat_superlu_dist_statprint - print factorization information 683 684 Level: beginner 685 686 .seealso: PCLU 687 M*/ 688 689 EXTERN_C_BEGIN 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatCreate_SuperLU_DIST" 692 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 693 { 694 PetscErrorCode ierr; 695 PetscMPIInt size; 696 Mat A_diag; 697 698 PetscFunctionBegin; 699 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 700 /* and SuperLU_DIST types */ 701 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 702 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 703 if (size == 1) { 704 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 705 } else { 706 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 707 A_diag = ((Mat_MPIAIJ *)A->data)->A; 708 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 709 } 710 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 EXTERN_C_END 714 715