1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 76 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 80 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 81 *newmat = B; 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 89 { 90 PetscErrorCode ierr; 91 PetscMPIInt size; 92 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 93 94 PetscFunctionBegin; 95 if (lu->CleanUpSuperLU_Dist) { 96 /* Deallocate SuperLU_DIST storage */ 97 if (lu->MatInputMode == GLOBAL) { 98 Destroy_CompCol_Matrix_dist(&lu->A_sup); 99 } else { 100 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 101 if ( lu->options.SolveInitialized ) { 102 #if defined(PETSC_USE_COMPLEX) 103 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 104 #else 105 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #endif 107 } 108 } 109 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 110 ScalePermstructFree(&lu->ScalePermstruct); 111 LUstructFree(&lu->LUstruct); 112 113 /* Release the SuperLU_DIST process grid. */ 114 superlu_gridexit(&lu->grid); 115 116 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 117 } 118 119 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 120 if (size == 1) { 121 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 122 } else { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } 125 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatSolve_SuperLU_DIST" 131 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 132 { 133 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 134 PetscErrorCode ierr; 135 PetscMPIInt size; 136 PetscInt m=A->rmap.N, N=A->cmap.N; 137 SuperLUStat_t stat; 138 double berr[1]; 139 PetscScalar *bptr; 140 PetscInt info, nrhs=1; 141 Vec x_seq; 142 IS iden; 143 VecScatter scat; 144 145 PetscFunctionBegin; 146 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 147 if (size > 1) { 148 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 149 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 150 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 151 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 152 ierr = ISDestroy(iden);CHKERRQ(ierr); 153 154 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 155 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 156 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 157 } else { /* distributed mat input */ 158 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 159 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 160 } 161 } else { /* size == 1 */ 162 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 163 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 164 } 165 166 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 167 168 PStatInit(&stat); /* Initialize the statistics variables. */ 169 if (lu->MatInputMode == GLOBAL) { 170 #if defined(PETSC_USE_COMPLEX) 171 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 172 &lu->grid, &lu->LUstruct, berr, &stat, &info); 173 #else 174 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 175 &lu->grid, &lu->LUstruct, berr, &stat, &info); 176 #endif 177 } else { /* distributed mat input */ 178 #if defined(PETSC_USE_COMPLEX) 179 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 180 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 181 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 182 #else 183 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 184 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 185 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 186 #endif 187 } 188 if (lu->options.PrintStat) { 189 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 190 } 191 PStatFree(&stat); 192 193 if (size > 1) { 194 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 195 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 196 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 197 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 198 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 199 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 200 } else { 201 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 202 } 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 211 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 212 { 213 Mat *tseq,A_seq = PETSC_NULL; 214 Mat_SeqAIJ *aa,*bb; 215 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 216 PetscErrorCode ierr; 217 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 218 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 219 PetscMPIInt size,rank; 220 SuperLUStat_t stat; 221 double *berr=0; 222 IS isrow; 223 PetscLogDouble time0,time,time_min,time_max; 224 Mat F_diag=PETSC_NULL; 225 #if defined(PETSC_USE_COMPLEX) 226 doublecomplex *av, *bv; 227 #else 228 double *av, *bv; 229 #endif 230 231 PetscFunctionBegin; 232 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 233 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 234 235 if (lu->options.PrintStat) { /* collect time for mat conversion */ 236 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 237 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 238 } 239 240 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 241 if (size > 1) { /* convert mpi A to seq mat A */ 242 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 243 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 244 ierr = ISDestroy(isrow);CHKERRQ(ierr); 245 246 A_seq = *tseq; 247 ierr = PetscFree(tseq);CHKERRQ(ierr); 248 aa = (Mat_SeqAIJ*)A_seq->data; 249 } else { 250 aa = (Mat_SeqAIJ*)A->data; 251 } 252 253 /* Convert Petsc NR matrix to SuperLU_DIST NC. 254 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 255 if (lu->flg == SAME_NONZERO_PATTERN) {/* successive numeric factorization, sparsity pattern is reused. */ 256 Destroy_CompCol_Matrix_dist(&lu->A_sup); 257 Destroy_LU(N, &lu->grid, &lu->LUstruct); 258 lu->options.Fact = SamePattern; 259 } 260 #if defined(PETSC_USE_COMPLEX) 261 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 262 #else 263 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 264 #endif 265 266 /* Create compressed column matrix A_sup. */ 267 #if defined(PETSC_USE_COMPLEX) 268 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 269 #else 270 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 271 #endif 272 } else { /* distributed mat input */ 273 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 274 aa=(Mat_SeqAIJ*)(mat->A)->data; 275 bb=(Mat_SeqAIJ*)(mat->B)->data; 276 ai=aa->i; aj=aa->j; 277 bi=bb->i; bj=bb->j; 278 #if defined(PETSC_USE_COMPLEX) 279 av=(doublecomplex*)aa->a; 280 bv=(doublecomplex*)bb->a; 281 #else 282 av=aa->a; 283 bv=bb->a; 284 #endif 285 rstart = A->rmap.rstart; 286 nz = aa->nz + bb->nz; 287 garray = mat->garray; 288 289 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 290 #if defined(PETSC_USE_COMPLEX) 291 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 292 #else 293 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 294 #endif 295 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 296 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 297 Destroy_LU(N, &lu->grid, &lu->LUstruct); 298 lu->options.Fact = SamePattern; 299 } 300 nz = 0; irow = rstart; 301 for ( i=0; i<m; i++ ) { 302 lu->row[i] = nz; 303 countA = ai[i+1] - ai[i]; 304 countB = bi[i+1] - bi[i]; 305 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 306 bjj = bj + bi[i]; 307 308 /* B part, smaller col index */ 309 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 310 jB = 0; 311 for (j=0; j<countB; j++){ 312 jcol = garray[bjj[j]]; 313 if (jcol > colA_start) { 314 jB = j; 315 break; 316 } 317 lu->col[nz] = jcol; 318 lu->val[nz++] = *bv++; 319 if (j==countB-1) jB = countB; 320 } 321 322 /* A part */ 323 for (j=0; j<countA; j++){ 324 lu->col[nz] = rstart + ajj[j]; 325 lu->val[nz++] = *av++; 326 } 327 328 /* B part, larger col index */ 329 for (j=jB; j<countB; j++){ 330 lu->col[nz] = garray[bjj[j]]; 331 lu->val[nz++] = *bv++; 332 } 333 } 334 lu->row[m] = nz; 335 #if defined(PETSC_USE_COMPLEX) 336 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 337 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 338 #else 339 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 340 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 341 #endif 342 } 343 if (lu->options.PrintStat) { 344 ierr = PetscGetTime(&time);CHKERRQ(ierr); 345 time0 = time - time0; 346 } 347 348 /* Factor the matrix. */ 349 PStatInit(&stat); /* Initialize the statistics variables. */ 350 351 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 352 #if defined(PETSC_USE_COMPLEX) 353 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 354 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 355 #else 356 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 357 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 358 #endif 359 } else { /* distributed mat input */ 360 #if defined(PETSC_USE_COMPLEX) 361 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 362 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 363 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 364 #else 365 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 366 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 367 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 368 #endif 369 } 370 371 if (lu->MatInputMode == GLOBAL && size > 1){ 372 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 373 } 374 375 if (lu->options.PrintStat) { 376 if (size > 1){ 377 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 378 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 379 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 380 time = time/size; /* average time */ 381 if (!rank) 382 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 383 %g / %g / %g\n",time_max,time_min,time); 384 } else { 385 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 386 %g\n",time0); 387 } 388 389 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 390 } 391 PStatFree(&stat); 392 if (size > 1){ 393 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 394 F_diag->assembled = PETSC_TRUE; 395 } 396 (*F)->assembled = PETSC_TRUE; 397 lu->flg = SAME_NONZERO_PATTERN; 398 PetscFunctionReturn(0); 399 } 400 401 /* Note the Petsc r and c permutations are ignored */ 402 #undef __FUNCT__ 403 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 404 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 405 { 406 Mat B; 407 Mat_SuperLU_DIST *lu; 408 PetscErrorCode ierr; 409 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 410 PetscMPIInt size; 411 superlu_options_t options; 412 PetscTruth flg; 413 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 414 const char *prtype[] = {"LargeDiag","NATURAL"}; 415 416 PetscFunctionBegin; 417 /* Create the factorization matrix */ 418 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 419 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 420 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 421 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 422 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 423 424 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 425 B->ops->solve = MatSolve_SuperLU_DIST; 426 B->factor = FACTOR_LU; 427 428 lu = (Mat_SuperLU_DIST*)(B->spptr); 429 430 /* Set the input options */ 431 set_default_options_dist(&options); 432 433 lu->MatInputMode = GLOBAL; 434 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 435 436 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 437 lu->nprow = size/2; /* Default process rows. */ 438 if (!lu->nprow) lu->nprow = 1; 439 lu->npcol = size/lu->nprow; /* Default process columns. */ 440 441 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 442 443 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 444 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 445 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 446 447 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 448 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 449 450 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 451 if (!flg) { 452 options.Equil = NO; 453 } 454 455 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 456 if (flg) { 457 switch (indx) { 458 case 0: 459 options.RowPerm = LargeDiag; 460 break; 461 case 1: 462 options.RowPerm = NOROWPERM; 463 break; 464 } 465 } 466 467 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 468 if (flg) { 469 switch (indx) { 470 case 0: 471 options.ColPerm = MMD_AT_PLUS_A; 472 break; 473 case 1: 474 options.ColPerm = NATURAL; 475 break; 476 case 2: 477 options.ColPerm = MMD_ATA; 478 break; 479 } 480 } 481 482 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 483 if (!flg) { 484 options.ReplaceTinyPivot = NO; 485 } 486 487 options.IterRefine = NOREFINE; 488 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 489 if (flg) { 490 options.IterRefine = DOUBLE; 491 } 492 493 if (PetscLogPrintInfo) { 494 options.PrintStat = YES; 495 } else { 496 options.PrintStat = NO; 497 } 498 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 499 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 500 PetscOptionsEnd(); 501 502 /* Initialize the SuperLU process grid. */ 503 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 504 505 /* Initialize ScalePermstruct and LUstruct. */ 506 ScalePermstructInit(M, N, &lu->ScalePermstruct); 507 LUstructInit(M, N, &lu->LUstruct); 508 509 lu->options = options; 510 lu->flg = DIFFERENT_NONZERO_PATTERN; 511 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 512 *F = B; 513 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 519 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 520 PetscErrorCode ierr; 521 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 522 523 PetscFunctionBegin; 524 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 525 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 526 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 527 PetscFunctionReturn(0); 528 } 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 532 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 533 { 534 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 535 superlu_options_t options; 536 PetscErrorCode ierr; 537 538 PetscFunctionBegin; 539 /* check if matrix is superlu_dist type */ 540 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 541 542 options = lu->options; 543 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 544 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 545 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 546 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 547 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 550 if (options.ColPerm == NATURAL) { 551 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 552 } else if (options.ColPerm == MMD_AT_PLUS_A) { 553 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 554 } else if (options.ColPerm == MMD_ATA) { 555 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 556 } else { 557 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 558 } 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatView_SuperLU_DIST" 564 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 565 { 566 PetscErrorCode ierr; 567 PetscTruth iascii; 568 PetscViewerFormat format; 569 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 570 571 PetscFunctionBegin; 572 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 573 574 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 575 if (iascii) { 576 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 577 if (format == PETSC_VIEWER_ASCII_INFO) { 578 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 579 } 580 } 581 PetscFunctionReturn(0); 582 } 583 584 585 EXTERN_C_BEGIN 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 588 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 589 { 590 /* This routine is only called to convert to MATSUPERLU_DIST */ 591 /* from MATSEQAIJ if A has a single process communicator */ 592 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 593 PetscErrorCode ierr; 594 PetscMPIInt size; 595 MPI_Comm comm; 596 Mat B=*newmat; 597 Mat_SuperLU_DIST *lu; 598 599 PetscFunctionBegin; 600 if (reuse == MAT_INITIAL_MATRIX) { 601 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 602 } 603 604 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 605 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 606 607 lu->MatDuplicate = A->ops->duplicate; 608 lu->MatView = A->ops->view; 609 lu->MatAssemblyEnd = A->ops->assemblyend; 610 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 611 lu->MatDestroy = A->ops->destroy; 612 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 613 614 B->spptr = (void*)lu; 615 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 616 B->ops->view = MatView_SuperLU_DIST; 617 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 618 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 619 B->ops->destroy = MatDestroy_SuperLU_DIST; 620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 621 if (size == 1) { 622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 623 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 625 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 626 } else { 627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 628 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 630 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 631 } 632 ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 633 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 634 *newmat = B; 635 PetscFunctionReturn(0); 636 } 637 EXTERN_C_END 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 641 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 642 PetscErrorCode ierr; 643 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 644 645 PetscFunctionBegin; 646 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 647 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 648 PetscFunctionReturn(0); 649 } 650 651 /*MC 652 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 653 via the external package SuperLU_DIST. 654 655 If SuperLU_DIST is installed (see the manual for 656 instructions on how to declare the existence of external packages), 657 a matrix type can be constructed which invokes SuperLU_DIST solvers. 658 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 659 660 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 661 and from MATMPIAIJ otherwise. As a result, for single process communicators, 662 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 663 for communicators controlling multiple processes. It is recommended that you call both of 664 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 665 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 666 without data copy. 667 668 Options Database Keys: 669 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 670 . -mat_superlu_dist_r <n> - number of rows in processor partition 671 . -mat_superlu_dist_c <n> - number of columns in processor partition 672 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 673 . -mat_superlu_dist_equil - equilibrate the matrix 674 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 675 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 676 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 677 . -mat_superlu_dist_iterrefine - use iterative refinement 678 - -mat_superlu_dist_statprint - print factorization information 679 680 Level: beginner 681 682 .seealso: PCLU 683 M*/ 684 685 EXTERN_C_BEGIN 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatCreate_SuperLU_DIST" 688 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 689 { 690 PetscErrorCode ierr; 691 PetscMPIInt size; 692 693 PetscFunctionBegin; 694 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 695 /* and SuperLU_DIST types */ 696 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 697 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 698 if (size == 1) { 699 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 700 } else { 701 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 702 /* A_diag = 0x0 ??? -- do we need it? 703 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 704 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 705 */ 706 } 707 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 EXTERN_C_END 711 712