1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "MatSolve_SuperLU_DIST" 133 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 134 { 135 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 136 PetscErrorCode ierr; 137 PetscMPIInt size; 138 PetscInt m=A->rmap.N, N=A->cmap.N; 139 SuperLUStat_t stat; 140 double berr[1]; 141 PetscScalar *bptr; 142 PetscInt info, nrhs=1; 143 Vec x_seq; 144 IS iden; 145 VecScatter scat; 146 147 PetscFunctionBegin; 148 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 149 if (size > 1) { 150 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 151 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 152 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 153 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 154 ierr = ISDestroy(iden);CHKERRQ(ierr); 155 156 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 157 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 159 } else { /* distributed mat input */ 160 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 161 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 162 } 163 } else { /* size == 1 */ 164 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 165 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 166 } 167 168 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 169 170 PStatInit(&stat); /* Initialize the statistics variables. */ 171 if (lu->MatInputMode == GLOBAL) { 172 #if defined(PETSC_USE_COMPLEX) 173 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #else 176 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 177 &lu->grid, &lu->LUstruct, berr, &stat, &info); 178 #endif 179 } else { /* distributed mat input */ 180 #if defined(PETSC_USE_COMPLEX) 181 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 182 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 183 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 184 #else 185 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 186 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 187 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 188 #endif 189 } 190 if (lu->options.PrintStat) { 191 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 192 } 193 PStatFree(&stat); 194 195 if (size > 1) { 196 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 197 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 198 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 199 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 201 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 202 } else { 203 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 204 } 205 } else { 206 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 207 } 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 214 { 215 Mat *tseq,A_seq = PETSC_NULL; 216 Mat_SeqAIJ *aa,*bb; 217 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 218 PetscErrorCode ierr; 219 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 220 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 221 PetscMPIInt size,rank; 222 SuperLUStat_t stat; 223 double *berr=0; 224 IS isrow; 225 PetscLogDouble time0,time,time_min,time_max; 226 Mat F_diag=PETSC_NULL; 227 #if defined(PETSC_USE_COMPLEX) 228 doublecomplex *av, *bv; 229 #else 230 double *av, *bv; 231 #endif 232 233 PetscFunctionBegin; 234 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 235 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 236 237 if (lu->options.PrintStat) { /* collect time for mat conversion */ 238 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 239 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 240 } 241 242 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 243 if (size > 1) { /* convert mpi A to seq mat A */ 244 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 245 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 246 ierr = ISDestroy(isrow);CHKERRQ(ierr); 247 248 A_seq = *tseq; 249 ierr = PetscFree(tseq);CHKERRQ(ierr); 250 aa = (Mat_SeqAIJ*)A_seq->data; 251 } else { 252 aa = (Mat_SeqAIJ*)A->data; 253 } 254 255 /* Convert Petsc NR matrix to SuperLU_DIST NC. 256 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 257 if (lu->flg == SAME_NONZERO_PATTERN) {/* successive numeric factorization, sparsity pattern is reused. */ 258 Destroy_CompCol_Matrix_dist(&lu->A_sup); 259 Destroy_LU(N, &lu->grid, &lu->LUstruct); 260 lu->options.Fact = SamePattern; 261 } 262 #if defined(PETSC_USE_COMPLEX) 263 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 264 #else 265 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 266 #endif 267 268 /* Create compressed column matrix A_sup. */ 269 #if defined(PETSC_USE_COMPLEX) 270 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 271 #else 272 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 273 #endif 274 } else { /* distributed mat input */ 275 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 276 aa=(Mat_SeqAIJ*)(mat->A)->data; 277 bb=(Mat_SeqAIJ*)(mat->B)->data; 278 ai=aa->i; aj=aa->j; 279 bi=bb->i; bj=bb->j; 280 #if defined(PETSC_USE_COMPLEX) 281 av=(doublecomplex*)aa->a; 282 bv=(doublecomplex*)bb->a; 283 #else 284 av=aa->a; 285 bv=bb->a; 286 #endif 287 rstart = A->rmap.rstart; 288 nz = aa->nz + bb->nz; 289 garray = mat->garray; 290 291 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 292 #if defined(PETSC_USE_COMPLEX) 293 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 294 #else 295 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 296 #endif 297 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 298 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 299 Destroy_LU(N, &lu->grid, &lu->LUstruct); 300 lu->options.Fact = SamePattern; 301 } 302 nz = 0; irow = rstart; 303 for ( i=0; i<m; i++ ) { 304 lu->row[i] = nz; 305 countA = ai[i+1] - ai[i]; 306 countB = bi[i+1] - bi[i]; 307 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 308 bjj = bj + bi[i]; 309 310 /* B part, smaller col index */ 311 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 312 jB = 0; 313 for (j=0; j<countB; j++){ 314 jcol = garray[bjj[j]]; 315 if (jcol > colA_start) { 316 jB = j; 317 break; 318 } 319 lu->col[nz] = jcol; 320 lu->val[nz++] = *bv++; 321 if (j==countB-1) jB = countB; 322 } 323 324 /* A part */ 325 for (j=0; j<countA; j++){ 326 lu->col[nz] = rstart + ajj[j]; 327 lu->val[nz++] = *av++; 328 } 329 330 /* B part, larger col index */ 331 for (j=jB; j<countB; j++){ 332 lu->col[nz] = garray[bjj[j]]; 333 lu->val[nz++] = *bv++; 334 } 335 } 336 lu->row[m] = nz; 337 #if defined(PETSC_USE_COMPLEX) 338 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 339 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 340 #else 341 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 342 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 343 #endif 344 } 345 if (lu->options.PrintStat) { 346 ierr = PetscGetTime(&time);CHKERRQ(ierr); 347 time0 = time - time0; 348 } 349 350 /* Factor the matrix. */ 351 PStatInit(&stat); /* Initialize the statistics variables. */ 352 353 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 354 #if defined(PETSC_USE_COMPLEX) 355 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 356 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 357 #else 358 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 359 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 360 #endif 361 } else { /* distributed mat input */ 362 #if defined(PETSC_USE_COMPLEX) 363 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 364 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 365 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 366 #else 367 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 368 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 369 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 370 #endif 371 } 372 373 if (lu->MatInputMode == GLOBAL && size > 1){ 374 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 375 } 376 377 if (lu->options.PrintStat) { 378 if (size > 1){ 379 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 380 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 381 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 382 time = time/size; /* average time */ 383 if (!rank) 384 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 385 %g / %g / %g\n",time_max,time_min,time); 386 } else { 387 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 388 %g\n",time0); 389 } 390 391 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 392 } 393 PStatFree(&stat); 394 if (size > 1){ 395 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 396 F_diag->assembled = PETSC_TRUE; 397 } 398 (*F)->assembled = PETSC_TRUE; 399 lu->flg = SAME_NONZERO_PATTERN; 400 PetscFunctionReturn(0); 401 } 402 403 /* Note the Petsc r and c permutations are ignored */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 406 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 407 { 408 Mat B; 409 Mat_SuperLU_DIST *lu; 410 PetscErrorCode ierr; 411 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 412 PetscMPIInt size; 413 superlu_options_t options; 414 PetscTruth flg; 415 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 416 const char *prtype[] = {"LargeDiag","NATURAL"}; 417 418 PetscFunctionBegin; 419 /* Create the factorization matrix */ 420 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 421 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 422 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 423 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 424 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 425 426 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 427 B->ops->solve = MatSolve_SuperLU_DIST; 428 B->factor = FACTOR_LU; 429 430 lu = (Mat_SuperLU_DIST*)(B->spptr); 431 432 /* Set the input options */ 433 set_default_options_dist(&options); 434 435 lu->MatInputMode = GLOBAL; 436 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 437 438 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 439 lu->nprow = size/2; /* Default process rows. */ 440 if (!lu->nprow) lu->nprow = 1; 441 lu->npcol = size/lu->nprow; /* Default process columns. */ 442 443 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 444 445 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 446 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 447 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 448 449 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 450 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 451 452 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 453 if (!flg) { 454 options.Equil = NO; 455 } 456 457 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 458 if (flg) { 459 switch (indx) { 460 case 0: 461 options.RowPerm = LargeDiag; 462 break; 463 case 1: 464 options.RowPerm = NOROWPERM; 465 break; 466 } 467 } 468 469 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 470 if (flg) { 471 switch (indx) { 472 case 0: 473 options.ColPerm = MMD_AT_PLUS_A; 474 break; 475 case 1: 476 options.ColPerm = NATURAL; 477 break; 478 case 2: 479 options.ColPerm = MMD_ATA; 480 break; 481 } 482 } 483 484 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 485 if (!flg) { 486 options.ReplaceTinyPivot = NO; 487 } 488 489 options.IterRefine = NOREFINE; 490 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 491 if (flg) { 492 options.IterRefine = DOUBLE; 493 } 494 495 if (PetscLogPrintInfo) { 496 options.PrintStat = YES; 497 } else { 498 options.PrintStat = NO; 499 } 500 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 501 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 502 PetscOptionsEnd(); 503 504 /* Initialize the SuperLU process grid. */ 505 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 506 507 /* Initialize ScalePermstruct and LUstruct. */ 508 ScalePermstructInit(M, N, &lu->ScalePermstruct); 509 LUstructInit(M, N, &lu->LUstruct); 510 511 lu->options = options; 512 lu->flg = DIFFERENT_NONZERO_PATTERN; 513 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 514 *F = B; 515 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 521 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 522 PetscErrorCode ierr; 523 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 524 525 PetscFunctionBegin; 526 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 527 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 528 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 529 PetscFunctionReturn(0); 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 534 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 535 { 536 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 537 superlu_options_t options; 538 PetscErrorCode ierr; 539 540 PetscFunctionBegin; 541 /* check if matrix is superlu_dist type */ 542 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 543 544 options = lu->options; 545 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 546 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 547 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 552 if (options.ColPerm == NATURAL) { 553 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 554 } else if (options.ColPerm == MMD_AT_PLUS_A) { 555 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 556 } else if (options.ColPerm == MMD_ATA) { 557 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 558 } else { 559 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatView_SuperLU_DIST" 566 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 567 { 568 PetscErrorCode ierr; 569 PetscTruth iascii; 570 PetscViewerFormat format; 571 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 572 573 PetscFunctionBegin; 574 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 575 576 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 577 if (iascii) { 578 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 579 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 580 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 587 EXTERN_C_BEGIN 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 590 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 591 { 592 /* This routine is only called to convert to MATSUPERLU_DIST */ 593 /* from MATSEQAIJ if A has a single process communicator */ 594 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 595 PetscErrorCode ierr; 596 PetscMPIInt size; 597 MPI_Comm comm; 598 Mat B=*newmat; 599 Mat_SuperLU_DIST *lu; 600 601 PetscFunctionBegin; 602 if (reuse == MAT_INITIAL_MATRIX) { 603 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 604 } 605 606 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 607 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 608 609 lu->MatDuplicate = A->ops->duplicate; 610 lu->MatView = A->ops->view; 611 lu->MatAssemblyEnd = A->ops->assemblyend; 612 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 613 lu->MatDestroy = A->ops->destroy; 614 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 615 616 B->spptr = (void*)lu; 617 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 618 B->ops->view = MatView_SuperLU_DIST; 619 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 620 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 621 B->ops->destroy = MatDestroy_SuperLU_DIST; 622 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 623 if (size == 1) { 624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 625 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 627 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 628 } else { 629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 630 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 632 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 633 } 634 ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 635 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 636 *newmat = B; 637 PetscFunctionReturn(0); 638 } 639 EXTERN_C_END 640 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 643 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 644 PetscErrorCode ierr; 645 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 646 647 PetscFunctionBegin; 648 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 649 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 /*MC 654 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 655 via the external package SuperLU_DIST. 656 657 If SuperLU_DIST is installed (see the manual for 658 instructions on how to declare the existence of external packages), 659 a matrix type can be constructed which invokes SuperLU_DIST solvers. 660 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 661 662 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 663 and from MATMPIAIJ otherwise. As a result, for single process communicators, 664 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 665 for communicators controlling multiple processes. It is recommended that you call both of 666 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 667 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 668 without data copy. 669 670 Options Database Keys: 671 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 672 . -mat_superlu_dist_r <n> - number of rows in processor partition 673 . -mat_superlu_dist_c <n> - number of columns in processor partition 674 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 675 . -mat_superlu_dist_equil - equilibrate the matrix 676 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 677 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 678 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 679 . -mat_superlu_dist_iterrefine - use iterative refinement 680 - -mat_superlu_dist_statprint - print factorization information 681 682 Level: beginner 683 684 .seealso: PCLU 685 M*/ 686 687 EXTERN_C_BEGIN 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatCreate_SuperLU_DIST" 690 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 691 { 692 PetscErrorCode ierr; 693 PetscMPIInt size; 694 695 PetscFunctionBegin; 696 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 697 /* and SuperLU_DIST types */ 698 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 699 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 700 if (size == 1) { 701 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 702 } else { 703 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 704 /* A_diag = 0x0 ??? -- do we need it? 705 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 706 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 707 */ 708 } 709 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 EXTERN_C_END 713 714