1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* A few function pointers for inheritance */ 42 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 PetscErrorCode (*MatView)(Mat,PetscViewer); 44 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatDestroy)(Mat); 47 48 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 49 PetscTruth CleanUpSuperLU_Dist; 50 } Mat_SuperLU_DIST; 51 52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType); 58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *); 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ" 65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 66 { 67 PetscErrorCode ierr; 68 Mat B=*newmat; 69 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 70 71 PetscFunctionBegin; 72 if (reuse == MAT_INITIAL_MATRIX) { 73 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 74 } 75 /* Reset the original function pointers */ 76 B->ops->duplicate = lu->MatDuplicate; 77 B->ops->view = lu->MatView; 78 B->ops->assemblyend = lu->MatAssemblyEnd; 79 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 80 B->ops->destroy = lu->MatDestroy; 81 ierr = PetscFree(lu);CHKERRQ(ierr); 82 A->spptr = PETSC_NULL; 83 84 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 85 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 86 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 87 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 88 89 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 90 *newmat = B; 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 98 { 99 PetscErrorCode ierr; 100 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 101 102 PetscFunctionBegin; 103 if (lu->CleanUpSuperLU_Dist) { 104 /* Deallocate SuperLU_DIST storage */ 105 if (lu->MatInputMode == GLOBAL) { 106 Destroy_CompCol_Matrix_dist(&lu->A_sup); 107 } else { 108 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 109 if ( lu->options.SolveInitialized ) { 110 #if defined(PETSC_USE_COMPLEX) 111 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 112 #else 113 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 114 #endif 115 } 116 } 117 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 118 ScalePermstructFree(&lu->ScalePermstruct); 119 LUstructFree(&lu->LUstruct); 120 121 /* Release the SuperLU_DIST process grid. */ 122 superlu_gridexit(&lu->grid); 123 124 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 125 } 126 127 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 128 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->rmap.N, N=A->cmap.N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 158 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 if (lu->MatInputMode == GLOBAL) { 173 #if defined(PETSC_USE_COMPLEX) 174 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 175 &lu->grid, &lu->LUstruct, berr, &stat, &info); 176 #else 177 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 178 &lu->grid, &lu->LUstruct, berr, &stat, &info); 179 #endif 180 } else { /* distributed mat input */ 181 #if defined(PETSC_USE_COMPLEX) 182 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 185 #else 186 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 187 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 188 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 189 #endif 190 } 191 if (lu->options.PrintStat) { 192 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 193 } 194 PStatFree(&stat); 195 196 if (size > 1) { 197 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 198 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 199 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 200 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 201 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 202 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 } else { 207 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 215 { 216 Mat *tseq,A_seq = PETSC_NULL; 217 Mat_SeqAIJ *aa,*bb; 218 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 219 PetscErrorCode ierr; 220 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 221 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 222 PetscMPIInt size,rank; 223 SuperLUStat_t stat; 224 double *berr=0; 225 IS isrow; 226 PetscLogDouble time0,time,time_min,time_max; 227 Mat F_diag=PETSC_NULL; 228 #if defined(PETSC_USE_COMPLEX) 229 doublecomplex *av, *bv; 230 #else 231 double *av, *bv; 232 #endif 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 236 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 237 238 if (lu->options.PrintStat) { /* collect time for mat conversion */ 239 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Convert Petsc NR matrix to SuperLU_DIST NC. 257 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 258 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 259 if (lu->FactPattern == SamePattern_SameRowPerm){ 260 Destroy_CompCol_Matrix_dist(&lu->A_sup); 261 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 262 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 263 } else { 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 } 269 #if defined(PETSC_USE_COMPLEX) 270 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 271 #else 272 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 273 #endif 274 275 /* Create compressed column matrix A_sup. */ 276 #if defined(PETSC_USE_COMPLEX) 277 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 278 #else 279 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 280 #endif 281 } else { /* distributed mat input */ 282 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 283 aa=(Mat_SeqAIJ*)(mat->A)->data; 284 bb=(Mat_SeqAIJ*)(mat->B)->data; 285 ai=aa->i; aj=aa->j; 286 bi=bb->i; bj=bb->j; 287 #if defined(PETSC_USE_COMPLEX) 288 av=(doublecomplex*)aa->a; 289 bv=(doublecomplex*)bb->a; 290 #else 291 av=aa->a; 292 bv=bb->a; 293 #endif 294 rstart = A->rmap.rstart; 295 nz = aa->nz + bb->nz; 296 garray = mat->garray; 297 298 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 if (lu->FactPattern == SamePattern_SameRowPerm){ 306 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 307 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 308 } else { 309 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 310 lu->options.Fact = SamePattern; 311 } 312 } 313 nz = 0; irow = rstart; 314 for ( i=0; i<m; i++ ) { 315 lu->row[i] = nz; 316 countA = ai[i+1] - ai[i]; 317 countB = bi[i+1] - bi[i]; 318 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 319 bjj = bj + bi[i]; 320 321 /* B part, smaller col index */ 322 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 323 jB = 0; 324 for (j=0; j<countB; j++){ 325 jcol = garray[bjj[j]]; 326 if (jcol > colA_start) { 327 jB = j; 328 break; 329 } 330 lu->col[nz] = jcol; 331 lu->val[nz++] = *bv++; 332 if (j==countB-1) jB = countB; 333 } 334 335 /* A part */ 336 for (j=0; j<countA; j++){ 337 lu->col[nz] = rstart + ajj[j]; 338 lu->val[nz++] = *av++; 339 } 340 341 /* B part, larger col index */ 342 for (j=jB; j<countB; j++){ 343 lu->col[nz] = garray[bjj[j]]; 344 lu->val[nz++] = *bv++; 345 } 346 } 347 lu->row[m] = nz; 348 #if defined(PETSC_USE_COMPLEX) 349 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 350 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 351 #else 352 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 353 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 354 #endif 355 } 356 if (lu->options.PrintStat) { 357 ierr = PetscGetTime(&time);CHKERRQ(ierr); 358 time0 = time - time0; 359 } 360 361 /* Factor the matrix. */ 362 PStatInit(&stat); /* Initialize the statistics variables. */ 363 364 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 365 #if defined(PETSC_USE_COMPLEX) 366 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 367 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 368 #else 369 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 370 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 371 #endif 372 } else { /* distributed mat input */ 373 #if defined(PETSC_USE_COMPLEX) 374 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 376 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 377 #else 378 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 379 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 380 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 381 #endif 382 } 383 384 if (lu->MatInputMode == GLOBAL && size > 1){ 385 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 386 } 387 388 if (lu->options.PrintStat) { 389 if (size > 1){ 390 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 391 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 392 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 393 time = time/size; /* average time */ 394 if (!rank) { 395 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 396 } 397 } else { 398 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 399 } 400 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 401 } 402 PStatFree(&stat); 403 if (size > 1){ 404 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 405 F_diag->assembled = PETSC_TRUE; 406 } 407 (*F)->assembled = PETSC_TRUE; 408 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 409 PetscFunctionReturn(0); 410 } 411 412 /* Note the Petsc r and c permutations are ignored */ 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 415 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 416 { 417 Mat B; 418 Mat_SuperLU_DIST *lu; 419 PetscErrorCode ierr; 420 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 421 PetscMPIInt size; 422 superlu_options_t options; 423 PetscTruth flg; 424 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 425 const char *prtype[] = {"LargeDiag","NATURAL"}; 426 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 427 428 PetscFunctionBegin; 429 /* Create the factorization matrix */ 430 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 431 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 432 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 433 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 434 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 435 436 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 437 B->ops->solve = MatSolve_SuperLU_DIST; 438 B->factor = FACTOR_LU; 439 440 lu = (Mat_SuperLU_DIST*)(B->spptr); 441 442 /* Set the default input options: 443 options.Fact = DOFACT; 444 options.Equil = YES; 445 options.ParSymbFact = NO; 446 options.ColPerm = MMD_AT_PLUS_A; 447 options.RowPerm = LargeDiag; 448 options.ReplaceTinyPivot = YES; 449 options.IterRefine = DOUBLE; 450 options.Trans = NOTRANS; 451 options.SolveInitialized = NO; 452 options.RefineInitialized = NO; 453 options.PrintStat = YES; 454 */ 455 set_default_options_dist(&options); 456 457 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 458 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 459 /* Default num of process columns and rows */ 460 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 461 if (!lu->npcol) lu->npcol = 1; 462 while (lu->npcol > 0) { 463 lu->nprow = PetscMPIIntCast(size/lu->npcol); 464 if (size == lu->nprow * lu->npcol) break; 465 lu->npcol --; 466 } 467 468 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 469 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 470 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 471 if (size != lu->nprow * lu->npcol) 472 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 473 474 lu->MatInputMode = DISTRIBUTED; 475 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 476 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 477 478 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 479 if (!flg) { 480 options.Equil = NO; 481 } 482 483 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 484 if (flg) { 485 switch (indx) { 486 case 0: 487 options.RowPerm = LargeDiag; 488 break; 489 case 1: 490 options.RowPerm = NOROWPERM; 491 break; 492 } 493 } 494 495 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 496 if (flg) { 497 switch (indx) { 498 case 0: 499 options.ColPerm = MMD_AT_PLUS_A; 500 break; 501 case 1: 502 options.ColPerm = NATURAL; 503 break; 504 case 2: 505 options.ColPerm = MMD_ATA; 506 break; 507 case 3: 508 options.ColPerm = PARMETIS; 509 break; 510 } 511 } 512 513 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 514 if (!flg) { 515 options.ReplaceTinyPivot = NO; 516 } 517 518 options.ParSymbFact = NO; 519 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 520 if (flg){ 521 #ifdef PETSC_HAVE_PARMETIS 522 options.ParSymbFact = YES; 523 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 524 #else 525 printf("parsymbfact needs PARMETIS"); 526 #endif 527 } 528 529 lu->FactPattern = SamePattern; 530 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 531 if (flg) { 532 switch (indx) { 533 case 0: 534 lu->FactPattern = SamePattern; 535 break; 536 case 1: 537 lu->FactPattern = SamePattern_SameRowPerm; 538 break; 539 } 540 } 541 542 options.IterRefine = NOREFINE; 543 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 544 if (flg) { 545 options.IterRefine = DOUBLE; 546 } 547 548 if (PetscLogPrintInfo) { 549 options.PrintStat = YES; 550 } else { 551 options.PrintStat = NO; 552 } 553 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 554 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 555 PetscOptionsEnd(); 556 557 /* Initialize the SuperLU process grid. */ 558 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 559 560 /* Initialize ScalePermstruct and LUstruct. */ 561 ScalePermstructInit(M, N, &lu->ScalePermstruct); 562 LUstructInit(M, N, &lu->LUstruct); 563 564 lu->options = options; 565 lu->options.Fact = DOFACT; 566 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 567 *F = B; 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 573 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 574 PetscErrorCode ierr; 575 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 576 577 PetscFunctionBegin; 578 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 579 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 580 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 586 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 587 { 588 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 589 superlu_options_t options; 590 PetscErrorCode ierr; 591 592 PetscFunctionBegin; 593 /* check if matrix is superlu_dist type */ 594 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 595 596 options = lu->options; 597 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 598 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 599 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 600 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 601 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 602 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 603 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 604 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 605 if (options.ColPerm == NATURAL) { 606 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 607 } else if (options.ColPerm == MMD_AT_PLUS_A) { 608 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 609 } else if (options.ColPerm == MMD_ATA) { 610 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 611 } else if (options.ColPerm == PARMETIS) { 612 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 613 } else { 614 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 615 } 616 617 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 618 619 if (lu->FactPattern == SamePattern){ 620 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 621 } else { 622 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 623 } 624 PetscFunctionReturn(0); 625 } 626 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatView_SuperLU_DIST" 629 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 630 { 631 PetscErrorCode ierr; 632 PetscTruth iascii; 633 PetscViewerFormat format; 634 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 635 636 PetscFunctionBegin; 637 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 638 639 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 640 if (iascii) { 641 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 642 if (format == PETSC_VIEWER_ASCII_INFO) { 643 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648 649 650 EXTERN_C_BEGIN 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST" 653 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 654 { 655 /* This routine is only called to convert to MATSUPERLU_DIST */ 656 /* from MATSEQAIJ if A has a single process communicator */ 657 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 658 PetscErrorCode ierr; 659 PetscMPIInt size; 660 MPI_Comm comm; 661 Mat B=*newmat; 662 Mat_SuperLU_DIST *lu; 663 664 PetscFunctionBegin; 665 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 666 if (reuse == MAT_INITIAL_MATRIX) { 667 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 668 lu->MatDuplicate = B->ops->duplicate; 669 lu->MatView = B->ops->view; 670 lu->MatAssemblyEnd = B->ops->assemblyend; 671 lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 672 lu->MatDestroy = B->ops->destroy; 673 } else { 674 lu->MatDuplicate = A->ops->duplicate; 675 lu->MatView = A->ops->view; 676 lu->MatAssemblyEnd = A->ops->assemblyend; 677 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 678 lu->MatDestroy = A->ops->destroy; 679 } 680 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 681 682 B->spptr = (void*)lu; 683 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 684 B->ops->view = MatView_SuperLU_DIST; 685 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 686 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 687 B->ops->destroy = MatDestroy_SuperLU_DIST; 688 689 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 691 if (size == 1) { 692 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 693 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 695 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 696 } else { 697 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 698 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 700 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 701 } 702 ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 703 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 704 *newmat = B; 705 PetscFunctionReturn(0); 706 } 707 EXTERN_C_END 708 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 711 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 712 PetscErrorCode ierr; 713 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 714 715 PetscFunctionBegin; 716 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 717 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 721 /*MC 722 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 723 via the external package SuperLU_DIST. 724 725 If SuperLU_DIST is installed (see the manual for 726 instructions on how to declare the existence of external packages), 727 a matrix type can be constructed which invokes SuperLU_DIST solvers. 728 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 729 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 730 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 731 732 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 733 and from MATMPIAIJ otherwise. As a result, for single process communicators, 734 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 735 for communicators controlling multiple processes. It is recommended that you call both of 736 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 737 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 738 without data copy; but this MUST be called AFTER the matrix values are set. 739 740 741 742 Options Database Keys: 743 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 744 . -mat_superlu_dist_r <n> - number of rows in processor partition 745 . -mat_superlu_dist_c <n> - number of columns in processor partition 746 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 747 . -mat_superlu_dist_equil - equilibrate the matrix 748 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 749 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 750 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 751 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 752 . -mat_superlu_dist_iterrefine - use iterative refinement 753 - -mat_superlu_dist_statprint - print factorization information 754 755 Level: beginner 756 757 .seealso: PCLU 758 M*/ 759 760 EXTERN_C_BEGIN 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatCreate_SuperLU_DIST" 763 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 764 { 765 PetscErrorCode ierr; 766 PetscMPIInt size; 767 768 PetscFunctionBegin; 769 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 770 ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); 771 ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 EXTERN_C_END 775 776