1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 fact_t FactPattern; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 55 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 56 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 57 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 58 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType); 59 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 60 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 61 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *); 62 63 EXTERN_C_BEGIN 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ" 66 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 67 { 68 PetscErrorCode ierr; 69 Mat B=*newmat; 70 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 71 72 PetscFunctionBegin; 73 if (reuse == MAT_INITIAL_MATRIX) { 74 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 75 } 76 /* Reset the original function pointers */ 77 B->ops->duplicate = lu->MatDuplicate; 78 B->ops->view = lu->MatView; 79 B->ops->assemblyend = lu->MatAssemblyEnd; 80 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 81 B->ops->destroy = lu->MatDestroy; 82 83 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 84 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 85 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 86 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 87 88 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 89 *newmat = B; 90 PetscFunctionReturn(0); 91 } 92 EXTERN_C_END 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 96 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 97 { 98 PetscErrorCode ierr; 99 PetscMPIInt size; 100 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 101 102 PetscFunctionBegin; 103 if (lu->CleanUpSuperLU_Dist) { 104 /* Deallocate SuperLU_DIST storage */ 105 if (lu->MatInputMode == GLOBAL) { 106 Destroy_CompCol_Matrix_dist(&lu->A_sup); 107 } else { 108 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 109 if ( lu->options.SolveInitialized ) { 110 #if defined(PETSC_USE_COMPLEX) 111 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 112 #else 113 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 114 #endif 115 } 116 } 117 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 118 ScalePermstructFree(&lu->ScalePermstruct); 119 LUstructFree(&lu->LUstruct); 120 121 /* Release the SuperLU_DIST process grid. */ 122 superlu_gridexit(&lu->grid); 123 124 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 125 } 126 127 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 128 if (size == 1) { 129 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 130 } else { 131 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 132 } 133 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSolve_SuperLU_DIST" 139 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 140 { 141 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 142 PetscErrorCode ierr; 143 PetscMPIInt size; 144 PetscInt m=A->rmap.N, N=A->cmap.N; 145 SuperLUStat_t stat; 146 double berr[1]; 147 PetscScalar *bptr; 148 PetscInt info, nrhs=1; 149 Vec x_seq; 150 IS iden; 151 VecScatter scat; 152 153 PetscFunctionBegin; 154 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 155 if (size > 1) { 156 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 157 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 158 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 159 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 160 ierr = ISDestroy(iden);CHKERRQ(ierr); 161 162 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 163 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 164 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 165 } else { /* distributed mat input */ 166 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 167 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 168 } 169 } else { /* size == 1 */ 170 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 171 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 172 } 173 174 if (lu->options.Fact != FACTORED) 175 SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 176 177 PStatInit(&stat); /* Initialize the statistics variables. */ 178 if (lu->MatInputMode == GLOBAL) { 179 #if defined(PETSC_USE_COMPLEX) 180 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 181 &lu->grid, &lu->LUstruct, berr, &stat, &info); 182 #else 183 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 184 &lu->grid, &lu->LUstruct, berr, &stat, &info); 185 #endif 186 } else { /* distributed mat input */ 187 #if defined(PETSC_USE_COMPLEX) 188 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 189 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 190 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 191 #else 192 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 193 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 194 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 195 #endif 196 } 197 if (lu->options.PrintStat) { 198 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 199 } 200 PStatFree(&stat); 201 202 if (size > 1) { 203 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 204 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 205 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 206 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 207 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 208 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 209 } else { 210 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 211 } 212 } else { 213 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 221 { 222 Mat *tseq,A_seq = PETSC_NULL; 223 Mat_SeqAIJ *aa,*bb; 224 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 225 PetscErrorCode ierr; 226 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 227 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 228 PetscMPIInt size,rank; 229 SuperLUStat_t stat; 230 double *berr=0; 231 IS isrow; 232 PetscLogDouble time0,time,time_min,time_max; 233 Mat F_diag=PETSC_NULL; 234 #if defined(PETSC_USE_COMPLEX) 235 doublecomplex *av, *bv; 236 #else 237 double *av, *bv; 238 #endif 239 240 PetscFunctionBegin; 241 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 242 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 243 244 if (lu->options.PrintStat) { /* collect time for mat conversion */ 245 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 246 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 247 } 248 249 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 250 if (size > 1) { /* convert mpi A to seq mat A */ 251 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 252 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 253 ierr = ISDestroy(isrow);CHKERRQ(ierr); 254 255 A_seq = *tseq; 256 ierr = PetscFree(tseq);CHKERRQ(ierr); 257 aa = (Mat_SeqAIJ*)A_seq->data; 258 } else { 259 aa = (Mat_SeqAIJ*)A->data; 260 } 261 262 /* Convert Petsc NR matrix to SuperLU_DIST NC. 263 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 264 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 265 if (lu->FactPattern == SamePattern_SameRowPerm){ 266 Destroy_CompCol_Matrix_dist(&lu->A_sup); 267 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 268 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 269 } else { 270 Destroy_CompCol_Matrix_dist(&lu->A_sup); 271 Destroy_LU(N, &lu->grid, &lu->LUstruct); 272 lu->options.Fact = SamePattern; 273 } 274 } 275 #if defined(PETSC_USE_COMPLEX) 276 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 277 #else 278 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 279 #endif 280 281 /* Create compressed column matrix A_sup. */ 282 #if defined(PETSC_USE_COMPLEX) 283 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 284 #else 285 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 286 #endif 287 } else { /* distributed mat input */ 288 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 289 aa=(Mat_SeqAIJ*)(mat->A)->data; 290 bb=(Mat_SeqAIJ*)(mat->B)->data; 291 ai=aa->i; aj=aa->j; 292 bi=bb->i; bj=bb->j; 293 #if defined(PETSC_USE_COMPLEX) 294 av=(doublecomplex*)aa->a; 295 bv=(doublecomplex*)bb->a; 296 #else 297 av=aa->a; 298 bv=bb->a; 299 #endif 300 rstart = A->rmap.rstart; 301 nz = aa->nz + bb->nz; 302 garray = mat->garray; 303 304 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 305 #if defined(PETSC_USE_COMPLEX) 306 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 307 #else 308 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 309 #endif 310 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 311 if (lu->FactPattern == SamePattern_SameRowPerm){ 312 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 313 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 314 } else { 315 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 316 lu->options.Fact = SamePattern; 317 } 318 } 319 nz = 0; irow = rstart; 320 for ( i=0; i<m; i++ ) { 321 lu->row[i] = nz; 322 countA = ai[i+1] - ai[i]; 323 countB = bi[i+1] - bi[i]; 324 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 325 bjj = bj + bi[i]; 326 327 /* B part, smaller col index */ 328 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 329 jB = 0; 330 for (j=0; j<countB; j++){ 331 jcol = garray[bjj[j]]; 332 if (jcol > colA_start) { 333 jB = j; 334 break; 335 } 336 lu->col[nz] = jcol; 337 lu->val[nz++] = *bv++; 338 if (j==countB-1) jB = countB; 339 } 340 341 /* A part */ 342 for (j=0; j<countA; j++){ 343 lu->col[nz] = rstart + ajj[j]; 344 lu->val[nz++] = *av++; 345 } 346 347 /* B part, larger col index */ 348 for (j=jB; j<countB; j++){ 349 lu->col[nz] = garray[bjj[j]]; 350 lu->val[nz++] = *bv++; 351 } 352 } 353 lu->row[m] = nz; 354 #if defined(PETSC_USE_COMPLEX) 355 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 356 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 357 #else 358 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 359 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 360 #endif 361 } 362 if (lu->options.PrintStat) { 363 ierr = PetscGetTime(&time);CHKERRQ(ierr); 364 time0 = time - time0; 365 } 366 367 /* Factor the matrix. */ 368 PStatInit(&stat); /* Initialize the statistics variables. */ 369 370 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 371 #if defined(PETSC_USE_COMPLEX) 372 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 373 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 374 #else 375 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 376 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 377 #endif 378 } else { /* distributed mat input */ 379 #if defined(PETSC_USE_COMPLEX) 380 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 381 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 382 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 383 #else 384 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 385 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 386 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 387 #endif 388 } 389 390 if (lu->MatInputMode == GLOBAL && size > 1){ 391 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 392 } 393 394 if (lu->options.PrintStat) { 395 if (size > 1){ 396 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 397 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 398 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 399 time = time/size; /* average time */ 400 if (!rank) 401 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 402 %g / %g / %g\n",time_max,time_min,time); 403 } else { 404 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 405 %g\n",time0); 406 } 407 408 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 409 } 410 PStatFree(&stat); 411 if (size > 1){ 412 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 413 F_diag->assembled = PETSC_TRUE; 414 } 415 (*F)->assembled = PETSC_TRUE; 416 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 417 PetscFunctionReturn(0); 418 } 419 420 /* Note the Petsc r and c permutations are ignored */ 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 423 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 424 { 425 Mat B; 426 Mat_SuperLU_DIST *lu; 427 PetscErrorCode ierr; 428 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 429 PetscMPIInt size; 430 superlu_options_t options; 431 PetscTruth flg; 432 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 433 const char *prtype[] = {"LargeDiag","NATURAL"}; 434 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 435 436 PetscFunctionBegin; 437 /* Create the factorization matrix */ 438 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 439 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 440 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 441 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 442 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 443 444 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 445 B->ops->solve = MatSolve_SuperLU_DIST; 446 B->factor = FACTOR_LU; 447 448 lu = (Mat_SuperLU_DIST*)(B->spptr); 449 450 /* Set the default input options: 451 options.Fact = DOFACT; 452 options.Equil = YES; 453 options.ColPerm = MMD_AT_PLUS_A; 454 options.RowPerm = LargeDiag; 455 options.ReplaceTinyPivot = YES; 456 options.Trans = NOTRANS; 457 options.IterRefine = DOUBLE; 458 options.SolveInitialized = NO; 459 options.RefineInitialized = NO; 460 options.PrintStat = YES; 461 */ 462 set_default_options_dist(&options); 463 464 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 465 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 466 /* Default num of process columns and rows */ 467 lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size)); 468 if (!lu->npcol) lu->npcol = 1; 469 while (lu->npcol > 0) { 470 lu->nprow = (PetscMPIInt)(size/lu->npcol); 471 if (size == lu->nprow * lu->npcol) break; 472 lu->npcol --; 473 } 474 475 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 476 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 477 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 478 if (size != lu->nprow * lu->npcol) 479 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 480 481 lu->MatInputMode = DISTRIBUTED; 482 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 483 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 484 485 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 486 if (!flg) { 487 options.Equil = NO; 488 } 489 490 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 491 if (flg) { 492 switch (indx) { 493 case 0: 494 options.RowPerm = LargeDiag; 495 break; 496 case 1: 497 options.RowPerm = NOROWPERM; 498 break; 499 } 500 } 501 502 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr); 503 if (flg) { 504 switch (indx) { 505 case 0: 506 options.ColPerm = MMD_AT_PLUS_A; 507 break; 508 case 1: 509 options.ColPerm = NATURAL; 510 break; 511 case 2: 512 options.ColPerm = MMD_ATA; 513 break; 514 } 515 } 516 517 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 518 if (!flg) { 519 options.ReplaceTinyPivot = NO; 520 } 521 522 lu->FactPattern = SamePattern; 523 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 524 if (flg) { 525 switch (indx) { 526 case 0: 527 lu->FactPattern = SamePattern; 528 break; 529 case 1: 530 lu->FactPattern = SamePattern_SameRowPerm; 531 break; 532 } 533 } 534 535 options.IterRefine = NOREFINE; 536 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 537 if (flg) { 538 options.IterRefine = DOUBLE; 539 } 540 541 if (PetscLogPrintInfo) { 542 options.PrintStat = YES; 543 } else { 544 options.PrintStat = NO; 545 } 546 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 547 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 548 PetscOptionsEnd(); 549 550 /* Initialize the SuperLU process grid. */ 551 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 552 553 /* Initialize ScalePermstruct and LUstruct. */ 554 ScalePermstructInit(M, N, &lu->ScalePermstruct); 555 LUstructInit(M, N, &lu->LUstruct); 556 557 lu->options = options; 558 lu->options.Fact = DOFACT; 559 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 560 *F = B; 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 566 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 567 PetscErrorCode ierr; 568 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 569 570 PetscFunctionBegin; 571 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 572 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 573 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 579 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 580 { 581 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 582 superlu_options_t options; 583 PetscErrorCode ierr; 584 585 PetscFunctionBegin; 586 /* check if matrix is superlu_dist type */ 587 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 588 589 options = lu->options; 590 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 591 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 592 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 593 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 594 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 596 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 597 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 598 if (options.ColPerm == NATURAL) { 599 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 600 } else if (options.ColPerm == MMD_AT_PLUS_A) { 601 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 602 } else if (options.ColPerm == MMD_ATA) { 603 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 604 } else { 605 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 606 } 607 608 if (lu->FactPattern == SamePattern){ 609 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 610 } else { 611 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 612 } 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatView_SuperLU_DIST" 618 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 619 { 620 PetscErrorCode ierr; 621 PetscTruth iascii; 622 PetscViewerFormat format; 623 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 624 625 PetscFunctionBegin; 626 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 627 628 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 629 if (iascii) { 630 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 631 if (format == PETSC_VIEWER_ASCII_INFO) { 632 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 633 } 634 } 635 PetscFunctionReturn(0); 636 } 637 638 639 EXTERN_C_BEGIN 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST" 642 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 643 { 644 /* This routine is only called to convert to MATSUPERLU_DIST */ 645 /* from MATSEQAIJ if A has a single process communicator */ 646 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 647 PetscErrorCode ierr; 648 PetscMPIInt size; 649 MPI_Comm comm; 650 Mat B=*newmat; 651 Mat_SuperLU_DIST *lu; 652 653 PetscFunctionBegin; 654 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 655 if (reuse == MAT_INITIAL_MATRIX) { 656 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 657 lu->MatDuplicate = B->ops->duplicate; 658 lu->MatView = B->ops->view; 659 lu->MatAssemblyEnd = B->ops->assemblyend; 660 lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 661 lu->MatDestroy = B->ops->destroy; 662 } else { 663 lu->MatDuplicate = A->ops->duplicate; 664 lu->MatView = A->ops->view; 665 lu->MatAssemblyEnd = A->ops->assemblyend; 666 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 667 lu->MatDestroy = A->ops->destroy; 668 } 669 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 670 671 B->spptr = (void*)lu; 672 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 673 B->ops->view = MatView_SuperLU_DIST; 674 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 675 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 676 B->ops->destroy = MatDestroy_SuperLU_DIST; 677 678 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 679 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 680 if (size == 1) { 681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 682 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 684 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 685 } else { 686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 687 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 689 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 690 } 691 ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 692 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 693 *newmat = B; 694 PetscFunctionReturn(0); 695 } 696 EXTERN_C_END 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 700 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 701 PetscErrorCode ierr; 702 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 703 704 PetscFunctionBegin; 705 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 706 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 /*MC 711 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 712 via the external package SuperLU_DIST. 713 714 If SuperLU_DIST is installed (see the manual for 715 instructions on how to declare the existence of external packages), 716 a matrix type can be constructed which invokes SuperLU_DIST solvers. 717 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 718 719 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 720 and from MATMPIAIJ otherwise. As a result, for single process communicators, 721 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 722 for communicators controlling multiple processes. It is recommended that you call both of 723 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 724 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 725 without data copy. 726 727 Options Database Keys: 728 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 729 . -mat_superlu_dist_r <n> - number of rows in processor partition 730 . -mat_superlu_dist_c <n> - number of columns in processor partition 731 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 732 . -mat_superlu_dist_equil - equilibrate the matrix 733 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 734 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 735 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 736 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 737 . -mat_superlu_dist_iterrefine - use iterative refinement 738 - -mat_superlu_dist_statprint - print factorization information 739 740 Level: beginner 741 742 .seealso: PCLU 743 M*/ 744 745 EXTERN_C_BEGIN 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatCreate_SuperLU_DIST" 748 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 749 { 750 PetscErrorCode ierr; 751 PetscMPIInt size; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 755 if (size == 1) { 756 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 757 } else { 758 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 759 /* A_diag = 0x0 ??? -- do we need it? 760 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 761 ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 762 */ 763 } 764 ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 EXTERN_C_END 768 769