1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* A few function pointers for inheritance */ 42 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 PetscErrorCode (*MatView)(Mat,PetscViewer); 44 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatDestroy)(Mat); 47 48 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 49 PetscTruth CleanUpSuperLU_Dist; 50 } Mat_SuperLU_DIST; 51 52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType); 58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *); 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ" 65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 66 { 67 PetscErrorCode ierr; 68 Mat B=*newmat; 69 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 70 71 PetscFunctionBegin; 72 if (reuse == MAT_INITIAL_MATRIX) { 73 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 74 } 75 /* Reset the original function pointers */ 76 B->ops->duplicate = lu->MatDuplicate; 77 B->ops->view = lu->MatView; 78 B->ops->assemblyend = lu->MatAssemblyEnd; 79 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 80 B->ops->destroy = lu->MatDestroy; 81 ierr = PetscFree(lu);CHKERRQ(ierr); 82 A->spptr = PETSC_NULL; 83 84 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 85 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 86 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 87 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 88 89 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 90 *newmat = B; 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 98 { 99 PetscErrorCode ierr; 100 PetscMPIInt size; 101 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 102 103 PetscFunctionBegin; 104 if (lu->CleanUpSuperLU_Dist) { 105 /* Deallocate SuperLU_DIST storage */ 106 if (lu->MatInputMode == GLOBAL) { 107 Destroy_CompCol_Matrix_dist(&lu->A_sup); 108 } else { 109 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 110 if ( lu->options.SolveInitialized ) { 111 #if defined(PETSC_USE_COMPLEX) 112 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 113 #else 114 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 115 #endif 116 } 117 } 118 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 119 ScalePermstructFree(&lu->ScalePermstruct); 120 LUstructFree(&lu->LUstruct); 121 122 /* Release the SuperLU_DIST process grid. */ 123 superlu_gridexit(&lu->grid); 124 125 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 126 } 127 128 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 129 if (size == 1) { 130 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 131 } else { 132 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 133 } 134 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSolve_SuperLU_DIST" 140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 141 { 142 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 143 PetscErrorCode ierr; 144 PetscMPIInt size; 145 PetscInt m=A->rmap.N, N=A->cmap.N; 146 SuperLUStat_t stat; 147 double berr[1]; 148 PetscScalar *bptr; 149 PetscInt info, nrhs=1; 150 Vec x_seq; 151 IS iden; 152 VecScatter scat; 153 154 PetscFunctionBegin; 155 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 156 if (size > 1) { 157 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 158 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 159 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 160 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 161 ierr = ISDestroy(iden);CHKERRQ(ierr); 162 163 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 164 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 165 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 166 } else { /* distributed mat input */ 167 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 168 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 169 } 170 } else { /* size == 1 */ 171 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 172 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 173 } 174 175 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 176 177 PStatInit(&stat); /* Initialize the statistics variables. */ 178 if (lu->MatInputMode == GLOBAL) { 179 #if defined(PETSC_USE_COMPLEX) 180 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 181 &lu->grid, &lu->LUstruct, berr, &stat, &info); 182 #else 183 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 184 &lu->grid, &lu->LUstruct, berr, &stat, &info); 185 #endif 186 } else { /* distributed mat input */ 187 #if defined(PETSC_USE_COMPLEX) 188 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 189 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 190 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 191 #else 192 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 193 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 194 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 195 #endif 196 } 197 if (lu->options.PrintStat) { 198 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 199 } 200 PStatFree(&stat); 201 202 if (size > 1) { 203 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 204 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 205 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 206 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 207 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 208 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 209 } else { 210 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 211 } 212 } else { 213 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 221 { 222 Mat *tseq,A_seq = PETSC_NULL; 223 Mat_SeqAIJ *aa,*bb; 224 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 225 PetscErrorCode ierr; 226 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 227 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 228 PetscMPIInt size,rank; 229 SuperLUStat_t stat; 230 double *berr=0; 231 IS isrow; 232 PetscLogDouble time0,time,time_min,time_max; 233 Mat F_diag=PETSC_NULL; 234 #if defined(PETSC_USE_COMPLEX) 235 doublecomplex *av, *bv; 236 #else 237 double *av, *bv; 238 #endif 239 240 PetscFunctionBegin; 241 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 242 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 243 244 if (lu->options.PrintStat) { /* collect time for mat conversion */ 245 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 246 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 247 } 248 249 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 250 if (size > 1) { /* convert mpi A to seq mat A */ 251 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 252 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 253 ierr = ISDestroy(isrow);CHKERRQ(ierr); 254 255 A_seq = *tseq; 256 ierr = PetscFree(tseq);CHKERRQ(ierr); 257 aa = (Mat_SeqAIJ*)A_seq->data; 258 } else { 259 aa = (Mat_SeqAIJ*)A->data; 260 } 261 262 /* Convert Petsc NR matrix to SuperLU_DIST NC. 263 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 264 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 265 if (lu->FactPattern == SamePattern_SameRowPerm){ 266 Destroy_CompCol_Matrix_dist(&lu->A_sup); 267 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 268 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 269 } else { 270 Destroy_CompCol_Matrix_dist(&lu->A_sup); 271 Destroy_LU(N, &lu->grid, &lu->LUstruct); 272 lu->options.Fact = SamePattern; 273 } 274 } 275 #if defined(PETSC_USE_COMPLEX) 276 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 277 #else 278 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 279 #endif 280 281 /* Create compressed column matrix A_sup. */ 282 #if defined(PETSC_USE_COMPLEX) 283 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 284 #else 285 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 286 #endif 287 } else { /* distributed mat input */ 288 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 289 aa=(Mat_SeqAIJ*)(mat->A)->data; 290 bb=(Mat_SeqAIJ*)(mat->B)->data; 291 ai=aa->i; aj=aa->j; 292 bi=bb->i; bj=bb->j; 293 #if defined(PETSC_USE_COMPLEX) 294 av=(doublecomplex*)aa->a; 295 bv=(doublecomplex*)bb->a; 296 #else 297 av=aa->a; 298 bv=bb->a; 299 #endif 300 rstart = A->rmap.rstart; 301 nz = aa->nz + bb->nz; 302 garray = mat->garray; 303 304 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 305 #if defined(PETSC_USE_COMPLEX) 306 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 307 #else 308 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 309 #endif 310 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 311 if (lu->FactPattern == SamePattern_SameRowPerm){ 312 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 313 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 314 } else { 315 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 316 lu->options.Fact = SamePattern; 317 } 318 } 319 nz = 0; irow = rstart; 320 for ( i=0; i<m; i++ ) { 321 lu->row[i] = nz; 322 countA = ai[i+1] - ai[i]; 323 countB = bi[i+1] - bi[i]; 324 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 325 bjj = bj + bi[i]; 326 327 /* B part, smaller col index */ 328 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 329 jB = 0; 330 for (j=0; j<countB; j++){ 331 jcol = garray[bjj[j]]; 332 if (jcol > colA_start) { 333 jB = j; 334 break; 335 } 336 lu->col[nz] = jcol; 337 lu->val[nz++] = *bv++; 338 if (j==countB-1) jB = countB; 339 } 340 341 /* A part */ 342 for (j=0; j<countA; j++){ 343 lu->col[nz] = rstart + ajj[j]; 344 lu->val[nz++] = *av++; 345 } 346 347 /* B part, larger col index */ 348 for (j=jB; j<countB; j++){ 349 lu->col[nz] = garray[bjj[j]]; 350 lu->val[nz++] = *bv++; 351 } 352 } 353 lu->row[m] = nz; 354 #if defined(PETSC_USE_COMPLEX) 355 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 356 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 357 #else 358 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 359 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 360 #endif 361 } 362 if (lu->options.PrintStat) { 363 ierr = PetscGetTime(&time);CHKERRQ(ierr); 364 time0 = time - time0; 365 } 366 367 /* Factor the matrix. */ 368 PStatInit(&stat); /* Initialize the statistics variables. */ 369 370 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 371 #if defined(PETSC_USE_COMPLEX) 372 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 373 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 374 #else 375 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 376 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 377 #endif 378 } else { /* distributed mat input */ 379 #if defined(PETSC_USE_COMPLEX) 380 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 381 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 382 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 383 #else 384 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 385 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 386 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 387 #endif 388 } 389 390 if (lu->MatInputMode == GLOBAL && size > 1){ 391 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 392 } 393 394 if (lu->options.PrintStat) { 395 if (size > 1){ 396 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 397 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 398 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 399 time = time/size; /* average time */ 400 if (!rank) { 401 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 402 } 403 } else { 404 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 405 } 406 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 407 } 408 PStatFree(&stat); 409 if (size > 1){ 410 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 411 F_diag->assembled = PETSC_TRUE; 412 } 413 (*F)->assembled = PETSC_TRUE; 414 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 415 PetscFunctionReturn(0); 416 } 417 418 /* Note the Petsc r and c permutations are ignored */ 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 421 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 422 { 423 Mat B; 424 Mat_SuperLU_DIST *lu; 425 PetscErrorCode ierr; 426 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 427 PetscMPIInt size; 428 superlu_options_t options; 429 PetscTruth flg; 430 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 431 const char *prtype[] = {"LargeDiag","NATURAL"}; 432 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 433 434 PetscFunctionBegin; 435 /* Create the factorization matrix */ 436 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 437 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 438 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 439 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 440 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 441 442 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 443 B->ops->solve = MatSolve_SuperLU_DIST; 444 B->factor = FACTOR_LU; 445 446 lu = (Mat_SuperLU_DIST*)(B->spptr); 447 448 /* Set the default input options: 449 options.Fact = DOFACT; 450 options.Equil = YES; 451 options.ParSymbFact = NO; 452 options.ColPerm = MMD_AT_PLUS_A; 453 options.RowPerm = LargeDiag; 454 options.ReplaceTinyPivot = YES; 455 options.IterRefine = DOUBLE; 456 options.Trans = NOTRANS; 457 options.SolveInitialized = NO; 458 options.RefineInitialized = NO; 459 options.PrintStat = YES; 460 */ 461 set_default_options_dist(&options); 462 463 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 464 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 465 /* Default num of process columns and rows */ 466 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 467 if (!lu->npcol) lu->npcol = 1; 468 while (lu->npcol > 0) { 469 lu->nprow = PetscMPIIntCast(size/lu->npcol); 470 if (size == lu->nprow * lu->npcol) break; 471 lu->npcol --; 472 } 473 474 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 475 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 476 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 477 if (size != lu->nprow * lu->npcol) 478 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 479 480 lu->MatInputMode = DISTRIBUTED; 481 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 482 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 483 484 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 485 if (!flg) { 486 options.Equil = NO; 487 } 488 489 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 490 if (flg) { 491 switch (indx) { 492 case 0: 493 options.RowPerm = LargeDiag; 494 break; 495 case 1: 496 options.RowPerm = NOROWPERM; 497 break; 498 } 499 } 500 501 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 502 if (flg) { 503 switch (indx) { 504 case 0: 505 options.ColPerm = MMD_AT_PLUS_A; 506 break; 507 case 1: 508 options.ColPerm = NATURAL; 509 break; 510 case 2: 511 options.ColPerm = MMD_ATA; 512 break; 513 case 3: 514 options.ColPerm = PARMETIS; 515 break; 516 } 517 } 518 519 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 520 if (!flg) { 521 options.ReplaceTinyPivot = NO; 522 } 523 524 options.ParSymbFact = NO; 525 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 526 if (flg){ 527 #ifdef PETSC_HAVE_PARMETIS 528 options.ParSymbFact = YES; 529 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 530 #else 531 printf("parsymbfact needs PARMETIS"); 532 #endif 533 } 534 535 lu->FactPattern = SamePattern; 536 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 537 if (flg) { 538 switch (indx) { 539 case 0: 540 lu->FactPattern = SamePattern; 541 break; 542 case 1: 543 lu->FactPattern = SamePattern_SameRowPerm; 544 break; 545 } 546 } 547 548 options.IterRefine = NOREFINE; 549 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 550 if (flg) { 551 options.IterRefine = DOUBLE; 552 } 553 554 if (PetscLogPrintInfo) { 555 options.PrintStat = YES; 556 } else { 557 options.PrintStat = NO; 558 } 559 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 560 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 561 PetscOptionsEnd(); 562 563 /* Initialize the SuperLU process grid. */ 564 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 565 566 /* Initialize ScalePermstruct and LUstruct. */ 567 ScalePermstructInit(M, N, &lu->ScalePermstruct); 568 LUstructInit(M, N, &lu->LUstruct); 569 570 lu->options = options; 571 lu->options.Fact = DOFACT; 572 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 573 *F = B; 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 579 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 580 PetscErrorCode ierr; 581 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 582 583 PetscFunctionBegin; 584 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 585 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 586 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 592 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 593 { 594 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 595 superlu_options_t options; 596 PetscErrorCode ierr; 597 598 PetscFunctionBegin; 599 /* check if matrix is superlu_dist type */ 600 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 601 602 options = lu->options; 603 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 604 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 605 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 606 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 607 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 608 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 609 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 610 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 611 if (options.ColPerm == NATURAL) { 612 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 613 } else if (options.ColPerm == MMD_AT_PLUS_A) { 614 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 615 } else if (options.ColPerm == MMD_ATA) { 616 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 617 } else if (options.ColPerm == PARMETIS) { 618 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 619 } else { 620 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 621 } 622 623 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 624 625 if (lu->FactPattern == SamePattern){ 626 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 627 } else { 628 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 629 } 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatView_SuperLU_DIST" 635 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 636 { 637 PetscErrorCode ierr; 638 PetscTruth iascii; 639 PetscViewerFormat format; 640 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 641 642 PetscFunctionBegin; 643 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 644 645 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 646 if (iascii) { 647 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 648 if (format == PETSC_VIEWER_ASCII_INFO) { 649 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 650 } 651 } 652 PetscFunctionReturn(0); 653 } 654 655 656 EXTERN_C_BEGIN 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST" 659 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 660 { 661 /* This routine is only called to convert to MATSUPERLU_DIST */ 662 /* from MATSEQAIJ if A has a single process communicator */ 663 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 664 PetscErrorCode ierr; 665 PetscMPIInt size; 666 MPI_Comm comm; 667 Mat B=*newmat; 668 Mat_SuperLU_DIST *lu; 669 670 PetscFunctionBegin; 671 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 672 if (reuse == MAT_INITIAL_MATRIX) { 673 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 674 lu->MatDuplicate = B->ops->duplicate; 675 lu->MatView = B->ops->view; 676 lu->MatAssemblyEnd = B->ops->assemblyend; 677 lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 678 lu->MatDestroy = B->ops->destroy; 679 } else { 680 lu->MatDuplicate = A->ops->duplicate; 681 lu->MatView = A->ops->view; 682 lu->MatAssemblyEnd = A->ops->assemblyend; 683 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 684 lu->MatDestroy = A->ops->destroy; 685 } 686 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 687 688 B->spptr = (void*)lu; 689 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 690 B->ops->view = MatView_SuperLU_DIST; 691 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 692 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 693 B->ops->destroy = MatDestroy_SuperLU_DIST; 694 695 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 696 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 697 if (size == 1) { 698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 699 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 701 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 702 } else { 703 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 704 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 705 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 706 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 707 } 708 ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 709 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 710 *newmat = B; 711 PetscFunctionReturn(0); 712 } 713 EXTERN_C_END 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 717 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 718 PetscErrorCode ierr; 719 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 720 721 PetscFunctionBegin; 722 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 723 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 /*MC 728 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 729 via the external package SuperLU_DIST. 730 731 If SuperLU_DIST is installed (see the manual for 732 instructions on how to declare the existence of external packages), 733 a matrix type can be constructed which invokes SuperLU_DIST solvers. 734 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 735 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 736 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 737 738 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 739 and from MATMPIAIJ otherwise. As a result, for single process communicators, 740 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 741 for communicators controlling multiple processes. It is recommended that you call both of 742 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 743 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 744 without data copy; but this MUST be called AFTER the matrix values are set. 745 746 747 748 Options Database Keys: 749 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 750 . -mat_superlu_dist_r <n> - number of rows in processor partition 751 . -mat_superlu_dist_c <n> - number of columns in processor partition 752 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 753 . -mat_superlu_dist_equil - equilibrate the matrix 754 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 755 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 756 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 757 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 758 . -mat_superlu_dist_iterrefine - use iterative refinement 759 - -mat_superlu_dist_statprint - print factorization information 760 761 Level: beginner 762 763 .seealso: PCLU 764 M*/ 765 766 EXTERN_C_BEGIN 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatCreate_SuperLU_DIST" 769 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 770 { 771 PetscErrorCode ierr; 772 PetscMPIInt size; 773 774 PetscFunctionBegin; 775 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 776 if (size == 1) { 777 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 778 } else { 779 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 780 /* A_diag = 0x0 ??? -- do we need it? 781 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 782 ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 783 */ 784 } 785 ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 EXTERN_C_END 789 790