1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* A few function pointers for inheritance */ 42 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 PetscErrorCode (*MatView)(Mat,PetscViewer); 44 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatDestroy)(Mat); 47 48 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 49 PetscTruth CleanUpSuperLU_Dist; 50 } Mat_SuperLU_DIST; 51 52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType); 58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *); 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ" 65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 66 { 67 PetscErrorCode ierr; 68 Mat B=*newmat; 69 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 70 71 PetscFunctionBegin; 72 if (reuse == MAT_INITIAL_MATRIX) { 73 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 74 } 75 /* Reset the original function pointers */ 76 B->ops->duplicate = lu->MatDuplicate; 77 B->ops->view = lu->MatView; 78 B->ops->assemblyend = lu->MatAssemblyEnd; 79 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 80 B->ops->destroy = lu->MatDestroy; 81 ierr = PetscFree(lu);CHKERRQ(ierr); 82 A->spptr = PETSC_NULL; 83 84 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 85 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 86 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 87 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 88 89 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 90 *newmat = B; 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 98 { 99 PetscErrorCode ierr; 100 PetscMPIInt size; 101 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 102 103 PetscFunctionBegin; 104 if (lu->CleanUpSuperLU_Dist) { 105 /* Deallocate SuperLU_DIST storage */ 106 if (lu->MatInputMode == GLOBAL) { 107 Destroy_CompCol_Matrix_dist(&lu->A_sup); 108 } else { 109 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 110 if ( lu->options.SolveInitialized ) { 111 #if defined(PETSC_USE_COMPLEX) 112 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 113 #else 114 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 115 #endif 116 } 117 } 118 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 119 ScalePermstructFree(&lu->ScalePermstruct); 120 LUstructFree(&lu->LUstruct); 121 122 /* Release the SuperLU_DIST process grid. */ 123 superlu_gridexit(&lu->grid); 124 125 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 126 } 127 128 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 129 if (size == 1) { 130 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 131 } else { 132 ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 133 } 134 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSolve_SuperLU_DIST" 140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 141 { 142 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 143 PetscErrorCode ierr; 144 PetscMPIInt size; 145 PetscInt m=A->rmap.N, N=A->cmap.N; 146 SuperLUStat_t stat; 147 double berr[1]; 148 PetscScalar *bptr; 149 PetscInt info, nrhs=1; 150 Vec x_seq; 151 IS iden; 152 VecScatter scat; 153 154 PetscFunctionBegin; 155 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 156 if (size > 1) { 157 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 158 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 159 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 160 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 161 ierr = ISDestroy(iden);CHKERRQ(ierr); 162 163 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 164 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 165 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 166 } else { /* distributed mat input */ 167 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 168 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 169 } 170 } else { /* size == 1 */ 171 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 172 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 173 } 174 175 if (lu->options.Fact != FACTORED) 176 SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 177 178 PStatInit(&stat); /* Initialize the statistics variables. */ 179 if (lu->MatInputMode == GLOBAL) { 180 #if defined(PETSC_USE_COMPLEX) 181 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 182 &lu->grid, &lu->LUstruct, berr, &stat, &info); 183 #else 184 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 185 &lu->grid, &lu->LUstruct, berr, &stat, &info); 186 #endif 187 } else { /* distributed mat input */ 188 #if defined(PETSC_USE_COMPLEX) 189 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 190 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 191 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 192 #else 193 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 194 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 195 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 196 #endif 197 } 198 if (lu->options.PrintStat) { 199 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 200 } 201 PStatFree(&stat); 202 203 if (size > 1) { 204 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 205 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 206 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 207 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 208 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 209 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 210 } else { 211 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 212 } 213 } else { 214 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 215 } 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 221 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 222 { 223 Mat *tseq,A_seq = PETSC_NULL; 224 Mat_SeqAIJ *aa,*bb; 225 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 226 PetscErrorCode ierr; 227 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 228 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 229 PetscMPIInt size,rank; 230 SuperLUStat_t stat; 231 double *berr=0; 232 IS isrow; 233 PetscLogDouble time0,time,time_min,time_max; 234 Mat F_diag=PETSC_NULL; 235 #if defined(PETSC_USE_COMPLEX) 236 doublecomplex *av, *bv; 237 #else 238 double *av, *bv; 239 #endif 240 241 PetscFunctionBegin; 242 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 243 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 244 245 if (lu->options.PrintStat) { /* collect time for mat conversion */ 246 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 247 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 248 } 249 250 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 251 if (size > 1) { /* convert mpi A to seq mat A */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 253 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 254 ierr = ISDestroy(isrow);CHKERRQ(ierr); 255 256 A_seq = *tseq; 257 ierr = PetscFree(tseq);CHKERRQ(ierr); 258 aa = (Mat_SeqAIJ*)A_seq->data; 259 } else { 260 aa = (Mat_SeqAIJ*)A->data; 261 } 262 263 /* Convert Petsc NR matrix to SuperLU_DIST NC. 264 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 265 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 266 if (lu->FactPattern == SamePattern_SameRowPerm){ 267 Destroy_CompCol_Matrix_dist(&lu->A_sup); 268 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 269 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 270 } else { 271 Destroy_CompCol_Matrix_dist(&lu->A_sup); 272 Destroy_LU(N, &lu->grid, &lu->LUstruct); 273 lu->options.Fact = SamePattern; 274 } 275 } 276 #if defined(PETSC_USE_COMPLEX) 277 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 278 #else 279 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 280 #endif 281 282 /* Create compressed column matrix A_sup. */ 283 #if defined(PETSC_USE_COMPLEX) 284 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 285 #else 286 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 287 #endif 288 } else { /* distributed mat input */ 289 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 290 aa=(Mat_SeqAIJ*)(mat->A)->data; 291 bb=(Mat_SeqAIJ*)(mat->B)->data; 292 ai=aa->i; aj=aa->j; 293 bi=bb->i; bj=bb->j; 294 #if defined(PETSC_USE_COMPLEX) 295 av=(doublecomplex*)aa->a; 296 bv=(doublecomplex*)bb->a; 297 #else 298 av=aa->a; 299 bv=bb->a; 300 #endif 301 rstart = A->rmap.rstart; 302 nz = aa->nz + bb->nz; 303 garray = mat->garray; 304 305 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 306 #if defined(PETSC_USE_COMPLEX) 307 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 308 #else 309 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 310 #endif 311 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 312 if (lu->FactPattern == SamePattern_SameRowPerm){ 313 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 314 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 315 } else { 316 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 317 lu->options.Fact = SamePattern; 318 } 319 } 320 nz = 0; irow = rstart; 321 for ( i=0; i<m; i++ ) { 322 lu->row[i] = nz; 323 countA = ai[i+1] - ai[i]; 324 countB = bi[i+1] - bi[i]; 325 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 326 bjj = bj + bi[i]; 327 328 /* B part, smaller col index */ 329 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 330 jB = 0; 331 for (j=0; j<countB; j++){ 332 jcol = garray[bjj[j]]; 333 if (jcol > colA_start) { 334 jB = j; 335 break; 336 } 337 lu->col[nz] = jcol; 338 lu->val[nz++] = *bv++; 339 if (j==countB-1) jB = countB; 340 } 341 342 /* A part */ 343 for (j=0; j<countA; j++){ 344 lu->col[nz] = rstart + ajj[j]; 345 lu->val[nz++] = *av++; 346 } 347 348 /* B part, larger col index */ 349 for (j=jB; j<countB; j++){ 350 lu->col[nz] = garray[bjj[j]]; 351 lu->val[nz++] = *bv++; 352 } 353 } 354 lu->row[m] = nz; 355 #if defined(PETSC_USE_COMPLEX) 356 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 357 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 358 #else 359 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 360 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 361 #endif 362 } 363 if (lu->options.PrintStat) { 364 ierr = PetscGetTime(&time);CHKERRQ(ierr); 365 time0 = time - time0; 366 } 367 368 /* Factor the matrix. */ 369 PStatInit(&stat); /* Initialize the statistics variables. */ 370 371 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 372 #if defined(PETSC_USE_COMPLEX) 373 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 374 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 375 #else 376 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 377 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 378 #endif 379 } else { /* distributed mat input */ 380 #if defined(PETSC_USE_COMPLEX) 381 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 382 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 383 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 384 #else 385 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 386 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 387 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 388 #endif 389 } 390 391 if (lu->MatInputMode == GLOBAL && size > 1){ 392 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 393 } 394 395 if (lu->options.PrintStat) { 396 if (size > 1){ 397 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 398 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 399 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 400 time = time/size; /* average time */ 401 if (!rank) 402 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 403 %g / %g / %g\n",time_max,time_min,time); 404 } else { 405 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 406 %g\n",time0); 407 } 408 409 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 410 } 411 PStatFree(&stat); 412 if (size > 1){ 413 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 414 F_diag->assembled = PETSC_TRUE; 415 } 416 (*F)->assembled = PETSC_TRUE; 417 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 418 PetscFunctionReturn(0); 419 } 420 421 /* Note the Petsc r and c permutations are ignored */ 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 424 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 425 { 426 Mat B; 427 Mat_SuperLU_DIST *lu; 428 PetscErrorCode ierr; 429 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 430 PetscMPIInt size; 431 superlu_options_t options; 432 PetscTruth flg; 433 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 434 const char *prtype[] = {"LargeDiag","NATURAL"}; 435 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 436 437 PetscFunctionBegin; 438 /* Create the factorization matrix */ 439 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 440 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 441 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 442 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 443 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 444 445 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 446 B->ops->solve = MatSolve_SuperLU_DIST; 447 B->factor = FACTOR_LU; 448 449 lu = (Mat_SuperLU_DIST*)(B->spptr); 450 451 /* Set the default input options: 452 options.Fact = DOFACT; 453 options.Equil = YES; 454 options.ColPerm = MMD_AT_PLUS_A; 455 options.RowPerm = LargeDiag; 456 options.ReplaceTinyPivot = YES; 457 options.Trans = NOTRANS; 458 options.IterRefine = DOUBLE; 459 options.SolveInitialized = NO; 460 options.RefineInitialized = NO; 461 options.PrintStat = YES; 462 */ 463 set_default_options_dist(&options); 464 465 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 466 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 467 /* Default num of process columns and rows */ 468 lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size)); 469 if (!lu->npcol) lu->npcol = 1; 470 while (lu->npcol > 0) { 471 lu->nprow = (PetscMPIInt)(size/lu->npcol); 472 if (size == lu->nprow * lu->npcol) break; 473 lu->npcol --; 474 } 475 476 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 477 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 478 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 479 if (size != lu->nprow * lu->npcol) 480 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 481 482 lu->MatInputMode = DISTRIBUTED; 483 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 484 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 485 486 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 487 if (!flg) { 488 options.Equil = NO; 489 } 490 491 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 492 if (flg) { 493 switch (indx) { 494 case 0: 495 options.RowPerm = LargeDiag; 496 break; 497 case 1: 498 options.RowPerm = NOROWPERM; 499 break; 500 } 501 } 502 503 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr); 504 if (flg) { 505 switch (indx) { 506 case 0: 507 options.ColPerm = MMD_AT_PLUS_A; 508 break; 509 case 1: 510 options.ColPerm = NATURAL; 511 break; 512 case 2: 513 options.ColPerm = MMD_ATA; 514 break; 515 } 516 } 517 518 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 519 if (!flg) { 520 options.ReplaceTinyPivot = NO; 521 } 522 523 lu->FactPattern = SamePattern; 524 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 525 if (flg) { 526 switch (indx) { 527 case 0: 528 lu->FactPattern = SamePattern; 529 break; 530 case 1: 531 lu->FactPattern = SamePattern_SameRowPerm; 532 break; 533 } 534 } 535 536 options.IterRefine = NOREFINE; 537 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 538 if (flg) { 539 options.IterRefine = DOUBLE; 540 } 541 542 if (PetscLogPrintInfo) { 543 options.PrintStat = YES; 544 } else { 545 options.PrintStat = NO; 546 } 547 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 548 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 549 PetscOptionsEnd(); 550 551 /* Initialize the SuperLU process grid. */ 552 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 553 554 /* Initialize ScalePermstruct and LUstruct. */ 555 ScalePermstructInit(M, N, &lu->ScalePermstruct); 556 LUstructInit(M, N, &lu->LUstruct); 557 558 lu->options = options; 559 lu->options.Fact = DOFACT; 560 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 561 *F = B; 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 567 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 568 PetscErrorCode ierr; 569 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 570 571 PetscFunctionBegin; 572 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 573 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 574 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 580 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 581 { 582 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 583 superlu_options_t options; 584 PetscErrorCode ierr; 585 586 PetscFunctionBegin; 587 /* check if matrix is superlu_dist type */ 588 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 589 590 options = lu->options; 591 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 592 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 593 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 594 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 596 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 597 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 598 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 599 if (options.ColPerm == NATURAL) { 600 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 601 } else if (options.ColPerm == MMD_AT_PLUS_A) { 602 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 603 } else if (options.ColPerm == MMD_ATA) { 604 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 605 } else { 606 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 607 } 608 609 if (lu->FactPattern == SamePattern){ 610 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 611 } else { 612 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 613 } 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatView_SuperLU_DIST" 619 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 620 { 621 PetscErrorCode ierr; 622 PetscTruth iascii; 623 PetscViewerFormat format; 624 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 625 626 PetscFunctionBegin; 627 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 628 629 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 630 if (iascii) { 631 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 632 if (format == PETSC_VIEWER_ASCII_INFO) { 633 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 634 } 635 } 636 PetscFunctionReturn(0); 637 } 638 639 640 EXTERN_C_BEGIN 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST" 643 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 644 { 645 /* This routine is only called to convert to MATSUPERLU_DIST */ 646 /* from MATSEQAIJ if A has a single process communicator */ 647 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 648 PetscErrorCode ierr; 649 PetscMPIInt size; 650 MPI_Comm comm; 651 Mat B=*newmat; 652 Mat_SuperLU_DIST *lu; 653 654 PetscFunctionBegin; 655 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 656 if (reuse == MAT_INITIAL_MATRIX) { 657 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 658 lu->MatDuplicate = B->ops->duplicate; 659 lu->MatView = B->ops->view; 660 lu->MatAssemblyEnd = B->ops->assemblyend; 661 lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 662 lu->MatDestroy = B->ops->destroy; 663 } else { 664 lu->MatDuplicate = A->ops->duplicate; 665 lu->MatView = A->ops->view; 666 lu->MatAssemblyEnd = A->ops->assemblyend; 667 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 668 lu->MatDestroy = A->ops->destroy; 669 } 670 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 671 672 B->spptr = (void*)lu; 673 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 674 B->ops->view = MatView_SuperLU_DIST; 675 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 676 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 677 B->ops->destroy = MatDestroy_SuperLU_DIST; 678 679 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 680 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 681 if (size == 1) { 682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 683 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 685 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 686 } else { 687 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 688 "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr); 689 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 690 "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr); 691 } 692 ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 693 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 694 *newmat = B; 695 PetscFunctionReturn(0); 696 } 697 EXTERN_C_END 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 701 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 702 PetscErrorCode ierr; 703 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 704 705 PetscFunctionBegin; 706 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 707 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 /*MC 712 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 713 via the external package SuperLU_DIST. 714 715 If SuperLU_DIST is installed (see the manual for 716 instructions on how to declare the existence of external packages), 717 a matrix type can be constructed which invokes SuperLU_DIST solvers. 718 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 719 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 720 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 721 722 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 723 and from MATMPIAIJ otherwise. As a result, for single process communicators, 724 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 725 for communicators controlling multiple processes. It is recommended that you call both of 726 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 727 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 728 without data copy; but this MUST be called AFTER the matrix values are set. 729 730 731 732 Options Database Keys: 733 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 734 . -mat_superlu_dist_r <n> - number of rows in processor partition 735 . -mat_superlu_dist_c <n> - number of columns in processor partition 736 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 737 . -mat_superlu_dist_equil - equilibrate the matrix 738 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 739 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 740 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 741 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 742 . -mat_superlu_dist_iterrefine - use iterative refinement 743 - -mat_superlu_dist_statprint - print factorization information 744 745 Level: beginner 746 747 .seealso: PCLU 748 M*/ 749 750 EXTERN_C_BEGIN 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatCreate_SuperLU_DIST" 753 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 754 { 755 PetscErrorCode ierr; 756 PetscMPIInt size; 757 758 PetscFunctionBegin; 759 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 760 if (size == 1) { 761 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 762 } else { 763 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 764 /* A_diag = 0x0 ??? -- do we need it? 765 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 766 ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 767 */ 768 } 769 ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } 772 EXTERN_C_END 773 774