1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->M, N=A->N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 if (lu->MatInputMode == GLOBAL) { 173 #if defined(PETSC_USE_COMPLEX) 174 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 175 &lu->grid, &lu->LUstruct, berr, &stat, &info); 176 #else 177 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 178 &lu->grid, &lu->LUstruct, berr, &stat, &info); 179 #endif 180 } else { /* distributed mat input */ 181 #if defined(PETSC_USE_COMPLEX) 182 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 185 #else 186 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 187 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 188 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 189 #endif 190 } 191 if (lu->options.PrintStat) { 192 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 193 } 194 PStatFree(&stat); 195 196 if (size > 1) { 197 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 198 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 199 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 201 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 202 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 } else { 207 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 215 { 216 Mat *tseq,A_seq = PETSC_NULL; 217 Mat_SeqAIJ *aa,*bb; 218 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 219 PetscErrorCode ierr; 220 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 221 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 222 PetscMPIInt size,rank; 223 SuperLUStat_t stat; 224 double *berr=0; 225 IS isrow; 226 PetscLogDouble time0,time,time_min,time_max; 227 Mat F_diag; 228 #if defined(PETSC_USE_COMPLEX) 229 doublecomplex *av, *bv; 230 #else 231 double *av, *bv; 232 #endif 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 237 238 if (lu->options.PrintStat) { /* collect time for mat conversion */ 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 jB = 0; 320 for (j=0; j<countB; j++){ 321 jcol = garray[bjj[j]]; 322 if (jcol > colA_start) { 323 jB = j; 324 break; 325 } 326 lu->col[nz] = jcol; 327 lu->val[nz++] = *bv++; 328 if (j==countB-1) jB = countB; 329 } 330 331 /* A part */ 332 for (j=0; j<countA; j++){ 333 lu->col[nz] = mat->rstart + ajj[j]; 334 lu->val[nz++] = *av++; 335 } 336 337 /* B part, larger col index */ 338 for (j=jB; j<countB; j++){ 339 lu->col[nz] = garray[bjj[j]]; 340 lu->val[nz++] = *bv++; 341 } 342 } 343 lu->row[m] = nz; 344 #if defined(PETSC_USE_COMPLEX) 345 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 347 #else 348 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 349 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 350 #endif 351 } 352 if (lu->options.PrintStat) { 353 ierr = PetscGetTime(&time);CHKERRQ(ierr); 354 time0 = time - time0; 355 } 356 357 /* Factor the matrix. */ 358 PStatInit(&stat); /* Initialize the statistics variables. */ 359 360 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 361 #if defined(PETSC_USE_COMPLEX) 362 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 363 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 364 #else 365 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 366 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 367 #endif 368 } else { /* distributed mat input */ 369 #if defined(PETSC_USE_COMPLEX) 370 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 371 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 372 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 373 #else 374 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 376 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 377 #endif 378 } 379 380 if (lu->MatInputMode == GLOBAL && size > 1){ 381 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 382 } 383 384 if (lu->options.PrintStat) { 385 if (size > 1){ 386 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 387 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 388 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 389 time = time/size; /* average time */ 390 if (!rank) 391 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 392 %g / %g / %g\n",time_max,time_min,time); 393 } else { 394 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 395 %g\n",time0); 396 } 397 398 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 399 } 400 PStatFree(&stat); 401 402 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 403 F_diag->assembled = PETSC_TRUE; 404 (*F)->assembled = PETSC_TRUE; 405 lu->flg = SAME_NONZERO_PATTERN; 406 PetscFunctionReturn(0); 407 } 408 409 /* Note the Petsc r and c permutations are ignored */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 412 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 413 { 414 Mat B; 415 Mat_SuperLU_DIST *lu; 416 PetscErrorCode ierr; 417 PetscInt M=A->M,N=A->N,indx; 418 PetscMPIInt size; 419 superlu_options_t options; 420 PetscTruth flg; 421 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 422 const char *prtype[] = {"LargeDiag","NATURAL"}; 423 424 PetscFunctionBegin; 425 /* Create the factorization matrix */ 426 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 427 ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr); 428 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 429 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 430 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 431 432 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 433 B->ops->solve = MatSolve_SuperLU_DIST; 434 B->factor = FACTOR_LU; 435 436 lu = (Mat_SuperLU_DIST*)(B->spptr); 437 438 /* Set the input options */ 439 set_default_options_dist(&options); 440 441 lu->MatInputMode = GLOBAL; 442 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 443 444 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 445 lu->nprow = size/2; /* Default process rows. */ 446 if (!lu->nprow) lu->nprow = 1; 447 lu->npcol = size/lu->nprow; /* Default process columns. */ 448 449 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 450 451 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 452 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 453 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 454 455 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 456 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 457 458 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 459 if (!flg) { 460 options.Equil = NO; 461 } 462 463 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 464 if (flg) { 465 switch (indx) { 466 case 0: 467 options.RowPerm = LargeDiag; 468 break; 469 case 1: 470 options.RowPerm = NOROWPERM; 471 break; 472 } 473 } 474 475 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 476 if (flg) { 477 switch (indx) { 478 case 0: 479 options.ColPerm = MMD_AT_PLUS_A; 480 break; 481 case 1: 482 options.ColPerm = NATURAL; 483 break; 484 case 2: 485 options.ColPerm = MMD_ATA; 486 break; 487 } 488 } 489 490 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 491 if (!flg) { 492 options.ReplaceTinyPivot = NO; 493 } 494 495 options.IterRefine = NOREFINE; 496 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 497 if (flg) { 498 options.IterRefine = DOUBLE; 499 } 500 501 if (PetscLogPrintInfo) { 502 options.PrintStat = YES; 503 } else { 504 options.PrintStat = NO; 505 } 506 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 507 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 508 PetscOptionsEnd(); 509 510 /* Initialize the SuperLU process grid. */ 511 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 512 513 /* Initialize ScalePermstruct and LUstruct. */ 514 ScalePermstructInit(M, N, &lu->ScalePermstruct); 515 LUstructInit(M, N, &lu->LUstruct); 516 517 lu->options = options; 518 lu->flg = DIFFERENT_NONZERO_PATTERN; 519 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 520 *F = B; 521 522 PetscFunctionReturn(0); 523 } 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 527 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 528 PetscErrorCode ierr; 529 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 530 531 PetscFunctionBegin; 532 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 533 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 534 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 535 PetscFunctionReturn(0); 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 540 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 541 { 542 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 543 superlu_options_t options; 544 PetscErrorCode ierr; 545 546 PetscFunctionBegin; 547 /* check if matrix is superlu_dist type */ 548 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 549 550 options = lu->options; 551 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 553 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 554 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 555 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 556 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 557 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 558 if (options.ColPerm == NATURAL) { 559 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 560 } else if (options.ColPerm == MMD_AT_PLUS_A) { 561 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 562 } else if (options.ColPerm == MMD_ATA) { 563 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 564 } else { 565 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatView_SuperLU_DIST" 572 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 573 { 574 PetscErrorCode ierr; 575 PetscTruth iascii; 576 PetscViewerFormat format; 577 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 578 579 PetscFunctionBegin; 580 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 581 582 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 583 if (iascii) { 584 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 585 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 586 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 587 } 588 } 589 PetscFunctionReturn(0); 590 } 591 592 593 EXTERN_C_BEGIN 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 596 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 597 { 598 /* This routine is only called to convert to MATSUPERLU_DIST */ 599 /* from MATSEQAIJ if A has a single process communicator */ 600 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 601 PetscErrorCode ierr; 602 PetscMPIInt size; 603 MPI_Comm comm; 604 Mat B=*newmat; 605 Mat_SuperLU_DIST *lu; 606 607 PetscFunctionBegin; 608 if (reuse == MAT_INITIAL_MATRIX) { 609 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 610 } 611 612 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 613 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 614 615 lu->MatDuplicate = A->ops->duplicate; 616 lu->MatView = A->ops->view; 617 lu->MatAssemblyEnd = A->ops->assemblyend; 618 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 619 lu->MatDestroy = A->ops->destroy; 620 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 621 622 B->spptr = (void*)lu; 623 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 624 B->ops->view = MatView_SuperLU_DIST; 625 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 626 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 627 B->ops->destroy = MatDestroy_SuperLU_DIST; 628 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 629 if (size == 1) { 630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 631 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 633 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 634 } else { 635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 636 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 638 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 639 } 640 ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 641 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 642 *newmat = B; 643 PetscFunctionReturn(0); 644 } 645 EXTERN_C_END 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 649 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 650 PetscErrorCode ierr; 651 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 652 653 PetscFunctionBegin; 654 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 655 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 656 PetscFunctionReturn(0); 657 } 658 659 /*MC 660 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 661 via the external package SuperLU_DIST. 662 663 If SuperLU_DIST is installed (see the manual for 664 instructions on how to declare the existence of external packages), 665 a matrix type can be constructed which invokes SuperLU_DIST solvers. 666 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 667 668 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 669 and from MATMPIAIJ otherwise. As a result, for single process communicators, 670 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 671 for communicators controlling multiple processes. It is recommended that you call both of 672 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 673 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 674 without data copy. 675 676 Options Database Keys: 677 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 678 . -mat_superlu_dist_r <n> - number of rows in processor partition 679 . -mat_superlu_dist_c <n> - number of columns in processor partition 680 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 681 . -mat_superlu_dist_equil - equilibrate the matrix 682 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 683 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 684 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 685 . -mat_superlu_dist_iterrefine - use iterative refinement 686 - -mat_superlu_dist_statprint - print factorization information 687 688 Level: beginner 689 690 .seealso: PCLU 691 M*/ 692 693 EXTERN_C_BEGIN 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatCreate_SuperLU_DIST" 696 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 697 { 698 PetscErrorCode ierr; 699 PetscMPIInt size; 700 Mat A_diag; 701 702 PetscFunctionBegin; 703 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 704 /* and SuperLU_DIST types */ 705 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 706 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 707 if (size == 1) { 708 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 711 A_diag = ((Mat_MPIAIJ *)A->data)->A; 712 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 713 } 714 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 EXTERN_C_END 718 719