1 /* 2 Provides an interface to the SuperLU_DIST_2.0 sparse solver 3 */ 4 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 8 #include "stdlib.h" 9 #endif 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "superlu_zdefs.h" 14 #else 15 #include "superlu_ddefs.h" 16 #endif 17 EXTERN_C_END 18 19 typedef enum { GLOBAL,DISTRIBUTED 20 } SuperLU_MatInputMode; 21 22 typedef struct { 23 int_t nprow,npcol,*row,*col; 24 gridinfo_t grid; 25 superlu_options_t options; 26 SuperMatrix A_sup; 27 ScalePermstruct_t ScalePermstruct; 28 LUstruct_t LUstruct; 29 int StatPrint; 30 int MatInputMode; 31 SOLVEstruct_t SOLVEstruct; 32 MatStructure flg; 33 MPI_Comm comm_superlu; 34 #if defined(PETSC_USE_COMPLEX) 35 doublecomplex *val; 36 #else 37 double *val; 38 #endif 39 40 /* A few function pointers for inheritance */ 41 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 PetscErrorCode (*MatView)(Mat,PetscViewer); 43 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 52 53 EXTERN_C_BEGIN 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) { 57 PetscErrorCode ierr; 58 Mat B=*newmat; 59 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 60 61 PetscFunctionBegin; 62 if (B != A) { 63 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 64 } 65 /* Reset the original function pointers */ 66 B->ops->duplicate = lu->MatDuplicate; 67 B->ops->view = lu->MatView; 68 B->ops->assemblyend = lu->MatAssemblyEnd; 69 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 70 B->ops->destroy = lu->MatDestroy; 71 72 ierr = PetscFree(lu);CHKERRQ(ierr); 73 74 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 75 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 76 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 78 79 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 80 *newmat = B; 81 PetscFunctionReturn(0); 82 } 83 EXTERN_C_END 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 87 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 88 { 89 PetscErrorCode ierr; 90 int size; 91 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 92 93 PetscFunctionBegin; 94 if (lu->CleanUpSuperLU_Dist) { 95 /* Deallocate SuperLU_DIST storage */ 96 if (lu->MatInputMode == GLOBAL) { 97 Destroy_CompCol_Matrix_dist(&lu->A_sup); 98 } else { 99 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 100 if ( lu->options.SolveInitialized ) { 101 #if defined(PETSC_USE_COMPLEX) 102 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 103 #else 104 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 105 #endif 106 } 107 } 108 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 109 ScalePermstructFree(&lu->ScalePermstruct); 110 LUstructFree(&lu->LUstruct); 111 112 /* Release the SuperLU_DIST process grid. */ 113 superlu_gridexit(&lu->grid); 114 115 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 116 } 117 118 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 119 if (size == 1) { 120 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 121 } else { 122 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 123 } 124 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 125 126 PetscFunctionReturn(0); 127 } 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatSolve_SuperLU_DIST" 131 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 132 { 133 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 134 PetscErrorCode ierr; 135 int size; 136 int m=A->M, N=A->N; 137 SuperLUStat_t stat; 138 double berr[1]; 139 PetscScalar *bptr; 140 int info, nrhs=1; 141 Vec x_seq; 142 IS iden; 143 VecScatter scat; 144 145 PetscFunctionBegin; 146 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 147 if (size > 1) { 148 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 149 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 150 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 151 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 152 ierr = ISDestroy(iden);CHKERRQ(ierr); 153 154 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 155 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 156 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 157 } else { /* distributed mat input */ 158 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 159 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 160 } 161 } else { /* size == 1 */ 162 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 163 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 164 } 165 166 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 167 168 PStatInit(&stat); /* Initialize the statistics variables. */ 169 170 if (lu->MatInputMode == GLOBAL) { 171 #if defined(PETSC_USE_COMPLEX) 172 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 173 &lu->grid, &lu->LUstruct, berr, &stat, &info); 174 #else 175 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 176 &lu->grid, &lu->LUstruct, berr, &stat, &info); 177 #endif 178 } else { /* distributed mat input */ 179 #if defined(PETSC_USE_COMPLEX) 180 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 181 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 182 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 183 #else 184 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 185 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 186 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 187 #endif 188 } 189 if (lu->StatPrint) { 190 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 191 } 192 PStatFree(&stat); 193 194 if (size > 1) { 195 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 196 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 197 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 198 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 199 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 200 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 201 } else { 202 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 203 } 204 } else { 205 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 212 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 213 { 214 Mat *tseq,A_seq = PETSC_NULL; 215 Mat_SeqAIJ *aa,*bb; 216 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 217 PetscErrorCode ierr; 218 int M=A->M,N=A->N,info,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 219 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 220 SuperLUStat_t stat; 221 double *berr=0; 222 IS isrow; 223 PetscLogDouble time0,time,time_min,time_max; 224 #if defined(PETSC_USE_COMPLEX) 225 doublecomplex *av, *bv; 226 #else 227 double *av, *bv; 228 #endif 229 230 PetscFunctionBegin; 231 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 232 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 233 234 if (lu->StatPrint) { /* collect time for mat conversion */ 235 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 236 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 237 } 238 239 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 240 if (size > 1) { /* convert mpi A to seq mat A */ 241 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 242 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 243 ierr = ISDestroy(isrow);CHKERRQ(ierr); 244 245 A_seq = *tseq; 246 ierr = PetscFree(tseq);CHKERRQ(ierr); 247 aa = (Mat_SeqAIJ*)A_seq->data; 248 } else { 249 aa = (Mat_SeqAIJ*)A->data; 250 } 251 252 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 253 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 254 #if defined(PETSC_USE_COMPLEX) 255 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 256 #else 257 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 258 #endif 259 } else { /* successive numeric factorization, sparsity pattern is reused. */ 260 Destroy_CompCol_Matrix_dist(&lu->A_sup); 261 Destroy_LU(N, &lu->grid, &lu->LUstruct); 262 lu->options.Fact = SamePattern; 263 } 264 #if defined(PETSC_USE_COMPLEX) 265 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 266 #else 267 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 268 #endif 269 270 /* Create compressed column matrix A_sup. */ 271 #if defined(PETSC_USE_COMPLEX) 272 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 273 #else 274 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 275 #endif 276 } else { /* distributed mat input */ 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 aa=(Mat_SeqAIJ*)(mat->A)->data; 279 bb=(Mat_SeqAIJ*)(mat->B)->data; 280 ai=aa->i; aj=aa->j; 281 bi=bb->i; bj=bb->j; 282 #if defined(PETSC_USE_COMPLEX) 283 av=(doublecomplex*)aa->a; 284 bv=(doublecomplex*)bb->a; 285 #else 286 av=aa->a; 287 bv=bb->a; 288 #endif 289 rstart = mat->rstart; 290 nz = aa->nz + bb->nz; 291 garray = mat->garray; 292 rstart = mat->rstart; 293 294 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 295 #if defined(PETSC_USE_COMPLEX) 296 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 297 #else 298 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 299 #endif 300 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 301 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 302 Destroy_LU(N, &lu->grid, &lu->LUstruct); 303 lu->options.Fact = SamePattern; 304 } 305 nz = 0; irow = mat->rstart; 306 for ( i=0; i<m; i++ ) { 307 lu->row[i] = nz; 308 countA = ai[i+1] - ai[i]; 309 countB = bi[i+1] - bi[i]; 310 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 311 bjj = bj + bi[i]; 312 313 /* B part, smaller col index */ 314 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 315 jB = 0; 316 for (j=0; j<countB; j++){ 317 jcol = garray[bjj[j]]; 318 if (jcol > colA_start) { 319 jB = j; 320 break; 321 } 322 lu->col[nz] = jcol; 323 lu->val[nz++] = *bv++; 324 if (j==countB-1) jB = countB; 325 } 326 327 /* A part */ 328 for (j=0; j<countA; j++){ 329 lu->col[nz] = mat->rstart + ajj[j]; 330 lu->val[nz++] = *av++; 331 } 332 333 /* B part, larger col index */ 334 for (j=jB; j<countB; j++){ 335 lu->col[nz] = garray[bjj[j]]; 336 lu->val[nz++] = *bv++; 337 } 338 } 339 lu->row[m] = nz; 340 #if defined(PETSC_USE_COMPLEX) 341 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 342 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 343 #else 344 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 345 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 346 #endif 347 } 348 if (lu->StatPrint) { 349 ierr = PetscGetTime(&time);CHKERRQ(ierr); 350 time0 = time - time0; 351 } 352 353 /* Factor the matrix. */ 354 PStatInit(&stat); /* Initialize the statistics variables. */ 355 356 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 357 #if defined(PETSC_USE_COMPLEX) 358 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 359 &lu->grid, &lu->LUstruct, berr, &stat, &info); 360 #else 361 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 362 &lu->grid, &lu->LUstruct, berr, &stat, &info); 363 #endif 364 } else { /* distributed mat input */ 365 #if defined(PETSC_USE_COMPLEX) 366 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 367 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 368 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 369 #else 370 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 371 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 372 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 373 #endif 374 } 375 376 if (lu->MatInputMode == GLOBAL && size > 1){ 377 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 378 } 379 380 if (lu->StatPrint) { 381 if (size > 1){ 382 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 383 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 384 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 385 time = time/size; /* average time */ 386 if (!rank) 387 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 388 %g / %g / %g\n",time_max,time_min,time); 389 } else { 390 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 391 %g\n",time0); 392 } 393 394 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 395 } 396 PStatFree(&stat); 397 (*F)->assembled = PETSC_TRUE; 398 lu->flg = SAME_NONZERO_PATTERN; 399 PetscFunctionReturn(0); 400 } 401 402 /* Note the Petsc r and c permutations are ignored */ 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 405 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 406 { 407 Mat B; 408 Mat_SuperLU_DIST *lu; 409 PetscErrorCode ierr; 410 int M=A->M,N=A->N,size,indx; 411 superlu_options_t options; 412 PetscTruth flg; 413 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 414 const char *prtype[] = {"LargeDiag","NATURAL"}; 415 416 PetscFunctionBegin; 417 /* Create the factorization matrix */ 418 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 419 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 420 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 421 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 422 423 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 424 B->ops->solve = MatSolve_SuperLU_DIST; 425 B->factor = FACTOR_LU; 426 427 lu = (Mat_SuperLU_DIST*)(B->spptr); 428 429 /* Set the input options */ 430 set_default_options_dist(&options); 431 lu->MatInputMode = GLOBAL; 432 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 433 434 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 435 lu->nprow = size/2; /* Default process rows. */ 436 if (!lu->nprow) lu->nprow = 1; 437 lu->npcol = size/lu->nprow; /* Default process columns. */ 438 439 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 440 441 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 442 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 443 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 444 445 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 446 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 447 448 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 449 if (!flg) { 450 options.Equil = NO; 451 } 452 453 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 454 if (flg) { 455 switch (indx) { 456 case 0: 457 options.RowPerm = LargeDiag; 458 break; 459 case 1: 460 options.RowPerm = NOROWPERM; 461 break; 462 } 463 } 464 465 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 466 if (flg) { 467 switch (indx) { 468 case 0: 469 options.ColPerm = MMD_AT_PLUS_A; 470 break; 471 case 1: 472 options.ColPerm = NATURAL; 473 break; 474 case 2: 475 options.ColPerm = MMD_ATA; 476 break; 477 case 3: 478 options.ColPerm = COLAMD; 479 break; 480 } 481 } 482 483 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 484 if (!flg) { 485 options.ReplaceTinyPivot = NO; 486 } 487 488 options.IterRefine = NOREFINE; 489 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 490 if (flg) { 491 options.IterRefine = DOUBLE; 492 } 493 494 if (PetscLogPrintInfo) { 495 lu->StatPrint = (int)PETSC_TRUE; 496 } else { 497 lu->StatPrint = (int)PETSC_FALSE; 498 } 499 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 500 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 501 PetscOptionsEnd(); 502 503 /* Initialize the SuperLU process grid. */ 504 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 505 506 /* Initialize ScalePermstruct and LUstruct. */ 507 ScalePermstructInit(M, N, &lu->ScalePermstruct); 508 LUstructInit(M, N, &lu->LUstruct); 509 510 lu->options = options; 511 lu->flg = DIFFERENT_NONZERO_PATTERN; 512 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 513 *F = B; 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 519 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 520 PetscErrorCode ierr; 521 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 522 523 PetscFunctionBegin; 524 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 525 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 526 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 527 PetscFunctionReturn(0); 528 } 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 532 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 533 { 534 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 535 superlu_options_t options; 536 PetscErrorCode ierr; 537 char *colperm; 538 539 PetscFunctionBegin; 540 /* check if matrix is superlu_dist type */ 541 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 542 543 options = lu->options; 544 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 545 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 546 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 547 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 548 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 549 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 550 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 551 if (options.ColPerm == NATURAL) { 552 colperm = "NATURAL"; 553 } else if (options.ColPerm == MMD_AT_PLUS_A) { 554 colperm = "MMD_AT_PLUS_A"; 555 } else if (options.ColPerm == MMD_ATA) { 556 colperm = "MMD_ATA"; 557 } else if (options.ColPerm == COLAMD) { 558 colperm = "COLAMD"; 559 } else { 560 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 561 } 562 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SuperLU_DIST" 568 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 569 { 570 PetscErrorCode ierr; 571 PetscTruth iascii; 572 PetscViewerFormat format; 573 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 574 575 PetscFunctionBegin; 576 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 577 578 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 579 if (iascii) { 580 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 581 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 582 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 583 } 584 } 585 PetscFunctionReturn(0); 586 } 587 588 589 EXTERN_C_BEGIN 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 592 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) { 593 /* This routine is only called to convert to MATSUPERLU_DIST */ 594 /* from MATSEQAIJ if A has a single process communicator */ 595 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 596 PetscErrorCode ierr; 597 int size; 598 MPI_Comm comm; 599 Mat B=*newmat; 600 Mat_SuperLU_DIST *lu; 601 602 PetscFunctionBegin; 603 if (B != A) { 604 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 605 } 606 607 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 608 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 609 610 lu->MatDuplicate = A->ops->duplicate; 611 lu->MatView = A->ops->view; 612 lu->MatAssemblyEnd = A->ops->assemblyend; 613 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 614 lu->MatDestroy = A->ops->destroy; 615 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 616 617 B->spptr = (void*)lu; 618 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 619 B->ops->view = MatView_SuperLU_DIST; 620 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 621 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 622 B->ops->destroy = MatDestroy_SuperLU_DIST; 623 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 624 if (size == 1) { 625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 626 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 628 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 629 } else { 630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 631 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 633 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 634 } 635 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 636 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 637 *newmat = B; 638 PetscFunctionReturn(0); 639 } 640 EXTERN_C_END 641 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 644 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 645 PetscErrorCode ierr; 646 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 647 648 PetscFunctionBegin; 649 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 650 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /*MC 655 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 656 via the external package SuperLU_DIST. 657 658 If SuperLU_DIST is installed (see the manual for 659 instructions on how to declare the existence of external packages), 660 a matrix type can be constructed which invokes SuperLU_DIST solvers. 661 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 662 This matrix type is only supported for double precision real. 663 664 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 665 and from MATMPIAIJ otherwise. As a result, for single process communicators, 666 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 667 for communicators controlling multiple processes. It is recommended that you call both of 668 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 669 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 670 without data copy. 671 672 Options Database Keys: 673 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 674 . -mat_superlu_dist_r <n> - number of rows in processor partition 675 . -mat_superlu_dist_c <n> - number of columns in processor partition 676 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 677 . -mat_superlu_dist_equil - equilibrate the matrix 678 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 679 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 680 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 681 . -mat_superlu_dist_iterrefine - use iterative refinement 682 - -mat_superlu_dist_statprint - print factorization information 683 684 Level: beginner 685 686 .seealso: PCLU 687 M*/ 688 689 EXTERN_C_BEGIN 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatCreate_SuperLU_DIST" 692 PetscErrorCode MatCreate_SuperLU_DIST(Mat A) 693 { 694 PetscErrorCode ierr; 695 int size; 696 Mat A_diag; 697 698 PetscFunctionBegin; 699 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 700 /* and SuperLU_DIST types */ 701 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 702 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 703 if (size == 1) { 704 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 705 } else { 706 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 707 A_diag = ((Mat_MPIAIJ *)A->data)->A; 708 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr); 709 } 710 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 EXTERN_C_END 714 715