1 /* 2 Provides an interface to the SuperLU_DIST_2.0 sparse solver 3 */ 4 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 8 #include "stdlib.h" 9 #endif 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "superlu_zdefs.h" 14 #else 15 #include "superlu_ddefs.h" 16 #endif 17 EXTERN_C_END 18 19 typedef enum { GLOBAL,DISTRIBUTED 20 } SuperLU_MatInputMode; 21 22 typedef struct { 23 int_t nprow,npcol,*row,*col; 24 gridinfo_t grid; 25 superlu_options_t options; 26 SuperMatrix A_sup; 27 ScalePermstruct_t ScalePermstruct; 28 LUstruct_t LUstruct; 29 int StatPrint; 30 int MatInputMode; 31 SOLVEstruct_t SOLVEstruct; 32 MatStructure flg; 33 MPI_Comm comm_superlu; 34 #if defined(PETSC_USE_COMPLEX) 35 doublecomplex *val; 36 #else 37 double *val; 38 #endif 39 40 /* A few function pointers for inheritance */ 41 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 int (*MatView)(Mat,PetscViewer); 43 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 int (*MatDestroy)(Mat); 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 52 53 EXTERN_C_BEGIN 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) { 57 PetscErrorCode ierr; 58 Mat B=*newmat; 59 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 60 61 PetscFunctionBegin; 62 if (B != A) { 63 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 64 } 65 /* Reset the original function pointers */ 66 B->ops->duplicate = lu->MatDuplicate; 67 B->ops->view = lu->MatView; 68 B->ops->assemblyend = lu->MatAssemblyEnd; 69 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 70 B->ops->destroy = lu->MatDestroy; 71 72 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 73 ierr = PetscFree(lu);CHKERRQ(ierr); 74 75 *newmat = B; 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 82 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 83 { 84 PetscErrorCode ierr; 85 int size; 86 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 87 88 PetscFunctionBegin; 89 if (lu->CleanUpSuperLU_Dist) { 90 /* Deallocate SuperLU_DIST storage */ 91 if (lu->MatInputMode == GLOBAL) { 92 Destroy_CompCol_Matrix_dist(&lu->A_sup); 93 } else { 94 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 95 if ( lu->options.SolveInitialized ) { 96 #if defined(PETSC_USE_COMPLEX) 97 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 98 #else 99 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 100 #endif 101 } 102 } 103 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 104 ScalePermstructFree(&lu->ScalePermstruct); 105 LUstructFree(&lu->LUstruct); 106 107 /* Release the SuperLU_DIST process grid. */ 108 superlu_gridexit(&lu->grid); 109 110 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 111 } 112 113 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 114 if (size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 129 PetscErrorCode ierr; 130 int size; 131 int m=A->M, N=A->N; 132 SuperLUStat_t stat; 133 double berr[1]; 134 PetscScalar *bptr; 135 int info, nrhs=1; 136 Vec x_seq; 137 IS iden; 138 VecScatter scat; 139 140 PetscFunctionBegin; 141 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 142 if (size > 1) { 143 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 144 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 145 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 146 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 147 ierr = ISDestroy(iden);CHKERRQ(ierr); 148 149 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 151 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 152 } else { /* distributed mat input */ 153 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 154 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 155 } 156 } else { /* size == 1 */ 157 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 158 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 159 } 160 161 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 162 163 PStatInit(&stat); /* Initialize the statistics variables. */ 164 165 if (lu->MatInputMode == GLOBAL) { 166 #if defined(PETSC_USE_COMPLEX) 167 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 168 &lu->grid, &lu->LUstruct, berr, &stat, &info); 169 #else 170 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 171 &lu->grid, &lu->LUstruct, berr, &stat, &info); 172 #endif 173 } else { /* distributed mat input */ 174 #if defined(PETSC_USE_COMPLEX) 175 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 176 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 177 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 178 #else 179 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 180 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 181 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 182 #endif 183 } 184 if (lu->StatPrint) { 185 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 186 } 187 PStatFree(&stat); 188 189 if (size > 1) { 190 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 191 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 192 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 193 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 194 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 195 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 196 } else { 197 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 198 } 199 } else { 200 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 207 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 208 { 209 Mat *tseq,A_seq = PETSC_NULL; 210 Mat_SeqAIJ *aa,*bb; 211 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 212 int M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 213 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 214 SuperLUStat_t stat; 215 double *berr=0; 216 IS isrow; 217 PetscLogDouble time0,time,time_min,time_max; 218 #if defined(PETSC_USE_COMPLEX) 219 doublecomplex *av, *bv; 220 #else 221 double *av, *bv; 222 #endif 223 224 PetscFunctionBegin; 225 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 226 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 227 228 if (lu->StatPrint) { /* collect time for mat conversion */ 229 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 230 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 231 } 232 233 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 234 if (size > 1) { /* convert mpi A to seq mat A */ 235 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 236 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 237 ierr = ISDestroy(isrow);CHKERRQ(ierr); 238 239 A_seq = *tseq; 240 ierr = PetscFree(tseq);CHKERRQ(ierr); 241 aa = (Mat_SeqAIJ*)A_seq->data; 242 } else { 243 aa = (Mat_SeqAIJ*)A->data; 244 } 245 246 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 247 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 248 #if defined(PETSC_USE_COMPLEX) 249 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 250 #else 251 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 252 #endif 253 } else { /* successive numeric factorization, sparsity pattern is reused. */ 254 Destroy_CompCol_Matrix_dist(&lu->A_sup); 255 Destroy_LU(N, &lu->grid, &lu->LUstruct); 256 lu->options.Fact = SamePattern; 257 } 258 #if defined(PETSC_USE_COMPLEX) 259 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 260 #else 261 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 262 #endif 263 264 /* Create compressed column matrix A_sup. */ 265 #if defined(PETSC_USE_COMPLEX) 266 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 267 #else 268 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 269 #endif 270 } else { /* distributed mat input */ 271 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 272 aa=(Mat_SeqAIJ*)(mat->A)->data; 273 bb=(Mat_SeqAIJ*)(mat->B)->data; 274 ai=aa->i; aj=aa->j; 275 bi=bb->i; bj=bb->j; 276 #if defined(PETSC_USE_COMPLEX) 277 av=(doublecomplex*)aa->a; 278 bv=(doublecomplex*)bb->a; 279 #else 280 av=aa->a; 281 bv=bb->a; 282 #endif 283 rstart = mat->rstart; 284 nz = aa->nz + bb->nz; 285 garray = mat->garray; 286 rstart = mat->rstart; 287 288 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 289 #if defined(PETSC_USE_COMPLEX) 290 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 291 #else 292 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 293 #endif 294 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 295 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 296 Destroy_LU(N, &lu->grid, &lu->LUstruct); 297 lu->options.Fact = SamePattern; 298 } 299 nz = 0; irow = mat->rstart; 300 for ( i=0; i<m; i++ ) { 301 lu->row[i] = nz; 302 countA = ai[i+1] - ai[i]; 303 countB = bi[i+1] - bi[i]; 304 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 305 bjj = bj + bi[i]; 306 307 /* B part, smaller col index */ 308 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 309 jB = 0; 310 for (j=0; j<countB; j++){ 311 jcol = garray[bjj[j]]; 312 if (jcol > colA_start) { 313 jB = j; 314 break; 315 } 316 lu->col[nz] = jcol; 317 lu->val[nz++] = *bv++; 318 if (j==countB-1) jB = countB; 319 } 320 321 /* A part */ 322 for (j=0; j<countA; j++){ 323 lu->col[nz] = mat->rstart + ajj[j]; 324 lu->val[nz++] = *av++; 325 } 326 327 /* B part, larger col index */ 328 for (j=jB; j<countB; j++){ 329 lu->col[nz] = garray[bjj[j]]; 330 lu->val[nz++] = *bv++; 331 } 332 } 333 lu->row[m] = nz; 334 #if defined(PETSC_USE_COMPLEX) 335 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 336 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 337 #else 338 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 339 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 340 #endif 341 } 342 if (lu->StatPrint) { 343 ierr = PetscGetTime(&time);CHKERRQ(ierr); 344 time0 = time - time0; 345 } 346 347 /* Factor the matrix. */ 348 PStatInit(&stat); /* Initialize the statistics variables. */ 349 350 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 351 #if defined(PETSC_USE_COMPLEX) 352 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 353 &lu->grid, &lu->LUstruct, berr, &stat, &info); 354 #else 355 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 356 &lu->grid, &lu->LUstruct, berr, &stat, &info); 357 #endif 358 } else { /* distributed mat input */ 359 #if defined(PETSC_USE_COMPLEX) 360 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 361 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 362 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 363 #else 364 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 365 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 366 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 367 #endif 368 } 369 370 if (lu->MatInputMode == GLOBAL && size > 1){ 371 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 372 } 373 374 if (lu->StatPrint) { 375 if (size > 1){ 376 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 377 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 378 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 379 time = time/size; /* average time */ 380 if (!rank) 381 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 382 %g / %g / %g\n",time_max,time_min,time); 383 } else { 384 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 385 %g\n",time0); 386 } 387 388 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 389 } 390 PStatFree(&stat); 391 (*F)->assembled = PETSC_TRUE; 392 lu->flg = SAME_NONZERO_PATTERN; 393 PetscFunctionReturn(0); 394 } 395 396 /* Note the Petsc r and c permutations are ignored */ 397 #undef __FUNCT__ 398 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 399 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 400 { 401 Mat B; 402 Mat_SuperLU_DIST *lu; 403 PetscErrorCode ierr; 404 int M=A->M,N=A->N,size,indx; 405 superlu_options_t options; 406 PetscTruth flg; 407 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 408 const char *prtype[] = {"LargeDiag","NATURAL"}; 409 410 PetscFunctionBegin; 411 /* Create the factorization matrix */ 412 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 413 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 414 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 415 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 416 417 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 418 B->ops->solve = MatSolve_SuperLU_DIST; 419 B->factor = FACTOR_LU; 420 421 lu = (Mat_SuperLU_DIST*)(B->spptr); 422 423 /* Set the input options */ 424 set_default_options_dist(&options); 425 lu->MatInputMode = GLOBAL; 426 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 427 428 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 429 lu->nprow = size/2; /* Default process rows. */ 430 if (!lu->nprow) lu->nprow = 1; 431 lu->npcol = size/lu->nprow; /* Default process columns. */ 432 433 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 434 435 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 436 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 437 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 438 439 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 440 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 441 442 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 443 if (!flg) { 444 options.Equil = NO; 445 } 446 447 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 448 if (flg) { 449 switch (indx) { 450 case 0: 451 options.RowPerm = LargeDiag; 452 break; 453 case 1: 454 options.RowPerm = NOROWPERM; 455 break; 456 } 457 } 458 459 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 460 if (flg) { 461 switch (indx) { 462 case 0: 463 options.ColPerm = MMD_AT_PLUS_A; 464 break; 465 case 1: 466 options.ColPerm = NATURAL; 467 break; 468 case 2: 469 options.ColPerm = MMD_ATA; 470 break; 471 case 3: 472 options.ColPerm = COLAMD; 473 break; 474 } 475 } 476 477 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 478 if (!flg) { 479 options.ReplaceTinyPivot = NO; 480 } 481 482 options.IterRefine = NOREFINE; 483 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 484 if (flg) { 485 options.IterRefine = DOUBLE; 486 } 487 488 if (PetscLogPrintInfo) { 489 lu->StatPrint = (int)PETSC_TRUE; 490 } else { 491 lu->StatPrint = (int)PETSC_FALSE; 492 } 493 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 494 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 495 PetscOptionsEnd(); 496 497 /* Initialize the SuperLU process grid. */ 498 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 499 500 /* Initialize ScalePermstruct and LUstruct. */ 501 ScalePermstructInit(M, N, &lu->ScalePermstruct); 502 LUstructInit(M, N, &lu->LUstruct); 503 504 lu->options = options; 505 lu->flg = DIFFERENT_NONZERO_PATTERN; 506 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 507 *F = B; 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 513 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 514 PetscErrorCode ierr; 515 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 516 517 PetscFunctionBegin; 518 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 519 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 520 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 526 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 527 { 528 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 529 superlu_options_t options; 530 PetscErrorCode ierr; 531 char *colperm; 532 533 PetscFunctionBegin; 534 /* check if matrix is superlu_dist type */ 535 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 536 537 options = lu->options; 538 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 539 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 540 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 541 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 542 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 543 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 544 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 545 if (options.ColPerm == NATURAL) { 546 colperm = "NATURAL"; 547 } else if (options.ColPerm == MMD_AT_PLUS_A) { 548 colperm = "MMD_AT_PLUS_A"; 549 } else if (options.ColPerm == MMD_ATA) { 550 colperm = "MMD_ATA"; 551 } else if (options.ColPerm == COLAMD) { 552 colperm = "COLAMD"; 553 } else { 554 SETERRQ(1,"Unknown column permutation"); 555 } 556 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatView_SuperLU_DIST" 562 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 563 { 564 PetscErrorCode ierr; 565 PetscTruth iascii; 566 PetscViewerFormat format; 567 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 568 569 PetscFunctionBegin; 570 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 571 572 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 573 if (iascii) { 574 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 575 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 576 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 583 EXTERN_C_BEGIN 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 586 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) { 587 /* This routine is only called to convert to MATSUPERLU_DIST */ 588 /* from MATSEQAIJ if A has a single process communicator */ 589 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 590 PetscErrorCode ierr; 591 int size; 592 MPI_Comm comm; 593 Mat B=*newmat; 594 Mat_SuperLU_DIST *lu; 595 596 PetscFunctionBegin; 597 if (B != A) { 598 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 599 } 600 601 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 602 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 603 604 lu->MatDuplicate = A->ops->duplicate; 605 lu->MatView = A->ops->view; 606 lu->MatAssemblyEnd = A->ops->assemblyend; 607 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 608 lu->MatDestroy = A->ops->destroy; 609 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 610 611 B->spptr = (void*)lu; 612 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 613 B->ops->view = MatView_SuperLU_DIST; 614 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 615 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 616 B->ops->destroy = MatDestroy_SuperLU_DIST; 617 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 618 if (size == 1) { 619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 620 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 622 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 623 } else { 624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 625 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 627 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 628 } 629 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 630 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 631 *newmat = B; 632 PetscFunctionReturn(0); 633 } 634 EXTERN_C_END 635 636 #undef __FUNCT__ 637 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 638 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 639 PetscErrorCode ierr; 640 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 641 642 PetscFunctionBegin; 643 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 644 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647 648 /*MC 649 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 650 via the external package SuperLU_DIST. 651 652 If SuperLU_DIST is installed (see the manual for 653 instructions on how to declare the existence of external packages), 654 a matrix type can be constructed which invokes SuperLU_DIST solvers. 655 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 656 This matrix type is only supported for double precision real. 657 658 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 659 and from MATMPIAIJ otherwise. As a result, for single process communicators, 660 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 661 for communicators controlling multiple processes. It is recommended that you call both of 662 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 663 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 664 without data copy. 665 666 Options Database Keys: 667 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 668 . -mat_superlu_dist_r <n> - number of rows in processor partition 669 . -mat_superlu_dist_c <n> - number of columns in processor partition 670 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 671 . -mat_superlu_dist_equil - equilibrate the matrix 672 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 673 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 674 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 675 . -mat_superlu_dist_iterrefine - use iterative refinement 676 - -mat_superlu_dist_statprint - print factorization information 677 678 Level: beginner 679 680 .seealso: PCLU 681 M*/ 682 683 EXTERN_C_BEGIN 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatCreate_SuperLU_DIST" 686 PetscErrorCode MatCreate_SuperLU_DIST(Mat A) 687 { 688 PetscErrorCode ierr,size; 689 Mat A_diag; 690 691 PetscFunctionBegin; 692 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 693 /* and SuperLU_DIST types */ 694 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 695 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 696 if (size == 1) { 697 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 698 } else { 699 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 700 A_diag = ((Mat_MPIAIJ *)A->data)->A; 701 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr); 702 } 703 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 EXTERN_C_END 707 708