1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* A few function pointers for inheritance */ 42 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 int (*MatView)(Mat,PetscViewer); 44 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 int (*MatDestroy)(Mat); 47 48 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 49 PetscTruth CleanUpSuperLU_Dist; 50 } Mat_SuperLU_DIST; 51 52 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 53 54 EXTERN_C_BEGIN 55 #undef __FUNCT__ 56 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 57 int MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) { 58 int ierr; 59 Mat B=*newmat; 60 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 61 62 PetscFunctionBegin; 63 if (B != A) { 64 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 65 } 66 /* Reset the original function pointers */ 67 B->ops->duplicate = lu->MatDuplicate; 68 B->ops->view = lu->MatView; 69 B->ops->assemblyend = lu->MatAssemblyEnd; 70 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 71 B->ops->destroy = lu->MatDestroy; 72 73 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 74 ierr = PetscFree(lu);CHKERRQ(ierr); 75 76 *newmat = B; 77 PetscFunctionReturn(0); 78 } 79 EXTERN_C_END 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 83 int MatDestroy_SuperLU_DIST(Mat A) 84 { 85 int ierr,size; 86 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 87 88 PetscFunctionBegin; 89 if (lu->CleanUpSuperLU_Dist) { 90 /* Deallocate SuperLU_DIST storage */ 91 if (lu->MatInputMode == GLOBAL) { 92 Destroy_CompCol_Matrix_dist(&lu->A_sup); 93 } else { 94 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 95 if ( lu->options.SolveInitialized ) { 96 #if defined(PETSC_USE_COMPLEX) 97 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 98 #else 99 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 100 #endif 101 } 102 } 103 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 104 ScalePermstructFree(&lu->ScalePermstruct); 105 LUstructFree(&lu->LUstruct); 106 107 /* Release the SuperLU_DIST process grid. */ 108 superlu_gridexit(&lu->grid); 109 110 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 111 } 112 113 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 114 if (size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 129 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 130 int ierr, size; 131 int m=A->M, N=A->N; 132 SuperLUStat_t stat; 133 double berr[1]; 134 PetscScalar *bptr; 135 int info, nrhs=1; 136 Vec x_seq; 137 IS iden; 138 VecScatter scat; 139 140 PetscFunctionBegin; 141 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 142 if (size > 1) { 143 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 144 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 145 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 146 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 147 ierr = ISDestroy(iden);CHKERRQ(ierr); 148 149 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 151 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 152 } else { /* distributed mat input */ 153 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 154 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 155 } 156 } else { /* size == 1 */ 157 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 158 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 159 } 160 161 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 162 163 PStatInit(&stat); /* Initialize the statistics variables. */ 164 165 if (lu->MatInputMode == GLOBAL) { 166 #if defined(PETSC_USE_COMPLEX) 167 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 168 &lu->grid, &lu->LUstruct, berr, &stat, &info); 169 #else 170 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 171 &lu->grid, &lu->LUstruct, berr, &stat, &info); 172 #endif 173 } else { /* distributed mat input */ 174 #if defined(PETSC_USE_COMPLEX) 175 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 176 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 177 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 178 #else 179 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 180 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 181 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 182 #endif 183 } 184 if (lu->StatPrint) { 185 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 186 } 187 PStatFree(&stat); 188 189 if (size > 1) { 190 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 191 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 192 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 193 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 194 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 195 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 196 } else { 197 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 198 } 199 } else { 200 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 207 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 208 { 209 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 210 Mat *tseq,A_seq = PETSC_NULL; 211 Mat_SeqAIJ *aa,*bb; 212 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 213 int M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 214 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 215 SuperLUStat_t stat; 216 double *berr=0; 217 IS isrow; 218 PetscLogDouble time0,time,time_min,time_max; 219 #if defined(PETSC_USE_COMPLEX) 220 doublecomplex *av, *bv; 221 #else 222 double *av, *bv; 223 #endif 224 225 PetscFunctionBegin; 226 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 227 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 228 229 if (lu->StatPrint) { /* collect time for mat conversion */ 230 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 231 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 232 } 233 234 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 235 if (size > 1) { /* convert mpi A to seq mat A */ 236 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 237 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 238 ierr = ISDestroy(isrow);CHKERRQ(ierr); 239 240 A_seq = *tseq; 241 ierr = PetscFree(tseq);CHKERRQ(ierr); 242 aa = (Mat_SeqAIJ*)A_seq->data; 243 } else { 244 aa = (Mat_SeqAIJ*)A->data; 245 } 246 247 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 248 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 249 #if defined(PETSC_USE_COMPLEX) 250 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 251 #else 252 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 253 #endif 254 } else { /* successive numeric factorization, sparsity pattern is reused. */ 255 Destroy_CompCol_Matrix_dist(&lu->A_sup); 256 Destroy_LU(N, &lu->grid, &lu->LUstruct); 257 lu->options.Fact = SamePattern; 258 } 259 #if defined(PETSC_USE_COMPLEX) 260 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 261 #else 262 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 263 #endif 264 265 /* Create compressed column matrix A_sup. */ 266 #if defined(PETSC_USE_COMPLEX) 267 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 268 #else 269 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 270 #endif 271 } else { /* distributed mat input */ 272 mat = (Mat_MPIAIJ*)A->data; 273 aa=(Mat_SeqAIJ*)(mat->A)->data; 274 bb=(Mat_SeqAIJ*)(mat->B)->data; 275 ai=aa->i; aj=aa->j; 276 bi=bb->i; bj=bb->j; 277 #if defined(PETSC_USE_COMPLEX) 278 av=(doublecomplex*)aa->a; 279 bv=(doublecomplex*)bb->a; 280 #else 281 av=aa->a; 282 bv=bb->a; 283 #endif 284 rstart = mat->rstart; 285 nz = aa->nz + bb->nz; 286 garray = mat->garray; 287 rstart = mat->rstart; 288 289 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 290 #if defined(PETSC_USE_COMPLEX) 291 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 292 #else 293 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 294 #endif 295 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 296 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 297 Destroy_LU(N, &lu->grid, &lu->LUstruct); 298 lu->options.Fact = SamePattern; 299 } 300 nz = 0; irow = mat->rstart; 301 for ( i=0; i<m; i++ ) { 302 lu->row[i] = nz; 303 countA = ai[i+1] - ai[i]; 304 countB = bi[i+1] - bi[i]; 305 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 306 bjj = bj + bi[i]; 307 308 /* B part, smaller col index */ 309 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 310 jB = 0; 311 for (j=0; j<countB; j++){ 312 jcol = garray[bjj[j]]; 313 if (jcol > colA_start) { 314 jB = j; 315 break; 316 } 317 lu->col[nz] = jcol; 318 lu->val[nz++] = *bv++; 319 if (j==countB-1) jB = countB; 320 } 321 322 /* A part */ 323 for (j=0; j<countA; j++){ 324 lu->col[nz] = mat->rstart + ajj[j]; 325 lu->val[nz++] = *av++; 326 } 327 328 /* B part, larger col index */ 329 for (j=jB; j<countB; j++){ 330 lu->col[nz] = garray[bjj[j]]; 331 lu->val[nz++] = *bv++; 332 } 333 } 334 lu->row[m] = nz; 335 #if defined(PETSC_USE_COMPLEX) 336 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 337 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 338 #else 339 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 340 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 341 #endif 342 } 343 if (lu->StatPrint) { 344 ierr = PetscGetTime(&time);CHKERRQ(ierr); 345 time0 = time - time0; 346 } 347 348 /* Factor the matrix. */ 349 PStatInit(&stat); /* Initialize the statistics variables. */ 350 351 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 352 #if defined(PETSC_USE_COMPLEX) 353 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 354 &lu->grid, &lu->LUstruct, berr, &stat, &info); 355 #else 356 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 357 &lu->grid, &lu->LUstruct, berr, &stat, &info); 358 #endif 359 } else { /* distributed mat input */ 360 #if defined(PETSC_USE_COMPLEX) 361 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 362 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 363 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 364 #else 365 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 366 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 367 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 368 #endif 369 } 370 371 if (lu->MatInputMode == GLOBAL && size > 1){ 372 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 373 } 374 375 if (lu->StatPrint) { 376 if (size > 1){ 377 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 378 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 379 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 380 time = time/size; /* average time */ 381 if (!rank) 382 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 383 %g / %g / %g\n",time_max,time_min,time); 384 } else { 385 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 386 %g\n",time0); 387 } 388 389 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 390 } 391 PStatFree(&stat); 392 (*F)->assembled = PETSC_TRUE; 393 lu->flg = SAME_NONZERO_PATTERN; 394 PetscFunctionReturn(0); 395 } 396 397 /* Note the Petsc r and c permutations are ignored */ 398 #undef __FUNCT__ 399 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 400 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 401 { 402 Mat B; 403 Mat_SuperLU_DIST *lu; 404 int ierr,M=A->M,N=A->N,size,indx; 405 superlu_options_t options; 406 PetscTruth flg; 407 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 408 const char *prtype[] = {"LargeDiag","NATURAL"}; 409 410 PetscFunctionBegin; 411 /* Create the factorization matrix */ 412 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 413 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 414 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 415 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 416 417 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 418 B->ops->solve = MatSolve_SuperLU_DIST; 419 B->factor = FACTOR_LU; 420 421 lu = (Mat_SuperLU_DIST*)(B->spptr); 422 423 /* Set the input options */ 424 set_default_options_dist(&options); 425 lu->MatInputMode = GLOBAL; 426 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 427 428 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 429 lu->nprow = size/2; /* Default process rows. */ 430 if (lu->nprow == 0) lu->nprow = 1; 431 lu->npcol = size/lu->nprow; /* Default process columns. */ 432 433 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 434 435 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 436 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 437 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 438 439 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 440 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 441 442 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 443 if (!flg) { 444 options.Equil = NO; 445 } 446 447 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 448 if (flg) { 449 switch (indx) { 450 case 0: 451 options.RowPerm = LargeDiag; 452 break; 453 case 1: 454 options.RowPerm = NOROWPERM; 455 break; 456 } 457 } 458 459 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 460 if (flg) { 461 switch (indx) { 462 case 0: 463 options.ColPerm = MMD_AT_PLUS_A; 464 break; 465 case 1: 466 options.ColPerm = NATURAL; 467 break; 468 case 2: 469 options.ColPerm = MMD_ATA; 470 break; 471 case 3: 472 options.ColPerm = COLAMD; 473 break; 474 } 475 } 476 477 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 478 if (!flg) { 479 options.ReplaceTinyPivot = NO; 480 } 481 482 options.IterRefine = NOREFINE; 483 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 484 if (flg) { 485 options.IterRefine = DOUBLE; 486 } 487 488 if (PetscLogPrintInfo) { 489 lu->StatPrint = (int)PETSC_TRUE; 490 } else { 491 lu->StatPrint = (int)PETSC_FALSE; 492 } 493 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 494 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 495 PetscOptionsEnd(); 496 497 /* Initialize the SuperLU process grid. */ 498 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 499 500 /* Initialize ScalePermstruct and LUstruct. */ 501 ScalePermstructInit(M, N, &lu->ScalePermstruct); 502 LUstructInit(M, N, &lu->LUstruct); 503 504 lu->options = options; 505 lu->flg = DIFFERENT_NONZERO_PATTERN; 506 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 507 *F = B; 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 513 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 514 int ierr; 515 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 516 517 PetscFunctionBegin; 518 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 519 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 520 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 526 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 527 { 528 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 529 superlu_options_t options; 530 int ierr; 531 char *colperm; 532 533 PetscFunctionBegin; 534 /* check if matrix is superlu_dist type */ 535 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 536 537 options = lu->options; 538 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 539 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 540 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 541 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 542 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 543 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 544 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 545 if (options.ColPerm == NATURAL) { 546 colperm = "NATURAL"; 547 } else if (options.ColPerm == MMD_AT_PLUS_A) { 548 colperm = "MMD_AT_PLUS_A"; 549 } else if (options.ColPerm == MMD_ATA) { 550 colperm = "MMD_ATA"; 551 } else if (options.ColPerm == COLAMD) { 552 colperm = "COLAMD"; 553 } else { 554 SETERRQ(1,"Unknown column permutation"); 555 } 556 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatView_SuperLU_DIST" 562 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 563 { 564 int ierr; 565 PetscTruth isascii; 566 PetscViewerFormat format; 567 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 568 569 PetscFunctionBegin; 570 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 571 572 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 573 if (isascii) { 574 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 575 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 576 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 583 EXTERN_C_BEGIN 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 586 int MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) { 587 /* This routine is only called to convert to MATSUPERLU_DIST */ 588 /* from MATSEQAIJ if A has a single process communicator */ 589 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 590 int ierr, size; 591 MPI_Comm comm; 592 Mat B=*newmat; 593 Mat_SuperLU_DIST *lu; 594 595 PetscFunctionBegin; 596 if (B != A) { 597 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 598 } 599 600 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 601 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 602 603 lu->MatDuplicate = A->ops->duplicate; 604 lu->MatView = A->ops->view; 605 lu->MatAssemblyEnd = A->ops->assemblyend; 606 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 607 lu->MatDestroy = A->ops->destroy; 608 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 609 610 B->spptr = (void*)lu; 611 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 612 B->ops->view = MatView_SuperLU_DIST; 613 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 614 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 615 B->ops->destroy = MatDestroy_SuperLU_DIST; 616 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 617 if (size == 1) { 618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 619 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 621 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 622 } else { 623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 624 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 626 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 627 } 628 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 629 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 630 *newmat = B; 631 PetscFunctionReturn(0); 632 } 633 EXTERN_C_END 634 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 637 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 638 int ierr; 639 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 640 641 PetscFunctionBegin; 642 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 643 ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr); 644 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647 648 /*MC 649 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 650 via the external package SuperLU_DIST. 651 652 If SuperLU_DIST is installed (see the manual for 653 instructions on how to declare the existence of external packages), 654 a matrix type can be constructed which invokes SuperLU_DIST solvers. 655 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 656 This matrix type is only supported for double precision real. 657 658 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 659 and from MATMPIAIJ otherwise. As a result, for single process communicators, 660 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 661 for communicators controlling multiple processes. It is recommended that you call both of 662 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 663 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 664 without data copy. 665 666 Options Database Keys: 667 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 668 . -mat_superlu_dist_r <n> - number of rows in processor partition 669 . -mat_superlu_dist_c <n> - number of columns in processor partition 670 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 671 . -mat_superlu_dist_equil - equilibrate the matrix 672 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 673 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 674 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 675 . -mat_superlu_dist_iterrefine - use iterative refinement 676 - -mat_superlu_dist_statprint - print factorization information 677 678 Level: beginner 679 680 .seealso: PCLU 681 M*/ 682 683 EXTERN_C_BEGIN 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatCreate_SuperLU_DIST" 686 int MatCreate_SuperLU_DIST(Mat A) { 687 int ierr,size; 688 689 PetscFunctionBegin; 690 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 691 /* and SuperLU_DIST types */ 692 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 693 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 694 if (size == 1) { 695 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 696 } else { 697 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 698 } 699 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 EXTERN_C_END 703 704