1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* A few function pointers for inheritance */ 42 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 int (*MatView)(Mat,PetscViewer); 44 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 int (*MatDestroy)(Mat); 47 48 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 49 PetscTruth CleanUpSuperLU_Dist; 50 } Mat_SuperLU_DIST; 51 52 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 53 54 EXTERN_C_BEGIN 55 #undef __FUNCT__ 56 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 57 int MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) { 58 int ierr; 59 Mat B=*newmat; 60 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 61 62 PetscFunctionBegin; 63 if (B != A) { 64 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 65 } 66 /* Reset the original function pointers */ 67 B->ops->duplicate = lu->MatDuplicate; 68 B->ops->view = lu->MatView; 69 B->ops->assemblyend = lu->MatAssemblyEnd; 70 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 71 B->ops->destroy = lu->MatDestroy; 72 73 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 74 ierr = PetscFree(lu);CHKERRQ(ierr); 75 76 *newmat = B; 77 PetscFunctionReturn(0); 78 } 79 EXTERN_C_END 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 83 int MatDestroy_SuperLU_DIST(Mat A) 84 { 85 int ierr,size; 86 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 87 88 PetscFunctionBegin; 89 if (lu->CleanUpSuperLU_Dist) { 90 /* Deallocate SuperLU_DIST storage */ 91 if (lu->MatInputMode == GLOBAL) { 92 Destroy_CompCol_Matrix_dist(&lu->A_sup); 93 } else { 94 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 95 if ( lu->options.SolveInitialized ) { 96 #if defined(PETSC_USE_COMPLEX) 97 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 98 #else 99 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 100 #endif 101 } 102 } 103 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 104 ScalePermstructFree(&lu->ScalePermstruct); 105 LUstructFree(&lu->LUstruct); 106 107 /* Release the SuperLU_DIST process grid. */ 108 superlu_gridexit(&lu->grid); 109 110 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 111 } 112 113 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 114 if (size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 129 int ierr, size; 130 int m=A->M, N=A->N; 131 SuperLUStat_t stat; 132 double berr[1]; 133 PetscScalar *bptr; 134 int info, nrhs=1; 135 Vec x_seq; 136 IS iden; 137 VecScatter scat; 138 139 PetscFunctionBegin; 140 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 141 if (size > 1) { 142 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 143 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 144 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 145 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 146 ierr = ISDestroy(iden);CHKERRQ(ierr); 147 148 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 149 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 151 } else { /* distributed mat input */ 152 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 153 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 154 } 155 } else { /* size == 1 */ 156 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 157 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 158 } 159 160 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 161 162 PStatInit(&stat); /* Initialize the statistics variables. */ 163 164 if (lu->MatInputMode == GLOBAL) { 165 #if defined(PETSC_USE_COMPLEX) 166 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 167 &lu->grid, &lu->LUstruct, berr, &stat, &info); 168 #else 169 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 170 &lu->grid, &lu->LUstruct, berr, &stat, &info); 171 #endif 172 } else { /* distributed mat input */ 173 #if defined(PETSC_USE_COMPLEX) 174 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 175 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 176 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 177 #else 178 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 179 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 180 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 181 #endif 182 } 183 if (lu->StatPrint) { 184 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 185 } 186 PStatFree(&stat); 187 188 if (size > 1) { 189 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 190 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 191 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 192 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 193 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 194 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 195 } else { 196 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 197 } 198 } else { 199 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 200 } 201 PetscFunctionReturn(0); 202 } 203 204 #undef __FUNCT__ 205 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 206 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 207 { 208 Mat *tseq,A_seq = PETSC_NULL; 209 Mat_SeqAIJ *aa,*bb; 210 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 211 int M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 212 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 213 SuperLUStat_t stat; 214 double *berr=0; 215 IS isrow; 216 PetscLogDouble time0,time,time_min,time_max; 217 #if defined(PETSC_USE_COMPLEX) 218 doublecomplex *av, *bv; 219 #else 220 double *av, *bv; 221 #endif 222 223 PetscFunctionBegin; 224 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 225 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 226 227 if (lu->StatPrint) { /* collect time for mat conversion */ 228 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 229 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 230 } 231 232 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 233 if (size > 1) { /* convert mpi A to seq mat A */ 234 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 235 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 236 ierr = ISDestroy(isrow);CHKERRQ(ierr); 237 238 A_seq = *tseq; 239 ierr = PetscFree(tseq);CHKERRQ(ierr); 240 aa = (Mat_SeqAIJ*)A_seq->data; 241 } else { 242 aa = (Mat_SeqAIJ*)A->data; 243 } 244 245 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 246 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 247 #if defined(PETSC_USE_COMPLEX) 248 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 249 #else 250 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 251 #endif 252 } else { /* successive numeric factorization, sparsity pattern is reused. */ 253 Destroy_CompCol_Matrix_dist(&lu->A_sup); 254 Destroy_LU(N, &lu->grid, &lu->LUstruct); 255 lu->options.Fact = SamePattern; 256 } 257 #if defined(PETSC_USE_COMPLEX) 258 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 259 #else 260 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 261 #endif 262 263 /* Create compressed column matrix A_sup. */ 264 #if defined(PETSC_USE_COMPLEX) 265 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 266 #else 267 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 268 #endif 269 } else { /* distributed mat input */ 270 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 271 aa=(Mat_SeqAIJ*)(mat->A)->data; 272 bb=(Mat_SeqAIJ*)(mat->B)->data; 273 ai=aa->i; aj=aa->j; 274 bi=bb->i; bj=bb->j; 275 #if defined(PETSC_USE_COMPLEX) 276 av=(doublecomplex*)aa->a; 277 bv=(doublecomplex*)bb->a; 278 #else 279 av=aa->a; 280 bv=bb->a; 281 #endif 282 rstart = mat->rstart; 283 nz = aa->nz + bb->nz; 284 garray = mat->garray; 285 rstart = mat->rstart; 286 287 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 288 #if defined(PETSC_USE_COMPLEX) 289 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 290 #else 291 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 292 #endif 293 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 294 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 295 Destroy_LU(N, &lu->grid, &lu->LUstruct); 296 lu->options.Fact = SamePattern; 297 } 298 nz = 0; irow = mat->rstart; 299 for ( i=0; i<m; i++ ) { 300 lu->row[i] = nz; 301 countA = ai[i+1] - ai[i]; 302 countB = bi[i+1] - bi[i]; 303 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 304 bjj = bj + bi[i]; 305 306 /* B part, smaller col index */ 307 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 308 jB = 0; 309 for (j=0; j<countB; j++){ 310 jcol = garray[bjj[j]]; 311 if (jcol > colA_start) { 312 jB = j; 313 break; 314 } 315 lu->col[nz] = jcol; 316 lu->val[nz++] = *bv++; 317 if (j==countB-1) jB = countB; 318 } 319 320 /* A part */ 321 for (j=0; j<countA; j++){ 322 lu->col[nz] = mat->rstart + ajj[j]; 323 lu->val[nz++] = *av++; 324 } 325 326 /* B part, larger col index */ 327 for (j=jB; j<countB; j++){ 328 lu->col[nz] = garray[bjj[j]]; 329 lu->val[nz++] = *bv++; 330 } 331 } 332 lu->row[m] = nz; 333 #if defined(PETSC_USE_COMPLEX) 334 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 335 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 336 #else 337 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 338 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 339 #endif 340 } 341 if (lu->StatPrint) { 342 ierr = PetscGetTime(&time);CHKERRQ(ierr); 343 time0 = time - time0; 344 } 345 346 /* Factor the matrix. */ 347 PStatInit(&stat); /* Initialize the statistics variables. */ 348 349 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 350 #if defined(PETSC_USE_COMPLEX) 351 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 352 &lu->grid, &lu->LUstruct, berr, &stat, &info); 353 #else 354 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 355 &lu->grid, &lu->LUstruct, berr, &stat, &info); 356 #endif 357 } else { /* distributed mat input */ 358 #if defined(PETSC_USE_COMPLEX) 359 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 360 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 361 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 362 #else 363 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 364 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 365 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 366 #endif 367 } 368 369 if (lu->MatInputMode == GLOBAL && size > 1){ 370 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 371 } 372 373 if (lu->StatPrint) { 374 if (size > 1){ 375 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 376 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 377 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 378 time = time/size; /* average time */ 379 if (!rank) 380 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 381 %g / %g / %g\n",time_max,time_min,time); 382 } else { 383 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 384 %g\n",time0); 385 } 386 387 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 388 } 389 PStatFree(&stat); 390 (*F)->assembled = PETSC_TRUE; 391 lu->flg = SAME_NONZERO_PATTERN; 392 PetscFunctionReturn(0); 393 } 394 395 /* Note the Petsc r and c permutations are ignored */ 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 398 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 399 { 400 Mat B; 401 Mat_SuperLU_DIST *lu; 402 int ierr,M=A->M,N=A->N,size,indx; 403 superlu_options_t options; 404 PetscTruth flg; 405 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 406 const char *prtype[] = {"LargeDiag","NATURAL"}; 407 408 PetscFunctionBegin; 409 /* Create the factorization matrix */ 410 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 411 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 412 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 413 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 414 415 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 416 B->ops->solve = MatSolve_SuperLU_DIST; 417 B->factor = FACTOR_LU; 418 419 lu = (Mat_SuperLU_DIST*)(B->spptr); 420 421 /* Set the input options */ 422 set_default_options_dist(&options); 423 lu->MatInputMode = GLOBAL; 424 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 425 426 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 427 lu->nprow = size/2; /* Default process rows. */ 428 if (lu->nprow == 0) lu->nprow = 1; 429 lu->npcol = size/lu->nprow; /* Default process columns. */ 430 431 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 432 433 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 434 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 435 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 436 437 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 438 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 439 440 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 441 if (!flg) { 442 options.Equil = NO; 443 } 444 445 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 446 if (flg) { 447 switch (indx) { 448 case 0: 449 options.RowPerm = LargeDiag; 450 break; 451 case 1: 452 options.RowPerm = NOROWPERM; 453 break; 454 } 455 } 456 457 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 458 if (flg) { 459 switch (indx) { 460 case 0: 461 options.ColPerm = MMD_AT_PLUS_A; 462 break; 463 case 1: 464 options.ColPerm = NATURAL; 465 break; 466 case 2: 467 options.ColPerm = MMD_ATA; 468 break; 469 case 3: 470 options.ColPerm = COLAMD; 471 break; 472 } 473 } 474 475 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 476 if (!flg) { 477 options.ReplaceTinyPivot = NO; 478 } 479 480 options.IterRefine = NOREFINE; 481 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 482 if (flg) { 483 options.IterRefine = DOUBLE; 484 } 485 486 if (PetscLogPrintInfo) { 487 lu->StatPrint = (int)PETSC_TRUE; 488 } else { 489 lu->StatPrint = (int)PETSC_FALSE; 490 } 491 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 492 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 493 PetscOptionsEnd(); 494 495 /* Initialize the SuperLU process grid. */ 496 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 497 498 /* Initialize ScalePermstruct and LUstruct. */ 499 ScalePermstructInit(M, N, &lu->ScalePermstruct); 500 LUstructInit(M, N, &lu->LUstruct); 501 502 lu->options = options; 503 lu->flg = DIFFERENT_NONZERO_PATTERN; 504 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 505 *F = B; 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 511 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 512 int ierr; 513 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 514 515 PetscFunctionBegin; 516 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 517 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 518 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 524 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 525 { 526 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 527 superlu_options_t options; 528 int ierr; 529 char *colperm; 530 531 PetscFunctionBegin; 532 /* check if matrix is superlu_dist type */ 533 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 534 535 options = lu->options; 536 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 537 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 538 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 539 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 540 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 541 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 542 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 543 if (options.ColPerm == NATURAL) { 544 colperm = "NATURAL"; 545 } else if (options.ColPerm == MMD_AT_PLUS_A) { 546 colperm = "MMD_AT_PLUS_A"; 547 } else if (options.ColPerm == MMD_ATA) { 548 colperm = "MMD_ATA"; 549 } else if (options.ColPerm == COLAMD) { 550 colperm = "COLAMD"; 551 } else { 552 SETERRQ(1,"Unknown column permutation"); 553 } 554 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatView_SuperLU_DIST" 560 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 561 { 562 int ierr; 563 PetscTruth isascii; 564 PetscViewerFormat format; 565 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 566 567 PetscFunctionBegin; 568 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 569 570 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 571 if (isascii) { 572 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 573 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 574 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 575 } 576 } 577 PetscFunctionReturn(0); 578 } 579 580 581 EXTERN_C_BEGIN 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 584 int MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) { 585 /* This routine is only called to convert to MATSUPERLU_DIST */ 586 /* from MATSEQAIJ if A has a single process communicator */ 587 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 588 int ierr, size; 589 MPI_Comm comm; 590 Mat B=*newmat; 591 Mat_SuperLU_DIST *lu; 592 593 PetscFunctionBegin; 594 if (B != A) { 595 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 596 } 597 598 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 599 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 600 601 lu->MatDuplicate = A->ops->duplicate; 602 lu->MatView = A->ops->view; 603 lu->MatAssemblyEnd = A->ops->assemblyend; 604 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 605 lu->MatDestroy = A->ops->destroy; 606 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 607 608 B->spptr = (void*)lu; 609 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 610 B->ops->view = MatView_SuperLU_DIST; 611 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 612 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 613 B->ops->destroy = MatDestroy_SuperLU_DIST; 614 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 615 if (size == 1) { 616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 617 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 619 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 620 } else { 621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 622 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 624 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 625 } 626 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 627 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 628 *newmat = B; 629 PetscFunctionReturn(0); 630 } 631 EXTERN_C_END 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 635 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 636 int ierr; 637 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 638 639 PetscFunctionBegin; 640 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 641 ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr); 642 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 /*MC 647 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 648 via the external package SuperLU_DIST. 649 650 If SuperLU_DIST is installed (see the manual for 651 instructions on how to declare the existence of external packages), 652 a matrix type can be constructed which invokes SuperLU_DIST solvers. 653 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 654 This matrix type is only supported for double precision real. 655 656 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 657 and from MATMPIAIJ otherwise. As a result, for single process communicators, 658 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 659 for communicators controlling multiple processes. It is recommended that you call both of 660 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 661 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 662 without data copy. 663 664 Options Database Keys: 665 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 666 . -mat_superlu_dist_r <n> - number of rows in processor partition 667 . -mat_superlu_dist_c <n> - number of columns in processor partition 668 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 669 . -mat_superlu_dist_equil - equilibrate the matrix 670 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 671 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 672 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 673 . -mat_superlu_dist_iterrefine - use iterative refinement 674 - -mat_superlu_dist_statprint - print factorization information 675 676 Level: beginner 677 678 .seealso: PCLU 679 M*/ 680 681 EXTERN_C_BEGIN 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatCreate_SuperLU_DIST" 684 int MatCreate_SuperLU_DIST(Mat A) { 685 int ierr,size; 686 Mat A_diag; 687 688 PetscFunctionBegin; 689 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 690 /* and SuperLU_DIST types */ 691 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 692 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 693 if (size == 1) { 694 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 695 } else { 696 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 697 A_diag = ((Mat_MPIAIJ *)A->data)->A; 698 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr); 699 } 700 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 EXTERN_C_END 704 705