1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 int size; 42 /* A few function pointers for inheritance */ 43 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 int (*MatView)(Mat,PetscViewer); 45 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 int (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) { 59 int ierr; 60 Mat B=*newmat; 61 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 62 63 PetscFunctionBegin; 64 if (B != A) { 65 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 66 } 67 /* Reset the original function pointers */ 68 B->ops->duplicate = lu->MatDuplicate; 69 B->ops->view = lu->MatView; 70 B->ops->assemblyend = lu->MatAssemblyEnd; 71 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 72 B->ops->destroy = lu->MatDestroy; 73 74 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 *newmat = B; 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 84 int MatDestroy_SuperLU_DIST(Mat A) 85 { 86 int ierr; 87 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 88 89 PetscFunctionBegin; 90 if (lu->CleanUpSuperLU_Dist) { 91 /* Deallocate SuperLU_DIST storage */ 92 if (lu->MatInputMode == GLOBAL) { 93 Destroy_CompCol_Matrix_dist(&lu->A_sup); 94 } else { 95 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 96 if ( lu->options.SolveInitialized ) { 97 #if defined(PETSC_USE_COMPLEX) 98 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 99 #else 100 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 101 #endif 102 } 103 } 104 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 105 ScalePermstructFree(&lu->ScalePermstruct); 106 LUstructFree(&lu->LUstruct); 107 108 /* Release the SuperLU_DIST process grid. */ 109 superlu_gridexit(&lu->grid); 110 111 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 112 } 113 114 if (lu->size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 129 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 130 int ierr, size=aa->size; 131 int m=A->M, N=A->N; 132 SuperLUStat_t stat; 133 double berr[1]; 134 PetscScalar *bptr; 135 int info, nrhs=1; 136 Vec x_seq; 137 IS iden; 138 VecScatter scat; 139 PetscLogDouble time0,time,time_min,time_max; 140 141 PetscFunctionBegin; 142 if (size > 1) { 143 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 144 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 145 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 146 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 147 ierr = ISDestroy(iden);CHKERRQ(ierr); 148 149 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 151 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 152 } else { /* distributed mat input */ 153 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 154 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 155 } 156 } else { /* size == 1 */ 157 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 158 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 159 } 160 161 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 162 163 PStatInit(&stat); /* Initialize the statistics variables. */ 164 if (lu->StatPrint) { 165 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */ 166 ierr = PetscGetTime(&time0);CHKERRQ(ierr); /* to be removed */ 167 } 168 if (lu->MatInputMode == GLOBAL) { 169 #if defined(PETSC_USE_COMPLEX) 170 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 171 &lu->grid, &lu->LUstruct, berr, &stat, &info); 172 #else 173 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #endif 176 } else { /* distributed mat input */ 177 #if defined(PETSC_USE_COMPLEX) 178 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 179 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 180 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 181 #else 182 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 185 #endif 186 } 187 if (lu->StatPrint) { 188 ierr = PetscGetTime(&time);CHKERRQ(ierr); /* to be removed */ 189 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 190 } 191 PStatFree(&stat); 192 193 if (size > 1) { 194 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 195 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 196 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 197 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 198 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 199 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 200 } else { 201 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 202 } 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 if (lu->StatPrint) { 207 time0 = time - time0; 208 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr); 209 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr); 210 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr); 211 time = time/size; /* average time */ 212 ierr = PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr); 213 } 214 PetscFunctionReturn(0); 215 } 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 219 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 220 { 221 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 222 Mat *tseq,A_seq = PETSC_NULL; 223 Mat_SeqAIJ *aa,*bb; 224 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 225 int M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 226 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 227 SuperLUStat_t stat; 228 double *berr=0; 229 IS isrow; 230 PetscLogDouble time0[2],time[2],time_min[2],time_max[2]; 231 #if defined(PETSC_USE_COMPLEX) 232 doublecomplex *av, *bv; 233 #else 234 double *av, *bv; 235 #endif 236 237 PetscFunctionBegin; 238 if (lu->StatPrint) { 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 jB = 0; 320 for (j=0; j<countB; j++){ 321 jcol = garray[bjj[j]]; 322 if (jcol > colA_start) { 323 jB = j; 324 break; 325 } 326 lu->col[nz] = jcol; 327 lu->val[nz++] = *bv++; 328 if (j==countB-1) jB = countB; 329 } 330 331 /* A part */ 332 for (j=0; j<countA; j++){ 333 lu->col[nz] = mat->rstart + ajj[j]; 334 lu->val[nz++] = *av++; 335 } 336 337 /* B part, larger col index */ 338 for (j=jB; j<countB; j++){ 339 lu->col[nz] = garray[bjj[j]]; 340 lu->val[nz++] = *bv++; 341 } 342 } 343 lu->row[m] = nz; 344 #if defined(PETSC_USE_COMPLEX) 345 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 347 #else 348 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 349 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 350 #endif 351 } 352 if (lu->StatPrint) { 353 ierr = PetscGetTime(&time[0]);CHKERRQ(ierr); 354 time0[0] = time[0] - time0[0]; 355 } 356 357 /* Factor the matrix. */ 358 PStatInit(&stat); /* Initialize the statistics variables. */ 359 360 if (lu->StatPrint) { 361 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 362 ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr); 363 } 364 365 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 366 #if defined(PETSC_USE_COMPLEX) 367 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 368 &lu->grid, &lu->LUstruct, berr, &stat, &info); 369 #else 370 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 371 &lu->grid, &lu->LUstruct, berr, &stat, &info); 372 #endif 373 } else { /* distributed mat input */ 374 #if defined(PETSC_USE_COMPLEX) 375 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 376 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 377 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 378 #else 379 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 380 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 381 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 382 #endif 383 } 384 if (lu->StatPrint) { 385 ierr = PetscGetTime(&time[1]);CHKERRQ(ierr); /* to be removed */ 386 time0[1] = time[1] - time0[1]; 387 if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 388 } 389 PStatFree(&stat); 390 391 if (lu->MatInputMode == GLOBAL && size > 1){ 392 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 393 } 394 395 if (lu->StatPrint) { 396 ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm); 397 ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm); 398 ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm); 399 for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */ 400 ierr = PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]); 401 ierr = PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]); 402 } 403 (*F)->assembled = PETSC_TRUE; 404 lu->flg = SAME_NONZERO_PATTERN; 405 PetscFunctionReturn(0); 406 } 407 408 /* Note the Petsc r and c permutations are ignored */ 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 411 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 412 { 413 Mat B; 414 Mat_SuperLU_DIST *lu; 415 int ierr,M=A->M,N=A->N,size; 416 superlu_options_t options; 417 char buff[32]; 418 PetscTruth flg; 419 char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 420 char *prtype[] = {"LargeDiag","NATURAL"}; 421 422 PetscFunctionBegin; 423 /* Create the factorization matrix */ 424 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 425 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 426 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 427 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 428 429 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 430 B->ops->solve = MatSolve_SuperLU_DIST; 431 B->factor = FACTOR_LU; 432 433 lu = (Mat_SuperLU_DIST*)(B->spptr); 434 435 /* Set the input options */ 436 set_default_options(&options); 437 lu->MatInputMode = GLOBAL; 438 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 439 440 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 441 lu->nprow = size/2; /* Default process rows. */ 442 if (lu->nprow == 0) lu->nprow = 1; 443 lu->npcol = size/lu->nprow; /* Default process columns. */ 444 445 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 446 447 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 449 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 450 451 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 452 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 453 454 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 455 if (!flg) { 456 options.Equil = NO; 457 } 458 459 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr); 460 while (flg) { 461 ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr); 462 if (flg) { 463 options.RowPerm = LargeDiag; 464 break; 465 } 466 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 467 if (flg) { 468 options.RowPerm = NOROWPERM; 469 break; 470 } 471 SETERRQ1(1,"Unknown row permutation %s",buff); 472 } 473 474 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr); 475 while (flg) { 476 ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr); 477 if (flg) { 478 options.ColPerm = MMD_AT_PLUS_A; 479 break; 480 } 481 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 482 if (flg) { 483 options.ColPerm = NATURAL; 484 break; 485 } 486 ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr); 487 if (flg) { 488 options.ColPerm = MMD_ATA; 489 break; 490 } 491 ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr); 492 if (flg) { 493 options.ColPerm = COLAMD; 494 break; 495 } 496 SETERRQ1(1,"Unknown column permutation %s",buff); 497 } 498 499 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 500 if (!flg) { 501 options.ReplaceTinyPivot = NO; 502 } 503 504 options.IterRefine = NOREFINE; 505 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 506 if (flg) { 507 options.IterRefine = DOUBLE; 508 } 509 510 if (PetscLogPrintInfo) { 511 lu->StatPrint = (int)PETSC_TRUE; 512 } else { 513 lu->StatPrint = (int)PETSC_FALSE; 514 } 515 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 516 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 517 PetscOptionsEnd(); 518 519 /* Initialize the SuperLU process grid. */ 520 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 521 522 /* Initialize ScalePermstruct and LUstruct. */ 523 ScalePermstructInit(M, N, &lu->ScalePermstruct); 524 LUstructInit(M, N, &lu->LUstruct); 525 526 lu->options = options; 527 lu->flg = DIFFERENT_NONZERO_PATTERN; 528 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 529 *F = B; 530 PetscFunctionReturn(0); 531 } 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 535 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 536 int ierr; 537 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 538 539 PetscFunctionBegin; 540 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 541 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 542 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 548 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 549 { 550 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 551 superlu_options_t options; 552 int ierr; 553 char *colperm; 554 555 PetscFunctionBegin; 556 /* check if matrix is superlu_dist type */ 557 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 558 559 options = lu->options; 560 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 563 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 564 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 565 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 566 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 567 if (options.ColPerm == NATURAL) { 568 colperm = "NATURAL"; 569 } else if (options.ColPerm == MMD_AT_PLUS_A) { 570 colperm = "MMD_AT_PLUS_A"; 571 } else if (options.ColPerm == MMD_ATA) { 572 colperm = "MMD_ATA"; 573 } else if (options.ColPerm == COLAMD) { 574 colperm = "COLAMD"; 575 } else { 576 SETERRQ(1,"Unknown column permutation"); 577 } 578 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_SuperLU_DIST" 584 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 585 { 586 int ierr; 587 PetscTruth isascii; 588 PetscViewerFormat format; 589 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 590 591 PetscFunctionBegin; 592 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 593 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 595 if (isascii) { 596 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 597 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 598 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 605 EXTERN_C_BEGIN 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 608 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) { 609 /* This routine is only called to convert to MATSUPERLU_DIST */ 610 /* from MATSEQAIJ if A has a single process communicator */ 611 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 612 int ierr; 613 MPI_Comm comm; 614 Mat B=*newmat; 615 Mat_SuperLU_DIST *lu; 616 617 PetscFunctionBegin; 618 if (B != A) { 619 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 620 } 621 622 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 623 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 624 625 lu->MatDuplicate = A->ops->duplicate; 626 lu->MatView = A->ops->view; 627 lu->MatAssemblyEnd = A->ops->assemblyend; 628 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 629 lu->MatDestroy = A->ops->destroy; 630 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 631 632 B->spptr = (void*)lu; 633 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 634 B->ops->view = MatView_SuperLU_DIST; 635 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 636 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 637 B->ops->destroy = MatDestroy_SuperLU_DIST; 638 ierr = MPI_Comm_size(comm,&(lu->size));CHKERRQ(ierr);CHKERRQ(ierr); 639 if ((lu->size) == 1) { 640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 641 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 642 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 643 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 644 } else { 645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 646 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 647 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 648 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 649 } 650 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 651 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 652 *newmat = B; 653 PetscFunctionReturn(0); 654 } 655 EXTERN_C_END 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 659 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 660 int ierr; 661 PetscFunctionBegin; 662 ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr); 663 ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 /*MC 668 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 669 via the external package SuperLU_DIST. 670 671 If SuperLU_DIST is installed (see the manual for 672 instructions on how to declare the existence of external packages), 673 a matrix type can be constructed which invokes SuperLU_DIST solvers. 674 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 675 This matrix type is only supported for double precision real. 676 677 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 678 and from MATMPIAIJ otherwise. As a result, for single process communicators, 679 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 680 for communicators controlling multiple processes. It is recommended that you call both of 681 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 682 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 683 without data copy. 684 685 Options Database Keys: 686 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 687 . -mat_superlu_dist_r <n> - number of rows in processor partition 688 . -mat_superlu_dist_c <n> - number of columns in processor partition 689 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 690 . -mat_superlu_dist_equil - equilibrate the matrix 691 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 692 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 693 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 694 . -mat_superlu_dist_iterrefine - use iterative refinement 695 - -mat_superlu_dist_statprint - print factorization information 696 697 Level: beginner 698 699 .seealso: PCLU 700 M*/ 701 702 EXTERN_C_BEGIN 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatCreate_SuperLU_DIST" 705 int MatCreate_SuperLU_DIST(Mat A) { 706 int ierr,size; 707 708 PetscFunctionBegin; 709 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 710 /* and SuperLU_DIST types */ 711 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 712 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 713 if (size == 1) { 714 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 715 } else { 716 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 717 } 718 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 EXTERN_C_END 722 723