1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 int size; 42 /* A few function pointers for inheritance */ 43 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 int (*MatView)(Mat,PetscViewer); 45 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 int (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) { 59 int ierr; 60 Mat B=*newmat; 61 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 62 63 PetscFunctionBegin; 64 if (B != A) { 65 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 66 } 67 /* Reset the original function pointers */ 68 B->ops->duplicate = lu->MatDuplicate; 69 B->ops->view = lu->MatView; 70 B->ops->assemblyend = lu->MatAssemblyEnd; 71 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 72 B->ops->destroy = lu->MatDestroy; 73 74 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 *newmat = B; 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 84 int MatDestroy_SuperLU_DIST(Mat A) 85 { 86 int ierr; 87 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 88 89 PetscFunctionBegin; 90 if (lu->CleanUpSuperLU_Dist) { 91 /* Deallocate SuperLU_DIST storage */ 92 if (lu->MatInputMode == GLOBAL) { 93 Destroy_CompCol_Matrix_dist(&lu->A_sup); 94 } else { 95 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 96 if ( lu->options.SolveInitialized ) { 97 #if defined(PETSC_USE_COMPLEX) 98 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 99 #else 100 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 101 #endif 102 } 103 } 104 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 105 ScalePermstructFree(&lu->ScalePermstruct); 106 LUstructFree(&lu->LUstruct); 107 108 /* Release the SuperLU_DIST process grid. */ 109 superlu_gridexit(&lu->grid); 110 111 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 112 } 113 114 if (lu->size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 129 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 130 int ierr, size=aa->size; 131 int m=A->M, N=A->N; 132 SuperLUStat_t stat; 133 double berr[1]; 134 PetscScalar *bptr; 135 int info, nrhs=1; 136 Vec x_seq; 137 IS iden; 138 VecScatter scat; 139 PetscLogDouble time0,time,time_min,time_max; 140 141 PetscFunctionBegin; 142 if (size > 1) { 143 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 144 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 145 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 146 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 147 ierr = ISDestroy(iden);CHKERRQ(ierr); 148 149 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 151 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 152 } else { /* distributed mat input */ 153 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 154 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 155 } 156 } else { /* size == 1 */ 157 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 158 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 159 } 160 161 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 162 163 PStatInit(&stat); /* Initialize the statistics variables. */ 164 if (lu->StatPrint) { 165 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */ 166 ierr = PetscGetTime(&time0);CHKERRQ(ierr); /* to be removed */ 167 } 168 if (lu->MatInputMode == GLOBAL) { 169 #if defined(PETSC_USE_COMPLEX) 170 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 171 &lu->grid, &lu->LUstruct, berr, &stat, &info); 172 #else 173 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #endif 176 } else { /* distributed mat input */ 177 #if defined(PETSC_USE_COMPLEX) 178 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 179 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 180 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 181 #else 182 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 185 #endif 186 } 187 if (lu->StatPrint) { 188 ierr = PetscGetTime(&time);CHKERRQ(ierr); /* to be removed */ 189 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 190 } 191 PStatFree(&stat); 192 193 if (size > 1) { 194 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 195 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 196 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 197 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 198 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 199 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 200 } else { 201 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 202 } 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 if (lu->StatPrint) { 207 time0 = time - time0; 208 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr); 209 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr); 210 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr); 211 time = time/size; /* average time */ 212 ierr = PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr); 213 } 214 PetscFunctionReturn(0); 215 } 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 219 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 220 { 221 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 222 Mat *tseq,A_seq = PETSC_NULL; 223 Mat_SeqAIJ *aa,*bb; 224 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 225 int M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 226 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 227 SuperLUStat_t stat; 228 double *berr=0; 229 IS isrow; 230 PetscLogDouble time0[2],time[2],time_min[2],time_max[2]; 231 #if defined(PETSC_USE_COMPLEX) 232 doublecomplex *av, *bv; 233 #else 234 double *av, *bv; 235 #endif 236 237 PetscFunctionBegin; 238 if (lu->StatPrint) { 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; jB = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 for (j=0; j<countB; j++){ 320 jcol = garray[bjj[j]]; 321 if (jcol > colA_start) { 322 jB = j; 323 break; 324 } 325 lu->col[nz] = jcol; 326 lu->val[nz++] = *bv++; 327 if (j==countB-1) jB = countB; 328 } 329 330 /* A part */ 331 for (j=0; j<countA; j++){ 332 lu->col[nz] = mat->rstart + ajj[j]; 333 lu->val[nz++] = *av++; 334 } 335 336 /* B part, larger col index */ 337 for (j=jB; j<countB; j++){ 338 lu->col[nz] = garray[bjj[j]]; 339 lu->val[nz++] = *bv++; 340 } 341 } 342 lu->row[m] = nz; 343 #if defined(PETSC_USE_COMPLEX) 344 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 345 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 346 #else 347 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 348 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 349 #endif 350 } 351 if (lu->StatPrint) { 352 ierr = PetscGetTime(&time[0]);CHKERRQ(ierr); 353 time0[0] = time[0] - time0[0]; 354 } 355 356 /* Factor the matrix. */ 357 PStatInit(&stat); /* Initialize the statistics variables. */ 358 359 if (lu->StatPrint) { 360 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 361 ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr); 362 } 363 364 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 365 #if defined(PETSC_USE_COMPLEX) 366 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 367 &lu->grid, &lu->LUstruct, berr, &stat, &info); 368 #else 369 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 370 &lu->grid, &lu->LUstruct, berr, &stat, &info); 371 #endif 372 } else { /* distributed mat input */ 373 #if defined(PETSC_USE_COMPLEX) 374 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 376 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 377 #else 378 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 379 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 380 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 381 #endif 382 } 383 if (lu->StatPrint) { 384 ierr = PetscGetTime(&time[1]);CHKERRQ(ierr); /* to be removed */ 385 time0[1] = time[1] - time0[1]; 386 if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 387 } 388 PStatFree(&stat); 389 390 if (lu->MatInputMode == GLOBAL && size > 1){ 391 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 392 } 393 394 if (lu->StatPrint) { 395 ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm); 396 ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm); 397 ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm); 398 for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */ 399 ierr = PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]); 400 ierr = PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]); 401 } 402 (*F)->assembled = PETSC_TRUE; 403 lu->flg = SAME_NONZERO_PATTERN; 404 PetscFunctionReturn(0); 405 } 406 407 /* Note the Petsc r and c permutations are ignored */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 410 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 411 { 412 Mat B; 413 Mat_SuperLU_DIST *lu; 414 int ierr,M=A->M,N=A->N,size; 415 superlu_options_t options; 416 char buff[32]; 417 PetscTruth flg; 418 char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 419 char *prtype[] = {"LargeDiag","NATURAL"}; 420 421 PetscFunctionBegin; 422 /* Create the factorization matrix */ 423 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 424 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 425 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 426 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 427 428 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 429 B->ops->solve = MatSolve_SuperLU_DIST; 430 B->factor = FACTOR_LU; 431 432 lu = (Mat_SuperLU_DIST*)(B->spptr); 433 434 /* Set the input options */ 435 set_default_options(&options); 436 lu->MatInputMode = GLOBAL; 437 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 438 439 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 440 lu->nprow = size/2; /* Default process rows. */ 441 if (lu->nprow == 0) lu->nprow = 1; 442 lu->npcol = size/lu->nprow; /* Default process columns. */ 443 444 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 445 446 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 447 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 448 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 449 450 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 451 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 452 453 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 454 if (!flg) { 455 options.Equil = NO; 456 } 457 458 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr); 459 while (flg) { 460 ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr); 461 if (flg) { 462 options.RowPerm = LargeDiag; 463 break; 464 } 465 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 466 if (flg) { 467 options.RowPerm = NOROWPERM; 468 break; 469 } 470 SETERRQ1(1,"Unknown row permutation %s",buff); 471 } 472 473 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr); 474 while (flg) { 475 ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr); 476 if (flg) { 477 options.ColPerm = MMD_AT_PLUS_A; 478 break; 479 } 480 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 481 if (flg) { 482 options.ColPerm = NATURAL; 483 break; 484 } 485 ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr); 486 if (flg) { 487 options.ColPerm = MMD_ATA; 488 break; 489 } 490 ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr); 491 if (flg) { 492 options.ColPerm = COLAMD; 493 break; 494 } 495 SETERRQ1(1,"Unknown column permutation %s",buff); 496 } 497 498 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 499 if (!flg) { 500 options.ReplaceTinyPivot = NO; 501 } 502 503 options.IterRefine = NOREFINE; 504 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 505 if (flg) { 506 options.IterRefine = DOUBLE; 507 } 508 509 if (PetscLogPrintInfo) { 510 lu->StatPrint = (int)PETSC_TRUE; 511 } else { 512 lu->StatPrint = (int)PETSC_FALSE; 513 } 514 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 515 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 516 PetscOptionsEnd(); 517 518 /* Initialize the SuperLU process grid. */ 519 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 520 521 /* Initialize ScalePermstruct and LUstruct. */ 522 ScalePermstructInit(M, N, &lu->ScalePermstruct); 523 LUstructInit(M, N, &lu->LUstruct); 524 525 lu->options = options; 526 lu->flg = DIFFERENT_NONZERO_PATTERN; 527 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 528 *F = B; 529 PetscFunctionReturn(0); 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 534 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 535 int ierr; 536 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 537 538 PetscFunctionBegin; 539 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 540 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 541 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 547 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 548 { 549 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 550 superlu_options_t options; 551 int ierr; 552 char *colperm; 553 554 PetscFunctionBegin; 555 /* check if matrix is superlu_dist type */ 556 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 557 558 options = lu->options; 559 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 563 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 564 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 565 if (options.ColPerm == NATURAL) { 566 colperm = "NATURAL"; 567 } else if (options.ColPerm == MMD_AT_PLUS_A) { 568 colperm = "MMD_AT_PLUS_A"; 569 } else if (options.ColPerm == MMD_ATA) { 570 colperm = "MMD_ATA"; 571 } else if (options.ColPerm == COLAMD) { 572 colperm = "COLAMD"; 573 } else { 574 SETERRQ(1,"Unknown column permutation"); 575 } 576 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatView_SuperLU_DIST" 582 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 583 { 584 int ierr; 585 PetscTruth isascii; 586 PetscViewerFormat format; 587 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 588 589 PetscFunctionBegin; 590 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 591 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 593 if (isascii) { 594 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 595 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 596 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 597 } 598 } 599 PetscFunctionReturn(0); 600 } 601 602 603 EXTERN_C_BEGIN 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 606 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) { 607 /* This routine is only called to convert to MATSUPERLU_DIST */ 608 /* from MATSEQAIJ if A has a single process communicator */ 609 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 610 int ierr; 611 MPI_Comm comm; 612 Mat B=*newmat; 613 Mat_SuperLU_DIST *lu; 614 615 PetscFunctionBegin; 616 if (B != A) { 617 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 618 } 619 620 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 621 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 622 623 lu->MatDuplicate = A->ops->duplicate; 624 lu->MatView = A->ops->view; 625 lu->MatAssemblyEnd = A->ops->assemblyend; 626 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 627 lu->MatDestroy = A->ops->destroy; 628 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 629 630 B->spptr = (void*)lu; 631 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 632 B->ops->view = MatView_SuperLU_DIST; 633 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 634 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 635 B->ops->destroy = MatDestroy_SuperLU_DIST; 636 ierr = MPI_Comm_size(comm,&(lu->size));CHKERRQ(ierr);CHKERRQ(ierr); 637 if ((lu->size) == 1) { 638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 639 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 641 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 642 } else { 643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 644 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 646 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 647 } 648 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 649 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 650 *newmat = B; 651 PetscFunctionReturn(0); 652 } 653 EXTERN_C_END 654 655 #undef __FUNCT__ 656 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 657 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 658 int ierr; 659 PetscFunctionBegin; 660 ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr); 661 ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 /*MC 666 MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 667 via the external package SuperLU_DIST. 668 669 If SuperLU_DIST is installed (see the manual for 670 instructions on how to declare the existence of external packages), 671 a matrix type can be constructed which invokes SuperLU_DIST solvers. 672 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 673 This matrix type is only supported for double precision real. 674 675 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 676 and from MATMPIAIJ otherwise. As a result, for single process communicators, 677 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 678 for communicators controlling multiple processes. It is recommended that you call both of 679 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 680 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 681 without data copy. 682 683 Options Database Keys: 684 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 685 . -mat_superlu_dist_r <n> - number of rows in processor partition 686 . -mat_superlu_dist_c <n> - number of columns in processor partition 687 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 688 . -mat_superlu_dist_equil - equilibrate the matrix 689 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 690 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 691 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 692 . -mat_superlu_dist_iterrefine - use iterative refinement 693 - -mat_superlu_dist_statprint - print factorization information 694 695 Level: beginner 696 697 .seealso: PCLU 698 M*/ 699 700 EXTERN_C_BEGIN 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatCreate_SuperLU_DIST" 703 int MatCreate_SuperLU_DIST(Mat A) { 704 int ierr,size; 705 706 PetscFunctionBegin; 707 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 708 /* and SuperLU_DIST types */ 709 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 710 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 711 if (size == 1) { 712 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 713 } else { 714 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 715 } 716 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 EXTERN_C_END 720 721