1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 int size; 42 /* A few function pointers for inheritance */ 43 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 int (*MatView)(Mat,PetscViewer); 45 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 int (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) { 59 int ierr; 60 Mat B=*newmat; 61 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 62 63 PetscFunctionBegin; 64 if (B != A) { 65 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 66 } 67 /* Reset the original function pointers */ 68 B->ops->duplicate = lu->MatDuplicate; 69 B->ops->view = lu->MatView; 70 B->ops->assemblyend = lu->MatAssemblyEnd; 71 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 72 B->ops->destroy = lu->MatDestroy; 73 74 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 *newmat = B; 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 84 int MatDestroy_SuperLU_DIST(Mat A) 85 { 86 int ierr; 87 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 88 89 PetscFunctionBegin; 90 if (lu->CleanUpSuperLU_Dist) { 91 /* Deallocate SuperLU_DIST storage */ 92 if (lu->MatInputMode == GLOBAL) { 93 Destroy_CompCol_Matrix_dist(&lu->A_sup); 94 } else { 95 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 96 if ( lu->options.SolveInitialized ) { 97 #if defined(PETSC_USE_COMPLEX) 98 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 99 #else 100 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 101 #endif 102 } 103 } 104 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 105 ScalePermstructFree(&lu->ScalePermstruct); 106 LUstructFree(&lu->LUstruct); 107 108 /* Release the SuperLU_DIST process grid. */ 109 superlu_gridexit(&lu->grid); 110 111 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 112 } 113 114 if (lu->size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 129 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 130 int ierr, size=aa->size; 131 int m=A->M, N=A->N; 132 SuperLUStat_t stat; 133 double berr[1]; 134 PetscScalar *bptr; 135 int info, nrhs=1; 136 Vec x_seq; 137 IS iden; 138 VecScatter scat; 139 PetscLogDouble time0,time,time_min,time_max; 140 141 PetscFunctionBegin; 142 if (size > 1) { 143 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 144 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 145 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 146 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 147 ierr = ISDestroy(iden);CHKERRQ(ierr); 148 149 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 151 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 152 } else { /* distributed mat input */ 153 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 154 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 155 } 156 } else { /* size == 1 */ 157 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 158 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 159 } 160 161 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 162 163 PStatInit(&stat); /* Initialize the statistics variables. */ 164 if (lu->StatPrint) { 165 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */ 166 ierr = PetscGetTime(&time0);CHKERRQ(ierr); /* to be removed */ 167 } 168 if (lu->MatInputMode == GLOBAL) { 169 #if defined(PETSC_USE_COMPLEX) 170 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 171 &lu->grid, &lu->LUstruct, berr, &stat, &info); 172 #else 173 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 174 &lu->grid, &lu->LUstruct, berr, &stat, &info); 175 #endif 176 } else { /* distributed mat input */ 177 #if defined(PETSC_USE_COMPLEX) 178 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 179 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 180 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 181 #else 182 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 185 #endif 186 } 187 if (lu->StatPrint) { 188 ierr = PetscGetTime(&time);CHKERRQ(ierr); /* to be removed */ 189 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 190 } 191 PStatFree(&stat); 192 193 if (size > 1) { 194 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 195 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 196 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 197 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 198 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 199 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 200 } else { 201 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 202 } 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 if (lu->StatPrint) { 207 time0 = time - time0; 208 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr); 209 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr); 210 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr); 211 time = time/size; /* average time */ 212 ierr = PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr); 213 } 214 PetscFunctionReturn(0); 215 } 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 219 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 220 { 221 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 222 Mat *tseq,A_seq = PETSC_NULL; 223 Mat_SeqAIJ *aa,*bb; 224 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 225 int M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 226 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 227 SuperLUStat_t stat; 228 double *berr=0; 229 IS isrow; 230 PetscLogDouble time0[2],time[2],time_min[2],time_max[2]; 231 #if defined(PETSC_USE_COMPLEX) 232 doublecomplex *av, *bv; 233 #else 234 double *av, *bv; 235 #endif 236 237 PetscFunctionBegin; 238 if (lu->StatPrint) { 239 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 240 ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr); 241 } 242 243 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 244 if (size > 1) { /* convert mpi A to seq mat A */ 245 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 246 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 247 ierr = ISDestroy(isrow);CHKERRQ(ierr); 248 249 A_seq = *tseq; 250 ierr = PetscFree(tseq);CHKERRQ(ierr); 251 aa = (Mat_SeqAIJ*)A_seq->data; 252 } else { 253 aa = (Mat_SeqAIJ*)A->data; 254 } 255 256 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 257 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 258 #if defined(PETSC_USE_COMPLEX) 259 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #else 261 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 262 #endif 263 } else { /* successive numeric factorization, sparsity pattern is reused. */ 264 Destroy_CompCol_Matrix_dist(&lu->A_sup); 265 Destroy_LU(N, &lu->grid, &lu->LUstruct); 266 lu->options.Fact = SamePattern; 267 } 268 #if defined(PETSC_USE_COMPLEX) 269 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 270 #else 271 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 272 #endif 273 274 /* Create compressed column matrix A_sup. */ 275 #if defined(PETSC_USE_COMPLEX) 276 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 277 #else 278 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 279 #endif 280 } else { /* distributed mat input */ 281 mat = (Mat_MPIAIJ*)A->data; 282 aa=(Mat_SeqAIJ*)(mat->A)->data; 283 bb=(Mat_SeqAIJ*)(mat->B)->data; 284 ai=aa->i; aj=aa->j; 285 bi=bb->i; bj=bb->j; 286 #if defined(PETSC_USE_COMPLEX) 287 av=(doublecomplex*)aa->a; 288 bv=(doublecomplex*)bb->a; 289 #else 290 av=aa->a; 291 bv=bb->a; 292 #endif 293 rstart = mat->rstart; 294 nz = aa->nz + bb->nz; 295 garray = mat->garray; 296 rstart = mat->rstart; 297 298 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 299 #if defined(PETSC_USE_COMPLEX) 300 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #else 302 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 303 #endif 304 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 305 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 306 Destroy_LU(N, &lu->grid, &lu->LUstruct); 307 lu->options.Fact = SamePattern; 308 } 309 nz = 0; jB = 0; irow = mat->rstart; 310 for ( i=0; i<m; i++ ) { 311 lu->row[i] = nz; 312 countA = ai[i+1] - ai[i]; 313 countB = bi[i+1] - bi[i]; 314 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 315 bjj = bj + bi[i]; 316 317 /* B part, smaller col index */ 318 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 319 for (j=0; j<countB; j++){ 320 jcol = garray[bjj[j]]; 321 if (jcol > colA_start) { 322 jB = j; 323 break; 324 } 325 lu->col[nz] = jcol; 326 lu->val[nz++] = *bv++; 327 if (j==countB-1) jB = countB; 328 } 329 330 /* A part */ 331 for (j=0; j<countA; j++){ 332 lu->col[nz] = mat->rstart + ajj[j]; 333 lu->val[nz++] = *av++; 334 } 335 336 /* B part, larger col index */ 337 for (j=jB; j<countB; j++){ 338 lu->col[nz] = garray[bjj[j]]; 339 lu->val[nz++] = *bv++; 340 } 341 } 342 lu->row[m] = nz; 343 #if defined(PETSC_USE_COMPLEX) 344 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 345 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 346 #else 347 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 348 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 349 #endif 350 } 351 if (lu->StatPrint) { 352 ierr = PetscGetTime(&time[0]);CHKERRQ(ierr); 353 time0[0] = time[0] - time0[0]; 354 } 355 356 /* Factor the matrix. */ 357 PStatInit(&stat); /* Initialize the statistics variables. */ 358 359 if (lu->StatPrint) { 360 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 361 ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr); 362 } 363 364 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 365 #if defined(PETSC_USE_COMPLEX) 366 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 367 &lu->grid, &lu->LUstruct, berr, &stat, &info); 368 #else 369 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 370 &lu->grid, &lu->LUstruct, berr, &stat, &info); 371 #endif 372 } else { /* distributed mat input */ 373 #if defined(PETSC_USE_COMPLEX) 374 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 375 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 376 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 377 #else 378 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 379 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 380 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 381 #endif 382 } 383 if (lu->StatPrint) { 384 ierr = PetscGetTime(&time[1]);CHKERRQ(ierr); /* to be removed */ 385 time0[1] = time[1] - time0[1]; 386 if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 387 } 388 PStatFree(&stat); 389 390 if (lu->MatInputMode == GLOBAL && size > 1){ 391 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 392 } 393 394 if (lu->StatPrint) { 395 ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm); 396 ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm); 397 ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm); 398 for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */ 399 ierr = PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]); 400 ierr = PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]); 401 } 402 (*F)->assembled = PETSC_TRUE; 403 lu->flg = SAME_NONZERO_PATTERN; 404 PetscFunctionReturn(0); 405 } 406 407 /* Note the Petsc r and c permutations are ignored */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 410 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 411 { 412 Mat B; 413 Mat_SuperLU_DIST *lu; 414 int ierr,M=A->M,N=A->N,size; 415 superlu_options_t options; 416 char buff[32]; 417 PetscTruth flg; 418 char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 419 char *prtype[] = {"LargeDiag","NATURAL"}; 420 421 PetscFunctionBegin; 422 /* Create the factorization matrix */ 423 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 424 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 425 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 426 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 427 428 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 429 B->ops->solve = MatSolve_SuperLU_DIST; 430 B->factor = FACTOR_LU; 431 432 lu = (Mat_SuperLU_DIST*)(B->spptr); 433 434 /* Set the input options */ 435 set_default_options(&options); 436 lu->MatInputMode = GLOBAL; 437 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 438 439 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 440 lu->nprow = size/2; /* Default process rows. */ 441 if (lu->nprow == 0) lu->nprow = 1; 442 lu->npcol = size/lu->nprow; /* Default process columns. */ 443 444 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 445 446 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 447 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 448 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 449 450 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 451 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 452 453 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 454 if (!flg) { 455 options.Equil = NO; 456 } 457 458 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr); 459 while (flg) { 460 ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr); 461 if (flg) { 462 options.RowPerm = LargeDiag; 463 break; 464 } 465 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 466 if (flg) { 467 options.RowPerm = NOROWPERM; 468 break; 469 } 470 SETERRQ1(1,"Unknown row permutation %s",buff); 471 } 472 473 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr); 474 while (flg) { 475 ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr); 476 if (flg) { 477 options.ColPerm = MMD_AT_PLUS_A; 478 break; 479 } 480 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 481 if (flg) { 482 options.ColPerm = NATURAL; 483 break; 484 } 485 ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr); 486 if (flg) { 487 options.ColPerm = MMD_ATA; 488 break; 489 } 490 ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr); 491 if (flg) { 492 options.ColPerm = COLAMD; 493 break; 494 } 495 SETERRQ1(1,"Unknown column permutation %s",buff); 496 } 497 498 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 499 if (!flg) { 500 options.ReplaceTinyPivot = NO; 501 } 502 503 options.IterRefine = NOREFINE; 504 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 505 if (flg) { 506 options.IterRefine = DOUBLE; 507 } 508 509 if (PetscLogPrintInfo) { 510 lu->StatPrint = (int)PETSC_TRUE; 511 } else { 512 lu->StatPrint = (int)PETSC_FALSE; 513 } 514 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 515 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 516 PetscOptionsEnd(); 517 518 /* Initialize the SuperLU process grid. */ 519 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 520 521 /* Initialize ScalePermstruct and LUstruct. */ 522 ScalePermstructInit(M, N, &lu->ScalePermstruct); 523 LUstructInit(M, N, &lu->LUstruct); 524 525 lu->options = options; 526 lu->flg = DIFFERENT_NONZERO_PATTERN; 527 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 528 *F = B; 529 PetscFunctionReturn(0); 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 534 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 535 int ierr; 536 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 537 538 PetscFunctionBegin; 539 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 540 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 541 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 547 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 548 { 549 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 550 superlu_options_t options; 551 int ierr; 552 char *colperm; 553 554 PetscFunctionBegin; 555 /* check if matrix is superlu_dist type */ 556 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 557 558 options = lu->options; 559 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 563 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 564 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 565 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 566 if (options.ColPerm == NATURAL) { 567 colperm = "NATURAL"; 568 } else if (options.ColPerm == MMD_AT_PLUS_A) { 569 colperm = "MMD_AT_PLUS_A"; 570 } else if (options.ColPerm == MMD_ATA) { 571 colperm = "MMD_ATA"; 572 } else if (options.ColPerm == COLAMD) { 573 colperm = "COLAMD"; 574 } else { 575 SETERRQ(1,"Unknown column permutation"); 576 } 577 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatView_SuperLU_DIST" 583 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 584 { 585 int ierr; 586 PetscTruth isascii; 587 PetscViewerFormat format; 588 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 589 590 PetscFunctionBegin; 591 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 592 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 594 if (isascii) { 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 597 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 598 } 599 } 600 PetscFunctionReturn(0); 601 } 602 603 604 EXTERN_C_BEGIN 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 607 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) { 608 /* This routine is only called to convert to MATSUPERLU_DIST */ 609 /* from MATSEQAIJ if A has a single process communicator */ 610 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 611 int ierr; 612 MPI_Comm comm; 613 Mat B=*newmat; 614 Mat_SuperLU_DIST *lu; 615 616 PetscFunctionBegin; 617 if (B != A) { 618 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 619 } 620 621 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 622 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 623 624 lu->MatDuplicate = A->ops->duplicate; 625 lu->MatView = A->ops->view; 626 lu->MatAssemblyEnd = A->ops->assemblyend; 627 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 628 lu->MatDestroy = A->ops->destroy; 629 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 630 631 B->spptr = (void*)lu; 632 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 633 B->ops->view = MatView_SuperLU_DIST; 634 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 635 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 636 B->ops->destroy = MatDestroy_SuperLU_DIST; 637 ierr = MPI_Comm_size(comm,&(lu->size));CHKERRQ(ierr);CHKERRQ(ierr); 638 if ((lu->size) == 1) { 639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 640 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 642 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 643 } else { 644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 645 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 647 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 648 } 649 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 650 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 651 *newmat = B; 652 PetscFunctionReturn(0); 653 } 654 EXTERN_C_END 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 658 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 659 int ierr; 660 PetscFunctionBegin; 661 ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr); 662 ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 /*MC 667 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 668 via the external package SuperLU_DIST. 669 670 If SuperLU_DIST is installed (see the manual for 671 instructions on how to declare the existence of external packages), 672 a matrix type can be constructed which invokes SuperLU_DIST solvers. 673 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 674 This matrix type is only supported for double precision real. 675 676 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 677 and from MATMPIAIJ otherwise. As a result, for single process communicators, 678 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 679 for communicators controlling multiple processes. It is recommended that you call both of 680 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 681 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 682 without data copy. 683 684 Options Database Keys: 685 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 686 . -mat_superlu_dist_r <n> - number of rows in processor partition 687 . -mat_superlu_dist_c <n> - number of columns in processor partition 688 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 689 . -mat_superlu_dist_equil - equilibrate the matrix 690 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 691 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 692 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 693 . -mat_superlu_dist_iterrefine - use iterative refinement 694 - -mat_superlu_dist_statprint - print factorization information 695 696 Level: beginner 697 698 .seealso: PCLU 699 M*/ 700 701 EXTERN_C_BEGIN 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatCreate_SuperLU_DIST" 704 int MatCreate_SuperLU_DIST(Mat A) { 705 int ierr,size; 706 707 PetscFunctionBegin; 708 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 709 /* and SuperLU_DIST types */ 710 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 711 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 712 if (size == 1) { 713 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 714 } else { 715 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 716 } 717 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 EXTERN_C_END 721 722