1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 /*MC 13 MATSUPERLU_DIST - a matrix type providing direct solvers for parallel matrices 14 via the external package SuperLU_DIST. 15 16 If SuperLU_DIST is installed (see the manual for 17 instructions on how to declare the existence of external packages), 18 a matrix type can be constructed which invokes SuperLU_DIST solvers. 19 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 20 This matrix type is only supported for double precision real. 21 22 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 23 and from MATMPIAIJ otherwise. 24 25 Options Database Keys: 26 + -mat_type superlu_dist 27 . -mat_superlu_dist_r <n> : number of rows in processor partition 28 . -mat_superlu_dist_c <n> : number of columns in processor partition 29 . -mat_superlu_dist_matinput 0|1 : matrix input mode; 0=global, 1=distributed 30 . -mat_superlu_dist_equil :, equilibrate the matrix 31 . -mat_superlu_dist_rowperm LargeDiag|NATURAL : row permutation 32 . -mat_superlu_dist_colperm MMD_AT_PLUS_A|MMD_ATA|COLAMD|NATURAL : column permutation 33 . -mat_superlu_dist_replacetinypivot : replace tiny pivots 34 . -mat_superlu_dist_iterrefine : use iterative refinement 35 - -mat_superlu_dist_statprint : print factorization information 36 37 .seealso: PCLU 38 M*/ 39 40 EXTERN_C_BEGIN 41 #if defined(PETSC_USE_COMPLEX) 42 #include "superlu_zdefs.h" 43 #else 44 #include "superlu_ddefs.h" 45 #endif 46 EXTERN_C_END 47 48 typedef enum { GLOBAL,DISTRIBUTED 49 } SuperLU_MatInputMode; 50 51 typedef struct { 52 int_t nprow,npcol,*row,*col; 53 gridinfo_t grid; 54 superlu_options_t options; 55 SuperMatrix A_sup; 56 ScalePermstruct_t ScalePermstruct; 57 LUstruct_t LUstruct; 58 int StatPrint; 59 int MatInputMode; 60 SOLVEstruct_t SOLVEstruct; 61 MatStructure flg; 62 MPI_Comm comm_superlu; 63 #if defined(PETSC_USE_COMPLEX) 64 doublecomplex *val; 65 #else 66 double *val; 67 #endif 68 69 /* A few function pointers for inheritance */ 70 int (*MatView)(Mat,PetscViewer); 71 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 72 int (*MatDestroy)(Mat); 73 74 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 75 PetscTruth CleanUpSuperLUDist; 76 } Mat_MPIAIJ_SuperLU_DIST; 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_SuperLU_DIST" 80 int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A) 81 { 82 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 83 Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr; 84 int ierr, size=a->size,(*destroy)(Mat); 85 86 PetscFunctionBegin; 87 if (lu->CleanUpSuperLUDist) { 88 /* Deallocate SuperLU_DIST storage */ 89 if (lu->MatInputMode == GLOBAL) { 90 Destroy_CompCol_Matrix_dist(&lu->A_sup); 91 } else { 92 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 93 if ( lu->options.SolveInitialized ) { 94 #if defined(PETSC_USE_COMPLEX) 95 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 96 #else 97 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 98 #endif 99 } 100 } 101 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 102 ScalePermstructFree(&lu->ScalePermstruct); 103 LUstructFree(&lu->LUstruct); 104 105 /* Release the SuperLU_DIST process grid. */ 106 superlu_gridexit(&lu->grid); 107 108 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 109 } 110 destroy = lu->MatDestroy; 111 ierr = PetscFree(lu);CHKERRQ(ierr); 112 ierr = (*destroy)(A);CHKERRQ(ierr); 113 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatView_MPIAIJ_Spooles_DIST" 119 int MatView_MPIAIJ_SuperLU_DIST(Mat A,PetscViewer viewer) 120 { 121 int ierr; 122 PetscTruth isascii; 123 PetscViewerFormat format; 124 Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr); 125 126 PetscFunctionBegin; 127 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 128 129 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 130 if (isascii) { 131 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 132 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 133 ierr = MatMPIAIJFactorInfo_SuperLu(A,viewer);CHKERRQ(ierr); 134 } 135 } 136 PetscFunctionReturn(0); 137 } 138 139 #undef __FUNCT__ 140 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ_SuperLU_DIST" 141 int MatAssemblyEnd_MPIAIJ_SuperLU_DIST(Mat A,MatAssemblyType mode) { 142 int ierr; 143 Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr); 144 145 PetscFunctionBegin; 146 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 147 ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatSolve_MPIAIJ_SuperLU_DIST" 153 int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 154 { 155 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 156 Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr; 157 int ierr, size=aa->size; 158 int m=A->M, N=A->N; 159 SuperLUStat_t stat; 160 double berr[1]; 161 PetscScalar *bptr; 162 int info, nrhs=1; 163 Vec x_seq; 164 IS iden; 165 VecScatter scat; 166 PetscLogDouble time0,time,time_min,time_max; 167 168 PetscFunctionBegin; 169 if (size > 1) { 170 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 171 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 172 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 173 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 174 ierr = ISDestroy(iden);CHKERRQ(ierr); 175 176 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 177 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 178 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 179 } else { /* distributed mat input */ 180 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 181 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 182 } 183 } else { /* size == 1 */ 184 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 185 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 186 } 187 188 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 189 190 PStatInit(&stat); /* Initialize the statistics variables. */ 191 if (lu->StatPrint) { 192 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */ 193 ierr = PetscGetTime(&time0);CHKERRQ(ierr); /* to be removed */ 194 } 195 if (lu->MatInputMode == GLOBAL) { 196 #if defined(PETSC_USE_COMPLEX) 197 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 198 &lu->grid, &lu->LUstruct, berr, &stat, &info); 199 #else 200 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 201 &lu->grid, &lu->LUstruct, berr, &stat, &info); 202 #endif 203 } else { /* distributed mat input */ 204 #if defined(PETSC_USE_COMPLEX) 205 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 206 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 207 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 208 #else 209 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 210 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 211 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 212 #endif 213 } 214 if (lu->StatPrint) { 215 ierr = PetscGetTime(&time);CHKERRQ(ierr); /* to be removed */ 216 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 217 } 218 PStatFree(&stat); 219 220 if (size > 1) { 221 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 222 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 223 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 224 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 225 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 226 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 227 } else { 228 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 229 } 230 } else { 231 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 232 } 233 if (lu->StatPrint) { 234 time0 = time - time0; 235 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr); 236 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr); 237 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr); 238 time = time/size; /* average time */ 239 ierr = PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ_SuperLU_DIST" 246 int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F) 247 { 248 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 249 Mat *tseq,A_seq = PETSC_NULL; 250 Mat_SeqAIJ *aa,*bb; 251 Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr; 252 int M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 253 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 254 SuperLUStat_t stat; 255 double *berr=0; 256 IS isrow; 257 PetscLogDouble time0[2],time[2],time_min[2],time_max[2]; 258 #if defined(PETSC_USE_COMPLEX) 259 doublecomplex *av, *bv; 260 #else 261 double *av, *bv; 262 #endif 263 264 PetscFunctionBegin; 265 if (lu->StatPrint) { 266 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 267 ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr); 268 } 269 270 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 271 if (size > 1) { /* convert mpi A to seq mat A */ 272 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 273 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 274 ierr = ISDestroy(isrow);CHKERRQ(ierr); 275 276 A_seq = *tseq; 277 ierr = PetscFree(tseq);CHKERRQ(ierr); 278 aa = (Mat_SeqAIJ*)A_seq->data; 279 } else { 280 aa = (Mat_SeqAIJ*)A->data; 281 } 282 283 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 284 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 285 #if defined(PETSC_USE_COMPLEX) 286 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 287 #else 288 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 289 #endif 290 } else { /* successive numeric factorization, sparsity pattern is reused. */ 291 Destroy_CompCol_Matrix_dist(&lu->A_sup); 292 Destroy_LU(N, &lu->grid, &lu->LUstruct); 293 lu->options.Fact = SamePattern; 294 } 295 #if defined(PETSC_USE_COMPLEX) 296 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 297 #else 298 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 299 #endif 300 301 /* Create compressed column matrix A_sup. */ 302 #if defined(PETSC_USE_COMPLEX) 303 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 304 #else 305 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 306 #endif 307 } else { /* distributed mat input */ 308 mat = (Mat_MPIAIJ*)A->data; 309 aa=(Mat_SeqAIJ*)(mat->A)->data; 310 bb=(Mat_SeqAIJ*)(mat->B)->data; 311 ai=aa->i; aj=aa->j; 312 bi=bb->i; bj=bb->j; 313 #if defined(PETSC_USE_COMPLEX) 314 av=(doublecomplex*)aa->a; 315 bv=(doublecomplex*)bb->a; 316 #else 317 av=aa->a; 318 bv=bb->a; 319 #endif 320 rstart = mat->rstart; 321 nz = aa->nz + bb->nz; 322 garray = mat->garray; 323 rstart = mat->rstart; 324 325 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 326 #if defined(PETSC_USE_COMPLEX) 327 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 328 #else 329 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 330 #endif 331 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 332 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 333 Destroy_LU(N, &lu->grid, &lu->LUstruct); 334 lu->options.Fact = SamePattern; 335 } 336 nz = 0; jB = 0; irow = mat->rstart; 337 for ( i=0; i<m; i++ ) { 338 lu->row[i] = nz; 339 countA = ai[i+1] - ai[i]; 340 countB = bi[i+1] - bi[i]; 341 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 342 bjj = bj + bi[i]; 343 344 /* B part, smaller col index */ 345 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 346 for (j=0; j<countB; j++){ 347 jcol = garray[bjj[j]]; 348 if (jcol > colA_start) { 349 jB = j; 350 break; 351 } 352 lu->col[nz] = jcol; 353 lu->val[nz++] = *bv++; 354 if (j==countB-1) jB = countB; 355 } 356 357 /* A part */ 358 for (j=0; j<countA; j++){ 359 lu->col[nz] = mat->rstart + ajj[j]; 360 lu->val[nz++] = *av++; 361 } 362 363 /* B part, larger col index */ 364 for (j=jB; j<countB; j++){ 365 lu->col[nz] = garray[bjj[j]]; 366 lu->val[nz++] = *bv++; 367 } 368 } 369 lu->row[m] = nz; 370 #if defined(PETSC_USE_COMPLEX) 371 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 372 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 373 #else 374 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 375 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 376 #endif 377 } 378 if (lu->StatPrint) { 379 ierr = PetscGetTime(&time[0]);CHKERRQ(ierr); 380 time0[0] = time[0] - time0[0]; 381 } 382 383 /* Factor the matrix. */ 384 PStatInit(&stat); /* Initialize the statistics variables. */ 385 386 if (lu->StatPrint) { 387 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 388 ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr); 389 } 390 391 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 392 #if defined(PETSC_USE_COMPLEX) 393 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 394 &lu->grid, &lu->LUstruct, berr, &stat, &info); 395 #else 396 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 397 &lu->grid, &lu->LUstruct, berr, &stat, &info); 398 #endif 399 } else { /* distributed mat input */ 400 #if defined(PETSC_USE_COMPLEX) 401 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 402 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 403 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 404 #else 405 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 406 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 407 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 408 #endif 409 } 410 if (lu->StatPrint) { 411 ierr = PetscGetTime(&time[1]);CHKERRQ(ierr); /* to be removed */ 412 time0[1] = time[1] - time0[1]; 413 if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 414 } 415 PStatFree(&stat); 416 417 if (lu->MatInputMode == GLOBAL && size > 1){ 418 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 419 } 420 421 if (lu->StatPrint) { 422 ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm); 423 ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm); 424 ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm); 425 for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */ 426 ierr = PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]); 427 ierr = PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]); 428 } 429 (*F)->assembled = PETSC_TRUE; 430 lu->flg = SAME_NONZERO_PATTERN; 431 PetscFunctionReturn(0); 432 } 433 434 /* Note the Petsc r and c permutations are ignored */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST" 437 int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 438 { 439 Mat B; 440 Mat_MPIAIJ_SuperLU_DIST *lu; 441 int ierr,M=A->M,N=A->N,size; 442 superlu_options_t options; 443 char buff[32]; 444 PetscTruth flg; 445 char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 446 char *prtype[] = {"LargeDiag","NATURAL"}; 447 PetscFunctionBegin; 448 449 /* Create the factorization matrix */ 450 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 451 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 452 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 453 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 454 455 B->ops->solve = MatSolve_MPIAIJ_SuperLU_DIST; 456 B->factor = FACTOR_LU; 457 458 lu = (Mat_MPIAIJ_SuperLU_DIST*)(B->spptr); 459 460 /* Set the input options */ 461 set_default_options(&options); 462 lu->MatInputMode = GLOBAL; 463 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 464 465 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 466 lu->nprow = size/2; /* Default process rows. */ 467 if (lu->nprow == 0) lu->nprow = 1; 468 lu->npcol = size/lu->nprow; /* Default process columns. */ 469 470 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 471 472 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 473 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 474 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 475 476 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 477 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 478 479 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 480 if (!flg) { 481 options.Equil = NO; 482 } 483 484 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr); 485 while (flg) { 486 ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr); 487 if (flg) { 488 options.RowPerm = LargeDiag; 489 break; 490 } 491 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 492 if (flg) { 493 options.RowPerm = NOROWPERM; 494 break; 495 } 496 SETERRQ1(1,"Unknown row permutation %s",buff); 497 } 498 499 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr); 500 while (flg) { 501 ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr); 502 if (flg) { 503 options.ColPerm = MMD_AT_PLUS_A; 504 break; 505 } 506 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 507 if (flg) { 508 options.ColPerm = NATURAL; 509 break; 510 } 511 ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr); 512 if (flg) { 513 options.ColPerm = MMD_ATA; 514 break; 515 } 516 ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr); 517 if (flg) { 518 options.ColPerm = COLAMD; 519 break; 520 } 521 SETERRQ1(1,"Unknown column permutation %s",buff); 522 } 523 524 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 525 if (!flg) { 526 options.ReplaceTinyPivot = NO; 527 } 528 529 options.IterRefine = NOREFINE; 530 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 531 if (flg) { 532 options.IterRefine = DOUBLE; 533 } 534 535 if (PetscLogPrintInfo) { 536 lu->StatPrint = (int)PETSC_TRUE; 537 } else { 538 lu->StatPrint = (int)PETSC_FALSE; 539 } 540 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 541 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 542 PetscOptionsEnd(); 543 544 /* Initialize the SuperLU process grid. */ 545 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 546 547 /* Initialize ScalePermstruct and LUstruct. */ 548 ScalePermstructInit(M, N, &lu->ScalePermstruct); 549 LUstructInit(M, N, &lu->LUstruct); 550 551 lu->options = options; 552 lu->flg = DIFFERENT_NONZERO_PATTERN; 553 lu->CleanUpSuperLUDist = PETSC_TRUE; 554 *F = B; 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatUseSuperLU_DIST_MPIAIJ" 560 int MatUseSuperLU_DIST_MPIAIJ(Mat A) 561 { 562 PetscFunctionBegin; 563 A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST; 564 A->ops->lufactornumeric = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST; 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatMPIAIJFactorInfo_SuperLu" 570 int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer) 571 { 572 Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr; 573 superlu_options_t options; 574 int ierr; 575 char *colperm; 576 577 PetscFunctionBegin; 578 /* check if matrix is superlu_dist type */ 579 if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) PetscFunctionReturn(0); 580 581 options = lu->options; 582 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 584 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 585 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 586 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 587 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 588 if (options.ColPerm == NATURAL) { 589 colperm = "NATURAL"; 590 } else if (options.ColPerm == MMD_AT_PLUS_A) { 591 colperm = "MMD_AT_PLUS_A"; 592 } else if (options.ColPerm == MMD_ATA) { 593 colperm = "MMD_ATA"; 594 } else if (options.ColPerm == COLAMD) { 595 colperm = "COLAMD"; 596 } else { 597 SETERRQ(1,"Unknown column permutation"); 598 } 599 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 EXTERN_C_BEGIN 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatCreate_MPIAIJ_SuperLU_DIST" 606 int MatCreate_MPIAIJ_SuperLU_DIST(Mat A) { 607 int ierr,size; 608 MPI_Comm comm; 609 Mat_MPIAIJ_SuperLU_DIST *lu; 610 611 PetscFunctionBegin; 612 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 613 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 614 if (size == 1) { 615 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 616 } else { 617 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 618 } 619 ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); 620 621 ierr = PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);CHKERRQ(ierr); 622 lu->MatView = A->ops->view; 623 lu->MatAssemblyEnd = A->ops->assemblyend; 624 lu->MatDestroy = A->ops->destroy; 625 lu->CleanUpSuperLUDist = PETSC_FALSE; 626 A->spptr = (void*)lu; 627 A->ops->view = MatView_MPIAIJ_SuperLU_DIST; 628 A->ops->assemblyend = MatAssemblyEnd_MPIAIJ_SuperLU_DIST; 629 A->ops->destroy = MatDestroy_MPIAIJ_SuperLU_DIST; 630 PetscFunctionReturn(0); 631 } 632 EXTERN_C_END 633 634 EXTERN_C_BEGIN 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatLoad_MPIAIJ_SuperLU_DIST" 637 int MatLoad_MPIAIJ_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) { 638 int ierr,size,(*r)(PetscViewer,MatType,Mat*); 639 MPI_Comm comm; 640 641 PetscFunctionBegin; 642 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 643 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 644 if (size == 1) { 645 ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr); 646 } else { 647 ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr); 648 } 649 ierr = (*r)(viewer,type,A);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 EXTERN_C_END 653