1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 MatType basetype; 42 /* A few function pointers for inheritance */ 43 int (*MatView)(Mat,PetscViewer); 44 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 int (*MatDestroy)(Mat); 47 48 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 49 PetscTruth CleanUpSuperLU_Dist; 50 } Mat_MPIAIJ_SuperLU_DIST; 51 52 EXTERN_C_BEGIN 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 55 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) { 56 /* This routine is only called to convert an unfactored PETSc-SuperLU_DIST matrix */ 57 /* to its base PETSc type, so we will ignore 'MatType type'. */ 58 int ierr; 59 Mat B=*newmat; 60 Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST *)A->spptr; 61 62 PetscFunctionBegin; 63 if (B != A) { 64 /* This routine was inherited from SeqAIJ. */ 65 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 66 } else { 67 /* Reset the original function pointers */ 68 B->ops->view = lu->MatView; 69 B->ops->assemblyend = lu->MatAssemblyEnd; 70 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 71 B->ops->destroy = lu->MatDestroy; 72 73 ierr = PetscObjectChangeTypeName((PetscObject)B,lu->basetype);CHKERRQ(ierr); 74 ierr = PetscFree(lu);CHKERRQ(ierr); 75 } 76 *newmat = B; 77 PetscFunctionReturn(0); 78 } 79 EXTERN_C_END 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatDestroy_MPIAIJ_SuperLU_DIST" 83 int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A) 84 { 85 int ierr; 86 Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr; 87 88 PetscFunctionBegin; 89 if (lu->CleanUpSuperLU_Dist) { 90 /* Deallocate SuperLU_DIST storage */ 91 if (lu->MatInputMode == GLOBAL) { 92 Destroy_CompCol_Matrix_dist(&lu->A_sup); 93 } else { 94 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 95 if ( lu->options.SolveInitialized ) { 96 #if defined(PETSC_USE_COMPLEX) 97 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 98 #else 99 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 100 #endif 101 } 102 } 103 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 104 ScalePermstructFree(&lu->ScalePermstruct); 105 LUstructFree(&lu->LUstruct); 106 107 /* Release the SuperLU_DIST process grid. */ 108 superlu_gridexit(&lu->grid); 109 110 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 111 } 112 113 ierr = MatConvert_SuperLU_DIST_Base(A,lu->basetype,&A);CHKERRQ(ierr); 114 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 115 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSolve_MPIAIJ_SuperLU_DIST" 121 int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 122 { 123 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 124 Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr; 125 int ierr, size=aa->size; 126 int m=A->M, N=A->N; 127 SuperLUStat_t stat; 128 double berr[1]; 129 PetscScalar *bptr; 130 int info, nrhs=1; 131 Vec x_seq; 132 IS iden; 133 VecScatter scat; 134 PetscLogDouble time0,time,time_min,time_max; 135 136 PetscFunctionBegin; 137 if (size > 1) { 138 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 139 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 140 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 141 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 142 ierr = ISDestroy(iden);CHKERRQ(ierr); 143 144 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 145 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 146 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 147 } else { /* distributed mat input */ 148 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 149 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 150 } 151 } else { /* size == 1 */ 152 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 153 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 154 } 155 156 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 157 158 PStatInit(&stat); /* Initialize the statistics variables. */ 159 if (lu->StatPrint) { 160 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */ 161 ierr = PetscGetTime(&time0);CHKERRQ(ierr); /* to be removed */ 162 } 163 if (lu->MatInputMode == GLOBAL) { 164 #if defined(PETSC_USE_COMPLEX) 165 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 166 &lu->grid, &lu->LUstruct, berr, &stat, &info); 167 #else 168 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 169 &lu->grid, &lu->LUstruct, berr, &stat, &info); 170 #endif 171 } else { /* distributed mat input */ 172 #if defined(PETSC_USE_COMPLEX) 173 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 174 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 175 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 176 #else 177 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 178 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 179 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 180 #endif 181 } 182 if (lu->StatPrint) { 183 ierr = PetscGetTime(&time);CHKERRQ(ierr); /* to be removed */ 184 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 185 } 186 PStatFree(&stat); 187 188 if (size > 1) { 189 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 190 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 191 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 192 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 193 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 194 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 195 } else { 196 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 197 } 198 } else { 199 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 200 } 201 if (lu->StatPrint) { 202 time0 = time - time0; 203 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr); 204 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr); 205 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr); 206 time = time/size; /* average time */ 207 ierr = PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ_SuperLU_DIST" 214 int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F) 215 { 216 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 217 Mat *tseq,A_seq = PETSC_NULL; 218 Mat_SeqAIJ *aa,*bb; 219 Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr; 220 int M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 221 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 222 SuperLUStat_t stat; 223 double *berr=0; 224 IS isrow; 225 PetscLogDouble time0[2],time[2],time_min[2],time_max[2]; 226 #if defined(PETSC_USE_COMPLEX) 227 doublecomplex *av, *bv; 228 #else 229 double *av, *bv; 230 #endif 231 232 PetscFunctionBegin; 233 if (lu->StatPrint) { 234 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 235 ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr); 236 } 237 238 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 239 if (size > 1) { /* convert mpi A to seq mat A */ 240 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 241 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 242 ierr = ISDestroy(isrow);CHKERRQ(ierr); 243 244 A_seq = *tseq; 245 ierr = PetscFree(tseq);CHKERRQ(ierr); 246 aa = (Mat_SeqAIJ*)A_seq->data; 247 } else { 248 aa = (Mat_SeqAIJ*)A->data; 249 } 250 251 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 252 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 253 #if defined(PETSC_USE_COMPLEX) 254 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 255 #else 256 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 257 #endif 258 } else { /* successive numeric factorization, sparsity pattern is reused. */ 259 Destroy_CompCol_Matrix_dist(&lu->A_sup); 260 Destroy_LU(N, &lu->grid, &lu->LUstruct); 261 lu->options.Fact = SamePattern; 262 } 263 #if defined(PETSC_USE_COMPLEX) 264 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 265 #else 266 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 267 #endif 268 269 /* Create compressed column matrix A_sup. */ 270 #if defined(PETSC_USE_COMPLEX) 271 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 272 #else 273 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 274 #endif 275 } else { /* distributed mat input */ 276 mat = (Mat_MPIAIJ*)A->data; 277 aa=(Mat_SeqAIJ*)(mat->A)->data; 278 bb=(Mat_SeqAIJ*)(mat->B)->data; 279 ai=aa->i; aj=aa->j; 280 bi=bb->i; bj=bb->j; 281 #if defined(PETSC_USE_COMPLEX) 282 av=(doublecomplex*)aa->a; 283 bv=(doublecomplex*)bb->a; 284 #else 285 av=aa->a; 286 bv=bb->a; 287 #endif 288 rstart = mat->rstart; 289 nz = aa->nz + bb->nz; 290 garray = mat->garray; 291 rstart = mat->rstart; 292 293 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 294 #if defined(PETSC_USE_COMPLEX) 295 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 296 #else 297 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 298 #endif 299 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 300 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 301 Destroy_LU(N, &lu->grid, &lu->LUstruct); 302 lu->options.Fact = SamePattern; 303 } 304 nz = 0; jB = 0; irow = mat->rstart; 305 for ( i=0; i<m; i++ ) { 306 lu->row[i] = nz; 307 countA = ai[i+1] - ai[i]; 308 countB = bi[i+1] - bi[i]; 309 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 310 bjj = bj + bi[i]; 311 312 /* B part, smaller col index */ 313 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 314 for (j=0; j<countB; j++){ 315 jcol = garray[bjj[j]]; 316 if (jcol > colA_start) { 317 jB = j; 318 break; 319 } 320 lu->col[nz] = jcol; 321 lu->val[nz++] = *bv++; 322 if (j==countB-1) jB = countB; 323 } 324 325 /* A part */ 326 for (j=0; j<countA; j++){ 327 lu->col[nz] = mat->rstart + ajj[j]; 328 lu->val[nz++] = *av++; 329 } 330 331 /* B part, larger col index */ 332 for (j=jB; j<countB; j++){ 333 lu->col[nz] = garray[bjj[j]]; 334 lu->val[nz++] = *bv++; 335 } 336 } 337 lu->row[m] = nz; 338 #if defined(PETSC_USE_COMPLEX) 339 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 340 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 341 #else 342 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 343 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 344 #endif 345 } 346 if (lu->StatPrint) { 347 ierr = PetscGetTime(&time[0]);CHKERRQ(ierr); 348 time0[0] = time[0] - time0[0]; 349 } 350 351 /* Factor the matrix. */ 352 PStatInit(&stat); /* Initialize the statistics variables. */ 353 354 if (lu->StatPrint) { 355 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 356 ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr); 357 } 358 359 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 360 #if defined(PETSC_USE_COMPLEX) 361 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 362 &lu->grid, &lu->LUstruct, berr, &stat, &info); 363 #else 364 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 365 &lu->grid, &lu->LUstruct, berr, &stat, &info); 366 #endif 367 } else { /* distributed mat input */ 368 #if defined(PETSC_USE_COMPLEX) 369 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 370 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 371 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 372 #else 373 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 374 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 375 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 376 #endif 377 } 378 if (lu->StatPrint) { 379 ierr = PetscGetTime(&time[1]);CHKERRQ(ierr); /* to be removed */ 380 time0[1] = time[1] - time0[1]; 381 if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 382 } 383 PStatFree(&stat); 384 385 if (lu->MatInputMode == GLOBAL && size > 1){ 386 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 387 } 388 389 if (lu->StatPrint) { 390 ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm); 391 ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm); 392 ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm); 393 for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */ 394 ierr = PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]); 395 ierr = PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]); 396 } 397 (*F)->assembled = PETSC_TRUE; 398 lu->flg = SAME_NONZERO_PATTERN; 399 PetscFunctionReturn(0); 400 } 401 402 /* Note the Petsc r and c permutations are ignored */ 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST" 405 int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 406 { 407 Mat B; 408 Mat_MPIAIJ_SuperLU_DIST *lu; 409 int ierr,M=A->M,N=A->N,size; 410 superlu_options_t options; 411 char buff[32]; 412 PetscTruth flg; 413 char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 414 char *prtype[] = {"LargeDiag","NATURAL"}; 415 PetscFunctionBegin; 416 417 /* Create the factorization matrix */ 418 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 419 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 420 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 421 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 422 423 B->ops->lufactornumeric = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST; 424 B->ops->solve = MatSolve_MPIAIJ_SuperLU_DIST; 425 B->factor = FACTOR_LU; 426 427 lu = (Mat_MPIAIJ_SuperLU_DIST*)(B->spptr); 428 429 /* Set the input options */ 430 set_default_options(&options); 431 lu->MatInputMode = GLOBAL; 432 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 433 434 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 435 lu->nprow = size/2; /* Default process rows. */ 436 if (lu->nprow == 0) lu->nprow = 1; 437 lu->npcol = size/lu->nprow; /* Default process columns. */ 438 439 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 440 441 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 442 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 443 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 444 445 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 446 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 447 448 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 449 if (!flg) { 450 options.Equil = NO; 451 } 452 453 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr); 454 while (flg) { 455 ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr); 456 if (flg) { 457 options.RowPerm = LargeDiag; 458 break; 459 } 460 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 461 if (flg) { 462 options.RowPerm = NOROWPERM; 463 break; 464 } 465 SETERRQ1(1,"Unknown row permutation %s",buff); 466 } 467 468 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr); 469 while (flg) { 470 ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr); 471 if (flg) { 472 options.ColPerm = MMD_AT_PLUS_A; 473 break; 474 } 475 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 476 if (flg) { 477 options.ColPerm = NATURAL; 478 break; 479 } 480 ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr); 481 if (flg) { 482 options.ColPerm = MMD_ATA; 483 break; 484 } 485 ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr); 486 if (flg) { 487 options.ColPerm = COLAMD; 488 break; 489 } 490 SETERRQ1(1,"Unknown column permutation %s",buff); 491 } 492 493 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 494 if (!flg) { 495 options.ReplaceTinyPivot = NO; 496 } 497 498 options.IterRefine = NOREFINE; 499 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 500 if (flg) { 501 options.IterRefine = DOUBLE; 502 } 503 504 if (PetscLogPrintInfo) { 505 lu->StatPrint = (int)PETSC_TRUE; 506 } else { 507 lu->StatPrint = (int)PETSC_FALSE; 508 } 509 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 510 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 511 PetscOptionsEnd(); 512 513 /* Initialize the SuperLU process grid. */ 514 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 515 516 /* Initialize ScalePermstruct and LUstruct. */ 517 ScalePermstructInit(M, N, &lu->ScalePermstruct); 518 LUstructInit(M, N, &lu->LUstruct); 519 520 lu->options = options; 521 lu->flg = DIFFERENT_NONZERO_PATTERN; 522 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 523 *F = B; 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ_SuperLU_DIST" 529 int MatAssemblyEnd_MPIAIJ_SuperLU_DIST(Mat A,MatAssemblyType mode) { 530 int ierr; 531 Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr); 532 533 PetscFunctionBegin; 534 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 535 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 536 A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST; 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMPIAIJFactorInfo_SuperLu" 542 int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer) 543 { 544 Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr; 545 superlu_options_t options; 546 int ierr; 547 char *colperm; 548 549 PetscFunctionBegin; 550 /* check if matrix is superlu_dist type */ 551 if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) PetscFunctionReturn(0); 552 553 options = lu->options; 554 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 555 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 556 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 557 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 558 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 559 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 560 if (options.ColPerm == NATURAL) { 561 colperm = "NATURAL"; 562 } else if (options.ColPerm == MMD_AT_PLUS_A) { 563 colperm = "MMD_AT_PLUS_A"; 564 } else if (options.ColPerm == MMD_ATA) { 565 colperm = "MMD_ATA"; 566 } else if (options.ColPerm == COLAMD) { 567 colperm = "COLAMD"; 568 } else { 569 SETERRQ(1,"Unknown column permutation"); 570 } 571 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatView_MPIAIJ_Spooles_DIST" 577 int MatView_MPIAIJ_SuperLU_DIST(Mat A,PetscViewer viewer) 578 { 579 int ierr; 580 PetscTruth isascii; 581 PetscViewerFormat format; 582 Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr); 583 584 PetscFunctionBegin; 585 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 586 587 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 588 if (isascii) { 589 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 590 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 591 ierr = MatMPIAIJFactorInfo_SuperLu(A,viewer);CHKERRQ(ierr); 592 } 593 } 594 PetscFunctionReturn(0); 595 } 596 597 598 EXTERN_C_BEGIN 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 601 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) { 602 /* This routine is only called to convert to MATSUPERLU_DIST */ 603 /* from MATSEQAIJ if A has a single process communicator */ 604 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 605 int ierr,size; 606 MPI_Comm comm; 607 Mat B=*newmat; 608 Mat_MPIAIJ_SuperLU_DIST *lu; 609 610 PetscFunctionBegin; 611 if (B != A) { 612 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 613 } 614 615 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 616 ierr = PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);CHKERRQ(ierr); 617 618 lu->MatView = A->ops->view; 619 lu->MatAssemblyEnd = A->ops->assemblyend; 620 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 621 lu->MatDestroy = A->ops->destroy; 622 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 623 624 B->spptr = (void*)lu; 625 B->ops->view = MatView_MPIAIJ_SuperLU_DIST; 626 B->ops->assemblyend = MatAssemblyEnd_MPIAIJ_SuperLU_DIST; 627 B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST; 628 B->ops->destroy = MatDestroy_MPIAIJ_SuperLU_DIST; 629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 630 if (size == 1) { 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 632 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 634 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 635 } else { 636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 637 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 639 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 640 } 641 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 642 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 643 *newmat = B; 644 PetscFunctionReturn(0); 645 } 646 EXTERN_C_END 647 648 /*MC 649 MATSUPERLU_DIST - a matrix type providing direct solvers (LU) for parallel matrices 650 via the external package SuperLU_DIST. 651 652 If SuperLU_DIST is installed (see the manual for 653 instructions on how to declare the existence of external packages), 654 a matrix type can be constructed which invokes SuperLU_DIST solvers. 655 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 656 This matrix type is only supported for double precision real. 657 658 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 659 and from MATMPIAIJ otherwise. As a result, for single process communicators, 660 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 661 for communicators controlling multiple processes. It is recommended that you call both of 662 the above preallocation routines for simplicity. 663 664 Options Database Keys: 665 + -mat_type superlu_dist 666 . -mat_superlu_dist_r <n> - number of rows in processor partition 667 . -mat_superlu_dist_c <n> - number of columns in processor partition 668 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 669 . -mat_superlu_dist_equil - equilibrate the matrix 670 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 671 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 672 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 673 . -mat_superlu_dist_iterrefine - use iterative refinement 674 - -mat_superlu_dist_statprint - print factorization information 675 676 Level: beginner 677 678 .seealso: PCLU 679 M*/ 680 681 EXTERN_C_BEGIN 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatCreate_MPIAIJ_SuperLU_DIST" 684 int MatCreate_MPIAIJ_SuperLU_DIST(Mat A) { 685 int ierr,size; 686 MPI_Comm comm; 687 688 PetscFunctionBegin; 689 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 691 if (size == 1) { 692 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 693 } else { 694 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 695 } 696 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 EXTERN_C_END 700 701 EXTERN_C_BEGIN 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatLoad_MPIAIJ_SuperLU_DIST" 704 int MatLoad_MPIAIJ_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) { 705 int ierr,size,(*r)(PetscViewer,MatType,Mat*); 706 MPI_Comm comm; 707 708 PetscFunctionBegin; 709 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 710 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 711 if (size == 1) { 712 ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr); 713 } else { 714 ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr); 715 } 716 ierr = (*r)(viewer,type,A);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 EXTERN_C_END 720