1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the SuperLU_DIST_2.0 sparse solver 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 9 #include "stdlib.h" 10 #endif 11 12 EXTERN_C_BEGIN 13 #if defined(PETSC_USE_COMPLEX) 14 #include "superlu_zdefs.h" 15 #else 16 #include "superlu_ddefs.h" 17 #endif 18 EXTERN_C_END 19 20 typedef enum { GLOBAL,DISTRIBUTED 21 } SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 MatStructure flg; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 MatType basetype; 42 /* A few function pointers for inheritance */ 43 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 int (*MatView)(Mat,PetscViewer); 45 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 int (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) { 59 /* This routine is only called to convert an unfactored PETSc-SuperLU_DIST matrix */ 60 /* to its base PETSc type, so we will ignore 'MatType type'. */ 61 int ierr; 62 Mat B=*newmat; 63 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 64 65 PetscFunctionBegin; 66 if (B != A) { 67 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 68 } 69 /* Reset the original function pointers */ 70 B->ops->duplicate = lu->MatDuplicate; 71 B->ops->view = lu->MatView; 72 B->ops->assemblyend = lu->MatAssemblyEnd; 73 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 74 B->ops->destroy = lu->MatDestroy; 75 76 ierr = PetscObjectChangeTypeName((PetscObject)B,lu->basetype);CHKERRQ(ierr); 77 ierr = PetscFree(lu);CHKERRQ(ierr); 78 79 *newmat = B; 80 PetscFunctionReturn(0); 81 } 82 EXTERN_C_END 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 86 int MatDestroy_SuperLU_DIST(Mat A) 87 { 88 int ierr; 89 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 90 91 PetscFunctionBegin; 92 if (lu->CleanUpSuperLU_Dist) { 93 /* Deallocate SuperLU_DIST storage */ 94 if (lu->MatInputMode == GLOBAL) { 95 Destroy_CompCol_Matrix_dist(&lu->A_sup); 96 } else { 97 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 98 if ( lu->options.SolveInitialized ) { 99 #if defined(PETSC_USE_COMPLEX) 100 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 101 #else 102 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 103 #endif 104 } 105 } 106 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 107 ScalePermstructFree(&lu->ScalePermstruct); 108 LUstructFree(&lu->LUstruct); 109 110 /* Release the SuperLU_DIST process grid. */ 111 superlu_gridexit(&lu->grid); 112 113 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 114 } 115 116 ierr = MatConvert_SuperLU_DIST_Base(A,lu->basetype,&A);CHKERRQ(ierr); 117 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 118 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatSolve_SuperLU_DIST" 124 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 125 { 126 Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data; 127 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 128 int ierr, size=aa->size; 129 int m=A->M, N=A->N; 130 SuperLUStat_t stat; 131 double berr[1]; 132 PetscScalar *bptr; 133 int info, nrhs=1; 134 Vec x_seq; 135 IS iden; 136 VecScatter scat; 137 PetscLogDouble time0,time,time_min,time_max; 138 139 PetscFunctionBegin; 140 if (size > 1) { 141 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 142 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 143 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 144 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 145 ierr = ISDestroy(iden);CHKERRQ(ierr); 146 147 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 148 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 149 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 150 } else { /* distributed mat input */ 151 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 152 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 153 } 154 } else { /* size == 1 */ 155 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 156 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 157 } 158 159 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 160 161 PStatInit(&stat); /* Initialize the statistics variables. */ 162 if (lu->StatPrint) { 163 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */ 164 ierr = PetscGetTime(&time0);CHKERRQ(ierr); /* to be removed */ 165 } 166 if (lu->MatInputMode == GLOBAL) { 167 #if defined(PETSC_USE_COMPLEX) 168 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 169 &lu->grid, &lu->LUstruct, berr, &stat, &info); 170 #else 171 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 172 &lu->grid, &lu->LUstruct, berr, &stat, &info); 173 #endif 174 } else { /* distributed mat input */ 175 #if defined(PETSC_USE_COMPLEX) 176 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 177 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 178 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 179 #else 180 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 181 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 182 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 183 #endif 184 } 185 if (lu->StatPrint) { 186 ierr = PetscGetTime(&time);CHKERRQ(ierr); /* to be removed */ 187 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 188 } 189 PStatFree(&stat); 190 191 if (size > 1) { 192 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 193 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 194 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 195 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 196 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 197 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 198 } else { 199 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 200 } 201 } else { 202 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 203 } 204 if (lu->StatPrint) { 205 time0 = time - time0; 206 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr); 207 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr); 208 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr); 209 time = time/size; /* average time */ 210 ierr = PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr); 211 } 212 PetscFunctionReturn(0); 213 } 214 215 #undef __FUNCT__ 216 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 217 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 218 { 219 Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data,*mat; 220 Mat *tseq,A_seq = PETSC_NULL; 221 Mat_SeqAIJ *aa,*bb; 222 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 223 int M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 224 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 225 SuperLUStat_t stat; 226 double *berr=0; 227 IS isrow; 228 PetscLogDouble time0[2],time[2],time_min[2],time_max[2]; 229 #if defined(PETSC_USE_COMPLEX) 230 doublecomplex *av, *bv; 231 #else 232 double *av, *bv; 233 #endif 234 235 PetscFunctionBegin; 236 if (lu->StatPrint) { 237 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 238 ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr); 239 } 240 241 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 242 if (size > 1) { /* convert mpi A to seq mat A */ 243 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr); 244 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr); 245 ierr = ISDestroy(isrow);CHKERRQ(ierr); 246 247 A_seq = *tseq; 248 ierr = PetscFree(tseq);CHKERRQ(ierr); 249 aa = (Mat_SeqAIJ*)A_seq->data; 250 } else { 251 aa = (Mat_SeqAIJ*)A->data; 252 } 253 254 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 255 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 256 #if defined(PETSC_USE_COMPLEX) 257 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 258 #else 259 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 260 #endif 261 } else { /* successive numeric factorization, sparsity pattern is reused. */ 262 Destroy_CompCol_Matrix_dist(&lu->A_sup); 263 Destroy_LU(N, &lu->grid, &lu->LUstruct); 264 lu->options.Fact = SamePattern; 265 } 266 #if defined(PETSC_USE_COMPLEX) 267 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 268 #else 269 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 270 #endif 271 272 /* Create compressed column matrix A_sup. */ 273 #if defined(PETSC_USE_COMPLEX) 274 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 275 #else 276 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 277 #endif 278 } else { /* distributed mat input */ 279 mat = (Mat_MPIAIJ*)A->data; 280 aa=(Mat_SeqAIJ*)(mat->A)->data; 281 bb=(Mat_SeqAIJ*)(mat->B)->data; 282 ai=aa->i; aj=aa->j; 283 bi=bb->i; bj=bb->j; 284 #if defined(PETSC_USE_COMPLEX) 285 av=(doublecomplex*)aa->a; 286 bv=(doublecomplex*)bb->a; 287 #else 288 av=aa->a; 289 bv=bb->a; 290 #endif 291 rstart = mat->rstart; 292 nz = aa->nz + bb->nz; 293 garray = mat->garray; 294 rstart = mat->rstart; 295 296 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 297 #if defined(PETSC_USE_COMPLEX) 298 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 299 #else 300 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #endif 302 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 303 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 304 Destroy_LU(N, &lu->grid, &lu->LUstruct); 305 lu->options.Fact = SamePattern; 306 } 307 nz = 0; jB = 0; irow = mat->rstart; 308 for ( i=0; i<m; i++ ) { 309 lu->row[i] = nz; 310 countA = ai[i+1] - ai[i]; 311 countB = bi[i+1] - bi[i]; 312 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 313 bjj = bj + bi[i]; 314 315 /* B part, smaller col index */ 316 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 317 for (j=0; j<countB; j++){ 318 jcol = garray[bjj[j]]; 319 if (jcol > colA_start) { 320 jB = j; 321 break; 322 } 323 lu->col[nz] = jcol; 324 lu->val[nz++] = *bv++; 325 if (j==countB-1) jB = countB; 326 } 327 328 /* A part */ 329 for (j=0; j<countA; j++){ 330 lu->col[nz] = mat->rstart + ajj[j]; 331 lu->val[nz++] = *av++; 332 } 333 334 /* B part, larger col index */ 335 for (j=jB; j<countB; j++){ 336 lu->col[nz] = garray[bjj[j]]; 337 lu->val[nz++] = *bv++; 338 } 339 } 340 lu->row[m] = nz; 341 #if defined(PETSC_USE_COMPLEX) 342 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 343 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 344 #else 345 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 346 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 347 #endif 348 } 349 if (lu->StatPrint) { 350 ierr = PetscGetTime(&time[0]);CHKERRQ(ierr); 351 time0[0] = time[0] - time0[0]; 352 } 353 354 /* Factor the matrix. */ 355 PStatInit(&stat); /* Initialize the statistics variables. */ 356 357 if (lu->StatPrint) { 358 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 359 ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr); 360 } 361 362 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 363 #if defined(PETSC_USE_COMPLEX) 364 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 365 &lu->grid, &lu->LUstruct, berr, &stat, &info); 366 #else 367 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 368 &lu->grid, &lu->LUstruct, berr, &stat, &info); 369 #endif 370 } else { /* distributed mat input */ 371 #if defined(PETSC_USE_COMPLEX) 372 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 373 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 374 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 375 #else 376 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 377 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 378 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 379 #endif 380 } 381 if (lu->StatPrint) { 382 ierr = PetscGetTime(&time[1]);CHKERRQ(ierr); /* to be removed */ 383 time0[1] = time[1] - time0[1]; 384 if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 385 } 386 PStatFree(&stat); 387 388 if (lu->MatInputMode == GLOBAL && size > 1){ 389 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 390 } 391 392 if (lu->StatPrint) { 393 ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm); 394 ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm); 395 ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm); 396 for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */ 397 ierr = PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]); 398 ierr = PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]); 399 } 400 (*F)->assembled = PETSC_TRUE; 401 lu->flg = SAME_NONZERO_PATTERN; 402 PetscFunctionReturn(0); 403 } 404 405 /* Note the Petsc r and c permutations are ignored */ 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 408 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 409 { 410 Mat B; 411 Mat_SuperLU_DIST *lu; 412 int ierr,M=A->M,N=A->N,size; 413 superlu_options_t options; 414 char buff[32]; 415 PetscTruth flg; 416 char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 417 char *prtype[] = {"LargeDiag","NATURAL"}; 418 419 PetscFunctionBegin; 420 /* Create the factorization matrix */ 421 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 422 ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr); 423 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 424 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 425 426 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 427 B->ops->solve = MatSolve_SuperLU_DIST; 428 B->factor = FACTOR_LU; 429 430 lu = (Mat_SuperLU_DIST*)(B->spptr); 431 432 /* Set the input options */ 433 set_default_options(&options); 434 lu->MatInputMode = GLOBAL; 435 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 436 437 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 438 lu->nprow = size/2; /* Default process rows. */ 439 if (lu->nprow == 0) lu->nprow = 1; 440 lu->npcol = size/lu->nprow; /* Default process columns. */ 441 442 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 443 444 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 445 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 446 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 447 448 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 449 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 450 451 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 452 if (!flg) { 453 options.Equil = NO; 454 } 455 456 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr); 457 while (flg) { 458 ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr); 459 if (flg) { 460 options.RowPerm = LargeDiag; 461 break; 462 } 463 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 464 if (flg) { 465 options.RowPerm = NOROWPERM; 466 break; 467 } 468 SETERRQ1(1,"Unknown row permutation %s",buff); 469 } 470 471 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr); 472 while (flg) { 473 ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr); 474 if (flg) { 475 options.ColPerm = MMD_AT_PLUS_A; 476 break; 477 } 478 ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr); 479 if (flg) { 480 options.ColPerm = NATURAL; 481 break; 482 } 483 ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr); 484 if (flg) { 485 options.ColPerm = MMD_ATA; 486 break; 487 } 488 ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr); 489 if (flg) { 490 options.ColPerm = COLAMD; 491 break; 492 } 493 SETERRQ1(1,"Unknown column permutation %s",buff); 494 } 495 496 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 497 if (!flg) { 498 options.ReplaceTinyPivot = NO; 499 } 500 501 options.IterRefine = NOREFINE; 502 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 503 if (flg) { 504 options.IterRefine = DOUBLE; 505 } 506 507 if (PetscLogPrintInfo) { 508 lu->StatPrint = (int)PETSC_TRUE; 509 } else { 510 lu->StatPrint = (int)PETSC_FALSE; 511 } 512 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 513 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 514 PetscOptionsEnd(); 515 516 /* Initialize the SuperLU process grid. */ 517 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 518 519 /* Initialize ScalePermstruct and LUstruct. */ 520 ScalePermstructInit(M, N, &lu->ScalePermstruct); 521 LUstructInit(M, N, &lu->LUstruct); 522 523 lu->options = options; 524 lu->flg = DIFFERENT_NONZERO_PATTERN; 525 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 526 *F = B; 527 PetscFunctionReturn(0); 528 } 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 532 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 533 int ierr; 534 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 535 536 PetscFunctionBegin; 537 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 538 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 539 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 545 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 546 { 547 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 548 superlu_options_t options; 549 int ierr; 550 char *colperm; 551 552 PetscFunctionBegin; 553 /* check if matrix is superlu_dist type */ 554 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 555 556 options = lu->options; 557 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 558 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 559 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 563 if (options.ColPerm == NATURAL) { 564 colperm = "NATURAL"; 565 } else if (options.ColPerm == MMD_AT_PLUS_A) { 566 colperm = "MMD_AT_PLUS_A"; 567 } else if (options.ColPerm == MMD_ATA) { 568 colperm = "MMD_ATA"; 569 } else if (options.ColPerm == COLAMD) { 570 colperm = "COLAMD"; 571 } else { 572 SETERRQ(1,"Unknown column permutation"); 573 } 574 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatView_SuperLU_DIST" 580 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 581 { 582 int ierr; 583 PetscTruth isascii; 584 PetscViewerFormat format; 585 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 586 587 PetscFunctionBegin; 588 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 589 590 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 591 if (isascii) { 592 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 593 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 594 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 595 } 596 } 597 PetscFunctionReturn(0); 598 } 599 600 601 EXTERN_C_BEGIN 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 604 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) { 605 /* This routine is only called to convert to MATSUPERLU_DIST */ 606 /* from MATSEQAIJ if A has a single process communicator */ 607 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 608 int ierr,size; 609 MPI_Comm comm; 610 Mat B=*newmat; 611 Mat_SuperLU_DIST *lu; 612 613 PetscFunctionBegin; 614 if (B != A) { 615 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 616 } 617 618 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 619 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 620 621 lu->MatDuplicate = A->ops->duplicate; 622 lu->MatView = A->ops->view; 623 lu->MatAssemblyEnd = A->ops->assemblyend; 624 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 625 lu->MatDestroy = A->ops->destroy; 626 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 627 628 B->spptr = (void*)lu; 629 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 630 B->ops->view = MatView_SuperLU_DIST; 631 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 632 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 633 B->ops->destroy = MatDestroy_SuperLU_DIST; 634 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 635 if (size == 1) { 636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 637 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 639 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 640 } else { 641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 642 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 644 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 645 } 646 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 647 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 648 *newmat = B; 649 PetscFunctionReturn(0); 650 } 651 EXTERN_C_END 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 655 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 656 int ierr; 657 PetscFunctionBegin; 658 ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr); 659 ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662 663 /*MC 664 MATSUPERLU_DIST - a matrix type providing direct solvers (LU) for parallel matrices 665 via the external package SuperLU_DIST. 666 667 If SuperLU_DIST is installed (see the manual for 668 instructions on how to declare the existence of external packages), 669 a matrix type can be constructed which invokes SuperLU_DIST solvers. 670 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 671 This matrix type is only supported for double precision real. 672 673 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 674 and from MATMPIAIJ otherwise. As a result, for single process communicators, 675 MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 676 for communicators controlling multiple processes. It is recommended that you call both of 677 the above preallocation routines for simplicity. 678 679 Options Database Keys: 680 + -mat_type superlu_dist 681 . -mat_superlu_dist_r <n> - number of rows in processor partition 682 . -mat_superlu_dist_c <n> - number of columns in processor partition 683 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 684 . -mat_superlu_dist_equil - equilibrate the matrix 685 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 686 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 687 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 688 . -mat_superlu_dist_iterrefine - use iterative refinement 689 - -mat_superlu_dist_statprint - print factorization information 690 691 Level: beginner 692 693 .seealso: PCLU 694 M*/ 695 696 EXTERN_C_BEGIN 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatCreate_SuperLU_DIST" 699 int MatCreate_SuperLU_DIST(Mat A) { 700 int ierr,size; 701 702 PetscFunctionBegin; 703 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 704 /* and SuperLU_DIST types */ 705 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 706 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 707 if (size == 1) { 708 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 711 } 712 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 EXTERN_C_END 716 717 EXTERN_C_BEGIN 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatLoad_SuperLU_DIST" 720 int MatLoad_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) { 721 int ierr,size,(*r)(PetscViewer,MatType,Mat*); 722 MPI_Comm comm; 723 724 PetscFunctionBegin; 725 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 726 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 727 if (size == 1) { 728 ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr); 729 } else { 730 ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr); 731 } 732 ierr = (*r)(viewer,type,A);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 EXTERN_C_END 736