1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 MatStructure flg; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscFree(lu);CHKERRQ(ierr); 76 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 80 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 81 82 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 EXTERN_C_END 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 91 { 92 PetscErrorCode ierr; 93 PetscMPIInt size; 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 95 96 PetscFunctionBegin; 97 if (lu->CleanUpSuperLU_Dist) { 98 /* Deallocate SuperLU_DIST storage */ 99 if (lu->MatInputMode == GLOBAL) { 100 Destroy_CompCol_Matrix_dist(&lu->A_sup); 101 } else { 102 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 103 if ( lu->options.SolveInitialized ) { 104 #if defined(PETSC_USE_COMPLEX) 105 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #else 107 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 108 #endif 109 } 110 } 111 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 112 ScalePermstructFree(&lu->ScalePermstruct); 113 LUstructFree(&lu->LUstruct); 114 115 /* Release the SuperLU_DIST process grid. */ 116 superlu_gridexit(&lu->grid); 117 118 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 119 } 120 121 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 122 if (size == 1) { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } else { 125 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 126 } 127 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 128 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSolve_SuperLU_DIST" 134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 135 { 136 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 137 PetscErrorCode ierr; 138 PetscMPIInt size; 139 PetscInt m=A->M, N=A->N; 140 SuperLUStat_t stat; 141 double berr[1]; 142 PetscScalar *bptr; 143 PetscInt info, nrhs=1; 144 Vec x_seq; 145 IS iden; 146 VecScatter scat; 147 148 PetscFunctionBegin; 149 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 150 if (size > 1) { 151 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 152 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 153 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 154 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 155 ierr = ISDestroy(iden);CHKERRQ(ierr); 156 157 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 158 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 159 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 160 } else { /* distributed mat input */ 161 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 162 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 163 } 164 } else { /* size == 1 */ 165 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 166 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 167 } 168 169 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 170 171 PStatInit(&stat); /* Initialize the statistics variables. */ 172 if (lu->MatInputMode == GLOBAL) { 173 #if defined(PETSC_USE_COMPLEX) 174 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 175 &lu->grid, &lu->LUstruct, berr, &stat, &info); 176 #else 177 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 178 &lu->grid, &lu->LUstruct, berr, &stat, &info); 179 #endif 180 } else { /* distributed mat input */ 181 #if defined(PETSC_USE_COMPLEX) 182 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 183 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 184 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 185 #else 186 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 187 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 188 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 189 #endif 190 } 191 if (lu->options.PrintStat) { 192 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 193 } 194 PStatFree(&stat); 195 196 if (size > 1) { 197 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 198 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 199 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 200 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 201 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 202 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 203 } else { 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 } 206 } else { 207 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 215 { 216 Mat *tseq,A_seq = PETSC_NULL; 217 Mat_SeqAIJ *aa,*bb; 218 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 219 PetscErrorCode ierr; 220 PetscInt M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 221 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 222 PetscMPIInt size,rank; 223 SuperLUStat_t stat; 224 double *berr=0; 225 IS isrow; 226 PetscLogDouble time0,time,time_min,time_max; 227 #if defined(PETSC_USE_COMPLEX) 228 doublecomplex *av, *bv; 229 #else 230 double *av, *bv; 231 #endif 232 233 PetscFunctionBegin; 234 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 235 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 236 237 if (lu->options.PrintStat) { /* collect time for mat conversion */ 238 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 239 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 240 } 241 242 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 243 if (size > 1) { /* convert mpi A to seq mat A */ 244 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 245 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 246 ierr = ISDestroy(isrow);CHKERRQ(ierr); 247 248 A_seq = *tseq; 249 ierr = PetscFree(tseq);CHKERRQ(ierr); 250 aa = (Mat_SeqAIJ*)A_seq->data; 251 } else { 252 aa = (Mat_SeqAIJ*)A->data; 253 } 254 255 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 256 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 257 #if defined(PETSC_USE_COMPLEX) 258 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 259 #else 260 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 261 #endif 262 } else { /* successive numeric factorization, sparsity pattern is reused. */ 263 Destroy_CompCol_Matrix_dist(&lu->A_sup); 264 Destroy_LU(N, &lu->grid, &lu->LUstruct); 265 lu->options.Fact = SamePattern; 266 } 267 #if defined(PETSC_USE_COMPLEX) 268 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 269 #else 270 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 271 #endif 272 273 /* Create compressed column matrix A_sup. */ 274 #if defined(PETSC_USE_COMPLEX) 275 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 276 #else 277 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 278 #endif 279 } else { /* distributed mat input */ 280 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 281 aa=(Mat_SeqAIJ*)(mat->A)->data; 282 bb=(Mat_SeqAIJ*)(mat->B)->data; 283 ai=aa->i; aj=aa->j; 284 bi=bb->i; bj=bb->j; 285 #if defined(PETSC_USE_COMPLEX) 286 av=(doublecomplex*)aa->a; 287 bv=(doublecomplex*)bb->a; 288 #else 289 av=aa->a; 290 bv=bb->a; 291 #endif 292 rstart = mat->rstart; 293 nz = aa->nz + bb->nz; 294 garray = mat->garray; 295 rstart = mat->rstart; 296 297 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 298 #if defined(PETSC_USE_COMPLEX) 299 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 300 #else 301 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 302 #endif 303 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 304 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 305 Destroy_LU(N, &lu->grid, &lu->LUstruct); 306 lu->options.Fact = SamePattern; 307 } 308 nz = 0; irow = mat->rstart; 309 for ( i=0; i<m; i++ ) { 310 lu->row[i] = nz; 311 countA = ai[i+1] - ai[i]; 312 countB = bi[i+1] - bi[i]; 313 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 314 bjj = bj + bi[i]; 315 316 /* B part, smaller col index */ 317 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 318 jB = 0; 319 for (j=0; j<countB; j++){ 320 jcol = garray[bjj[j]]; 321 if (jcol > colA_start) { 322 jB = j; 323 break; 324 } 325 lu->col[nz] = jcol; 326 lu->val[nz++] = *bv++; 327 if (j==countB-1) jB = countB; 328 } 329 330 /* A part */ 331 for (j=0; j<countA; j++){ 332 lu->col[nz] = mat->rstart + ajj[j]; 333 lu->val[nz++] = *av++; 334 } 335 336 /* B part, larger col index */ 337 for (j=jB; j<countB; j++){ 338 lu->col[nz] = garray[bjj[j]]; 339 lu->val[nz++] = *bv++; 340 } 341 } 342 lu->row[m] = nz; 343 #if defined(PETSC_USE_COMPLEX) 344 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 345 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 346 #else 347 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 348 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 349 #endif 350 } 351 if (lu->options.PrintStat) { 352 ierr = PetscGetTime(&time);CHKERRQ(ierr); 353 time0 = time - time0; 354 } 355 356 /* Factor the matrix. */ 357 PStatInit(&stat); /* Initialize the statistics variables. */ 358 359 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 360 #if defined(PETSC_USE_COMPLEX) 361 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 362 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 363 #else 364 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 365 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 366 #endif 367 } else { /* distributed mat input */ 368 #if defined(PETSC_USE_COMPLEX) 369 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 370 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 371 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 372 #else 373 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 374 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 375 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 376 #endif 377 } 378 379 if (lu->MatInputMode == GLOBAL && size > 1){ 380 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 381 } 382 383 if (lu->options.PrintStat) { 384 if (size > 1){ 385 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 386 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 387 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 388 time = time/size; /* average time */ 389 if (!rank) 390 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 391 %g / %g / %g\n",time_max,time_min,time); 392 } else { 393 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 394 %g\n",time0); 395 } 396 397 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 398 } 399 PStatFree(&stat); 400 (*F)->assembled = PETSC_TRUE; 401 lu->flg = SAME_NONZERO_PATTERN; 402 PetscFunctionReturn(0); 403 } 404 405 /* Note the Petsc r and c permutations are ignored */ 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 408 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 409 { 410 Mat B; 411 Mat_SuperLU_DIST *lu; 412 PetscErrorCode ierr; 413 PetscInt M=A->M,N=A->N,indx; 414 PetscMPIInt size; 415 superlu_options_t options; 416 PetscTruth flg; 417 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 418 const char *prtype[] = {"LargeDiag","NATURAL"}; 419 420 PetscFunctionBegin; 421 /* Create the factorization matrix */ 422 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 423 ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr); 424 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 425 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 426 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 427 428 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 429 B->ops->solve = MatSolve_SuperLU_DIST; 430 B->factor = FACTOR_LU; 431 432 lu = (Mat_SuperLU_DIST*)(B->spptr); 433 434 /* Set the input options */ 435 set_default_options_dist(&options); 436 437 lu->MatInputMode = GLOBAL; 438 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 439 440 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 441 lu->nprow = size/2; /* Default process rows. */ 442 if (!lu->nprow) lu->nprow = 1; 443 lu->npcol = size/lu->nprow; /* Default process columns. */ 444 445 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 446 447 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 449 if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol"); 450 451 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 452 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 453 454 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 455 if (!flg) { 456 options.Equil = NO; 457 } 458 459 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 460 if (flg) { 461 switch (indx) { 462 case 0: 463 options.RowPerm = LargeDiag; 464 break; 465 case 1: 466 options.RowPerm = NOROWPERM; 467 break; 468 } 469 } 470 471 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 472 if (flg) { 473 switch (indx) { 474 case 0: 475 options.ColPerm = MMD_AT_PLUS_A; 476 break; 477 case 1: 478 options.ColPerm = NATURAL; 479 break; 480 case 2: 481 options.ColPerm = MMD_ATA; 482 break; 483 case 3: 484 options.ColPerm = COLAMD; 485 break; 486 } 487 } 488 489 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 490 if (!flg) { 491 options.ReplaceTinyPivot = NO; 492 } 493 494 options.IterRefine = NOREFINE; 495 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 496 if (flg) { 497 options.IterRefine = DOUBLE; 498 } 499 500 if (PetscLogPrintInfo) { 501 options.PrintStat = YES; 502 } else { 503 options.PrintStat = NO; 504 } 505 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 506 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 507 PetscOptionsEnd(); 508 509 /* Initialize the SuperLU process grid. */ 510 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 511 512 /* Initialize ScalePermstruct and LUstruct. */ 513 ScalePermstructInit(M, N, &lu->ScalePermstruct); 514 LUstructInit(M, N, &lu->LUstruct); 515 516 lu->options = options; 517 lu->flg = DIFFERENT_NONZERO_PATTERN; 518 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 519 *F = B; 520 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 526 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 527 PetscErrorCode ierr; 528 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 529 530 PetscFunctionBegin; 531 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 532 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 533 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 534 PetscFunctionReturn(0); 535 } 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 539 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 540 { 541 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 542 superlu_options_t options; 543 PetscErrorCode ierr; 544 545 PetscFunctionBegin; 546 /* check if matrix is superlu_dist type */ 547 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 548 549 options = lu->options; 550 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 551 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 552 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 553 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 554 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 555 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 556 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 557 if (options.ColPerm == NATURAL) { 558 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 559 } else if (options.ColPerm == MMD_AT_PLUS_A) { 560 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 561 } else if (options.ColPerm == MMD_ATA) { 562 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 563 } else if (options.ColPerm == COLAMD) { 564 ierr = PetscViewerASCIIPrintf(viewer," Column permutation COLAMD\n");CHKERRQ(ierr); 565 } else { 566 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 567 } 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatView_SuperLU_DIST" 573 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 574 { 575 PetscErrorCode ierr; 576 PetscTruth iascii; 577 PetscViewerFormat format; 578 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 579 580 PetscFunctionBegin; 581 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 582 583 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 584 if (iascii) { 585 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 586 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 587 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 588 } 589 } 590 PetscFunctionReturn(0); 591 } 592 593 594 EXTERN_C_BEGIN 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 597 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 598 { 599 /* This routine is only called to convert to MATSUPERLU_DIST */ 600 /* from MATSEQAIJ if A has a single process communicator */ 601 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 602 PetscErrorCode ierr; 603 PetscMPIInt size; 604 MPI_Comm comm; 605 Mat B=*newmat; 606 Mat_SuperLU_DIST *lu; 607 608 PetscFunctionBegin; 609 if (reuse == MAT_INITIAL_MATRIX) { 610 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 611 } 612 613 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 614 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 615 616 lu->MatDuplicate = A->ops->duplicate; 617 lu->MatView = A->ops->view; 618 lu->MatAssemblyEnd = A->ops->assemblyend; 619 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 620 lu->MatDestroy = A->ops->destroy; 621 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 622 623 B->spptr = (void*)lu; 624 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 625 B->ops->view = MatView_SuperLU_DIST; 626 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 627 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 628 B->ops->destroy = MatDestroy_SuperLU_DIST; 629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 630 if (size == 1) { 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 632 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 634 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 635 } else { 636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 637 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 639 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 640 } 641 ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr); 642 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 643 *newmat = B; 644 PetscFunctionReturn(0); 645 } 646 EXTERN_C_END 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 650 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 651 PetscErrorCode ierr; 652 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 653 654 PetscFunctionBegin; 655 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 656 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 /*MC 661 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 662 via the external package SuperLU_DIST. 663 664 If SuperLU_DIST is installed (see the manual for 665 instructions on how to declare the existence of external packages), 666 a matrix type can be constructed which invokes SuperLU_DIST solvers. 667 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 668 669 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 670 and from MATMPIAIJ otherwise. As a result, for single process communicators, 671 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 672 for communicators controlling multiple processes. It is recommended that you call both of 673 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 674 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 675 without data copy. 676 677 Options Database Keys: 678 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 679 . -mat_superlu_dist_r <n> - number of rows in processor partition 680 . -mat_superlu_dist_c <n> - number of columns in processor partition 681 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 682 . -mat_superlu_dist_equil - equilibrate the matrix 683 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 684 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 685 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 686 . -mat_superlu_dist_iterrefine - use iterative refinement 687 - -mat_superlu_dist_statprint - print factorization information 688 689 Level: beginner 690 691 .seealso: PCLU 692 M*/ 693 694 EXTERN_C_BEGIN 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatCreate_SuperLU_DIST" 697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 698 { 699 PetscErrorCode ierr; 700 PetscMPIInt size; 701 Mat A_diag; 702 703 PetscFunctionBegin; 704 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 705 /* and SuperLU_DIST types */ 706 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 707 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 708 if (size == 1) { 709 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 710 } else { 711 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 712 A_diag = ((Mat_MPIAIJ *)A->data)->A; 713 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 714 } 715 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 EXTERN_C_END 719 720