1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.0 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum { GLOBAL,DISTRIBUTED 22 } SuperLU_MatInputMode; 23 24 typedef struct { 25 int_t nprow,npcol,*row,*col; 26 gridinfo_t grid; 27 superlu_options_t options; 28 SuperMatrix A_sup; 29 ScalePermstruct_t ScalePermstruct; 30 LUstruct_t LUstruct; 31 int StatPrint; 32 int MatInputMode; 33 SOLVEstruct_t SOLVEstruct; 34 fact_t FactPattern; 35 MPI_Comm comm_superlu; 36 #if defined(PETSC_USE_COMPLEX) 37 doublecomplex *val; 38 #else 39 double *val; 40 #endif 41 42 /* A few function pointers for inheritance */ 43 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 44 PetscErrorCode (*MatView)(Mat,PetscViewer); 45 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 46 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 49 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 50 PetscTruth CleanUpSuperLU_Dist; 51 } Mat_SuperLU_DIST; 52 53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 54 55 EXTERN_C_BEGIN 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 59 { 60 PetscErrorCode ierr; 61 Mat B=*newmat; 62 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 63 64 PetscFunctionBegin; 65 if (reuse == MAT_INITIAL_MATRIX) { 66 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 67 } 68 /* Reset the original function pointers */ 69 B->ops->duplicate = lu->MatDuplicate; 70 B->ops->view = lu->MatView; 71 B->ops->assemblyend = lu->MatAssemblyEnd; 72 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 73 B->ops->destroy = lu->MatDestroy; 74 75 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 76 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 77 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr); 78 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 79 80 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 81 *newmat = B; 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 89 { 90 PetscErrorCode ierr; 91 PetscMPIInt size; 92 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 93 94 PetscFunctionBegin; 95 if (lu->CleanUpSuperLU_Dist) { 96 /* Deallocate SuperLU_DIST storage */ 97 if (lu->MatInputMode == GLOBAL) { 98 Destroy_CompCol_Matrix_dist(&lu->A_sup); 99 } else { 100 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 101 if ( lu->options.SolveInitialized ) { 102 #if defined(PETSC_USE_COMPLEX) 103 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 104 #else 105 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 106 #endif 107 } 108 } 109 Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct); 110 ScalePermstructFree(&lu->ScalePermstruct); 111 LUstructFree(&lu->LUstruct); 112 113 /* Release the SuperLU_DIST process grid. */ 114 superlu_gridexit(&lu->grid); 115 116 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 117 } 118 119 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 120 if (size == 1) { 121 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 122 } else { 123 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 124 } 125 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatSolve_SuperLU_DIST" 131 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 132 { 133 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 134 PetscErrorCode ierr; 135 PetscMPIInt size; 136 PetscInt m=A->rmap.N, N=A->cmap.N; 137 SuperLUStat_t stat; 138 double berr[1]; 139 PetscScalar *bptr; 140 PetscInt info, nrhs=1; 141 Vec x_seq; 142 IS iden; 143 VecScatter scat; 144 145 PetscFunctionBegin; 146 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 147 if (size > 1) { 148 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 149 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 150 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 151 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 152 ierr = ISDestroy(iden);CHKERRQ(ierr); 153 154 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 155 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 156 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 157 } else { /* distributed mat input */ 158 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 159 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 160 } 161 } else { /* size == 1 */ 162 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 163 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 164 } 165 166 if (lu->options.Fact != FACTORED) 167 SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 168 169 PStatInit(&stat); /* Initialize the statistics variables. */ 170 if (lu->MatInputMode == GLOBAL) { 171 #if defined(PETSC_USE_COMPLEX) 172 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 173 &lu->grid, &lu->LUstruct, berr, &stat, &info); 174 #else 175 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 176 &lu->grid, &lu->LUstruct, berr, &stat, &info); 177 #endif 178 } else { /* distributed mat input */ 179 #if defined(PETSC_USE_COMPLEX) 180 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid, 181 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 182 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 183 #else 184 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid, 185 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 186 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 187 #endif 188 } 189 if (lu->options.PrintStat) { 190 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 191 } 192 PStatFree(&stat); 193 194 if (size > 1) { 195 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 196 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 197 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 198 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 199 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 200 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 201 } else { 202 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 203 } 204 } else { 205 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 212 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 213 { 214 Mat *tseq,A_seq = PETSC_NULL; 215 Mat_SeqAIJ *aa,*bb; 216 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 217 PetscErrorCode ierr; 218 PetscInt M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 219 m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 220 PetscMPIInt size,rank; 221 SuperLUStat_t stat; 222 double *berr=0; 223 IS isrow; 224 PetscLogDouble time0,time,time_min,time_max; 225 Mat F_diag=PETSC_NULL; 226 #if defined(PETSC_USE_COMPLEX) 227 doublecomplex *av, *bv; 228 #else 229 double *av, *bv; 230 #endif 231 232 PetscFunctionBegin; 233 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 234 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 235 236 if (lu->options.PrintStat) { /* collect time for mat conversion */ 237 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 238 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 239 } 240 241 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 242 if (size > 1) { /* convert mpi A to seq mat A */ 243 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 244 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 245 ierr = ISDestroy(isrow);CHKERRQ(ierr); 246 247 A_seq = *tseq; 248 ierr = PetscFree(tseq);CHKERRQ(ierr); 249 aa = (Mat_SeqAIJ*)A_seq->data; 250 } else { 251 aa = (Mat_SeqAIJ*)A->data; 252 } 253 254 /* Convert Petsc NR matrix to SuperLU_DIST NC. 255 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 256 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 257 if (lu->FactPattern == SamePattern_SameRowPerm){ 258 Destroy_CompCol_Matrix_dist(&lu->A_sup); 259 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 260 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 261 } else { 262 Destroy_CompCol_Matrix_dist(&lu->A_sup); 263 Destroy_LU(N, &lu->grid, &lu->LUstruct); 264 lu->options.Fact = SamePattern; 265 } 266 } 267 #if defined(PETSC_USE_COMPLEX) 268 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 269 #else 270 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 271 #endif 272 273 /* Create compressed column matrix A_sup. */ 274 #if defined(PETSC_USE_COMPLEX) 275 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 276 #else 277 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 278 #endif 279 } else { /* distributed mat input */ 280 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 281 aa=(Mat_SeqAIJ*)(mat->A)->data; 282 bb=(Mat_SeqAIJ*)(mat->B)->data; 283 ai=aa->i; aj=aa->j; 284 bi=bb->i; bj=bb->j; 285 #if defined(PETSC_USE_COMPLEX) 286 av=(doublecomplex*)aa->a; 287 bv=(doublecomplex*)bb->a; 288 #else 289 av=aa->a; 290 bv=bb->a; 291 #endif 292 rstart = A->rmap.rstart; 293 nz = aa->nz + bb->nz; 294 garray = mat->garray; 295 296 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 297 #if defined(PETSC_USE_COMPLEX) 298 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 299 #else 300 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 301 #endif 302 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 303 if (lu->FactPattern == SamePattern_SameRowPerm){ 304 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 305 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 306 } else { 307 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 308 lu->options.Fact = SamePattern; 309 } 310 } 311 nz = 0; irow = rstart; 312 for ( i=0; i<m; i++ ) { 313 lu->row[i] = nz; 314 countA = ai[i+1] - ai[i]; 315 countB = bi[i+1] - bi[i]; 316 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 317 bjj = bj + bi[i]; 318 319 /* B part, smaller col index */ 320 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 321 jB = 0; 322 for (j=0; j<countB; j++){ 323 jcol = garray[bjj[j]]; 324 if (jcol > colA_start) { 325 jB = j; 326 break; 327 } 328 lu->col[nz] = jcol; 329 lu->val[nz++] = *bv++; 330 if (j==countB-1) jB = countB; 331 } 332 333 /* A part */ 334 for (j=0; j<countA; j++){ 335 lu->col[nz] = rstart + ajj[j]; 336 lu->val[nz++] = *av++; 337 } 338 339 /* B part, larger col index */ 340 for (j=jB; j<countB; j++){ 341 lu->col[nz] = garray[bjj[j]]; 342 lu->val[nz++] = *bv++; 343 } 344 } 345 lu->row[m] = nz; 346 #if defined(PETSC_USE_COMPLEX) 347 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 348 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 349 #else 350 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 351 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 352 #endif 353 } 354 if (lu->options.PrintStat) { 355 ierr = PetscGetTime(&time);CHKERRQ(ierr); 356 time0 = time - time0; 357 } 358 359 /* Factor the matrix. */ 360 PStatInit(&stat); /* Initialize the statistics variables. */ 361 362 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 363 #if defined(PETSC_USE_COMPLEX) 364 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 365 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 366 #else 367 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 368 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 369 #endif 370 } else { /* distributed mat input */ 371 #if defined(PETSC_USE_COMPLEX) 372 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 373 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 374 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 375 #else 376 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 377 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 378 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 379 #endif 380 } 381 382 if (lu->MatInputMode == GLOBAL && size > 1){ 383 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 384 } 385 386 if (lu->options.PrintStat) { 387 if (size > 1){ 388 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 389 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 390 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 391 time = time/size; /* average time */ 392 if (!rank) 393 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 394 %g / %g / %g\n",time_max,time_min,time); 395 } else { 396 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 397 %g\n",time0); 398 } 399 400 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 401 } 402 PStatFree(&stat); 403 if (size > 1){ 404 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 405 F_diag->assembled = PETSC_TRUE; 406 } 407 (*F)->assembled = PETSC_TRUE; 408 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 409 PetscFunctionReturn(0); 410 } 411 412 /* Note the Petsc r and c permutations are ignored */ 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 415 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 416 { 417 Mat B; 418 Mat_SuperLU_DIST *lu; 419 PetscErrorCode ierr; 420 PetscInt M=A->rmap.N,N=A->cmap.N,indx; 421 PetscMPIInt size; 422 superlu_options_t options; 423 PetscTruth flg; 424 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"}; 425 const char *prtype[] = {"LargeDiag","NATURAL"}; 426 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 427 428 PetscFunctionBegin; 429 /* Create the factorization matrix */ 430 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 431 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 432 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 433 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 434 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 435 436 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 437 B->ops->solve = MatSolve_SuperLU_DIST; 438 B->factor = FACTOR_LU; 439 440 lu = (Mat_SuperLU_DIST*)(B->spptr); 441 442 /* Set the default input options: 443 options.Fact = DOFACT; 444 options.Equil = YES; 445 options.ColPerm = MMD_AT_PLUS_A; 446 options.RowPerm = LargeDiag; 447 options.ReplaceTinyPivot = YES; 448 options.Trans = NOTRANS; 449 options.IterRefine = DOUBLE; 450 options.SolveInitialized = NO; 451 options.RefineInitialized = NO; 452 options.PrintStat = YES; 453 */ 454 set_default_options_dist(&options); 455 456 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 457 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 458 459 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 460 lu->npcol = (PetscMPIInt)(sqrt(size)); /* Default num of process columns */ 461 if (!lu->npcol) lu->npcol = 1; 462 lu->nprow = (PetscMPIInt)(size/lu->npcol); /* Default num of process rows */ 463 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 464 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 465 if (size != lu->nprow * lu->npcol) 466 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 467 468 lu->MatInputMode = DISTRIBUTED; 469 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 470 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 471 472 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 473 if (!flg) { 474 options.Equil = NO; 475 } 476 477 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 478 if (flg) { 479 switch (indx) { 480 case 0: 481 options.RowPerm = LargeDiag; 482 break; 483 case 1: 484 options.RowPerm = NOROWPERM; 485 break; 486 } 487 } 488 489 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr); 490 if (flg) { 491 switch (indx) { 492 case 0: 493 options.ColPerm = MMD_AT_PLUS_A; 494 break; 495 case 1: 496 options.ColPerm = NATURAL; 497 break; 498 case 2: 499 options.ColPerm = MMD_ATA; 500 break; 501 } 502 } 503 504 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 505 if (!flg) { 506 options.ReplaceTinyPivot = NO; 507 } 508 509 lu->FactPattern = SamePattern; 510 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 511 if (flg) { 512 switch (indx) { 513 case 0: 514 lu->FactPattern = SamePattern; 515 break; 516 case 1: 517 lu->FactPattern = SamePattern_SameRowPerm; 518 break; 519 } 520 } 521 522 options.IterRefine = NOREFINE; 523 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 524 if (flg) { 525 options.IterRefine = DOUBLE; 526 } 527 528 if (PetscLogPrintInfo) { 529 options.PrintStat = YES; 530 } else { 531 options.PrintStat = NO; 532 } 533 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 534 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 535 PetscOptionsEnd(); 536 537 /* Initialize the SuperLU process grid. */ 538 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 539 540 /* Initialize ScalePermstruct and LUstruct. */ 541 ScalePermstructInit(M, N, &lu->ScalePermstruct); 542 LUstructInit(M, N, &lu->LUstruct); 543 544 lu->options = options; 545 lu->options.Fact = DOFACT; 546 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 547 *F = B; 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 553 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 554 PetscErrorCode ierr; 555 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 556 557 PetscFunctionBegin; 558 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 559 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 560 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 566 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 567 { 568 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 569 superlu_options_t options; 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 /* check if matrix is superlu_dist type */ 574 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 575 576 options = lu->options; 577 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 578 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 579 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 580 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 581 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 582 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 584 if (options.ColPerm == NATURAL) { 585 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 586 } else if (options.ColPerm == MMD_AT_PLUS_A) { 587 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 588 } else if (options.ColPerm == MMD_ATA) { 589 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 590 } else { 591 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 592 } 593 594 if (lu->FactPattern == SamePattern){ 595 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 596 } else { 597 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 598 } 599 PetscFunctionReturn(0); 600 } 601 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatView_SuperLU_DIST" 604 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 605 { 606 PetscErrorCode ierr; 607 PetscTruth iascii; 608 PetscViewerFormat format; 609 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 610 611 PetscFunctionBegin; 612 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 613 614 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 615 if (iascii) { 616 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 617 if (format == PETSC_VIEWER_ASCII_INFO) { 618 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 628 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat) 629 { 630 /* This routine is only called to convert to MATSUPERLU_DIST */ 631 /* from MATSEQAIJ if A has a single process communicator */ 632 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 633 PetscErrorCode ierr; 634 PetscMPIInt size; 635 MPI_Comm comm; 636 Mat B=*newmat; 637 Mat_SuperLU_DIST *lu; 638 639 PetscFunctionBegin; 640 if (reuse == MAT_INITIAL_MATRIX) { 641 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 642 } 643 644 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 645 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 646 647 lu->MatDuplicate = A->ops->duplicate; 648 lu->MatView = A->ops->view; 649 lu->MatAssemblyEnd = A->ops->assemblyend; 650 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 651 lu->MatDestroy = A->ops->destroy; 652 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 653 654 B->spptr = (void*)lu; 655 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 656 B->ops->view = MatView_SuperLU_DIST; 657 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 658 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 659 B->ops->destroy = MatDestroy_SuperLU_DIST; 660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 661 if (size == 1) { 662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 663 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 665 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 666 } else { 667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 668 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 670 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 671 } 672 ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr); 673 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 674 *newmat = B; 675 PetscFunctionReturn(0); 676 } 677 EXTERN_C_END 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 681 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 682 PetscErrorCode ierr; 683 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 684 685 PetscFunctionBegin; 686 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 687 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 /*MC 692 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 693 via the external package SuperLU_DIST. 694 695 If SuperLU_DIST is installed (see the manual for 696 instructions on how to declare the existence of external packages), 697 a matrix type can be constructed which invokes SuperLU_DIST solvers. 698 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 699 700 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 701 and from MATMPIAIJ otherwise. As a result, for single process communicators, 702 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 703 for communicators controlling multiple processes. It is recommended that you call both of 704 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 705 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 706 without data copy. 707 708 Options Database Keys: 709 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 710 . -mat_superlu_dist_r <n> - number of rows in processor partition 711 . -mat_superlu_dist_c <n> - number of columns in processor partition 712 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 713 . -mat_superlu_dist_equil - equilibrate the matrix 714 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 715 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 716 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 717 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 718 . -mat_superlu_dist_iterrefine - use iterative refinement 719 - -mat_superlu_dist_statprint - print factorization information 720 721 Level: beginner 722 723 .seealso: PCLU 724 M*/ 725 726 EXTERN_C_BEGIN 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatCreate_SuperLU_DIST" 729 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A) 730 { 731 PetscErrorCode ierr; 732 PetscMPIInt size; 733 734 PetscFunctionBegin; 735 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 736 /* and SuperLU_DIST types */ 737 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 738 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 739 if (size == 1) { 740 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 741 } else { 742 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 743 /* A_diag = 0x0 ??? -- do we need it? 744 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 745 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 746 */ 747 } 748 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 EXTERN_C_END 752 753