#define PETSCMAT_DLL

/*
        Provides an interface to the SuperLU_DIST_2.0 sparse solver
*/

#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on cray */
#include "stdlib.h"
#endif

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "superlu_zdefs.h"
#else
#include "superlu_ddefs.h"
#endif
EXTERN_C_END

typedef enum { GLOBAL, DISTRIBUTED } SuperLU_MatInputMode;

typedef struct {
  int_t             nprow,npcol,*row,*col;
  gridinfo_t        grid;
  superlu_options_t options;
  SuperMatrix       A_sup;
  ScalePermstruct_t ScalePermstruct;
  LUstruct_t        LUstruct;
  int               StatPrint;
  int               MatInputMode;
  SOLVEstruct_t     SOLVEstruct;
  fact_t            FactPattern;
  MPI_Comm          comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex     *val;
#else
  double            *val;
#endif

  /* A few function pointers for inheritance */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);

  /* Flag to clean up (non-global) SuperLU_DIST objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;

EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo*,Mat*);
extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo*,Mat*);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode   ierr;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Reset the original function pointers */
  B->ops->duplicate        = lu->MatDuplicate;
  B->ops->view             = lu->MatView;
  B->ops->assemblyend      = lu->MatAssemblyEnd;
  B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
  B->ops->destroy          = lu->MatDestroy;
  ierr = PetscFree(lu);CHKERRQ(ierr);
  A->spptr = PETSC_NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options,&lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options,&lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap.N,&lu->grid,&lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    ierr = MPI_Comm_free(&lu->comm_superlu);CHKERRQ(ierr);
  }

  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_SuperLU_DIST"
PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap.N,N=A->cmap.N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         info,nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
      ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
      ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
      ierr = ISDestroy(iden);CHKERRQ(ierr);

      ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
    } else { /* distributed mat input */
      ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
      ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
    }
  } else { /* size == 1 */
    ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PStatInit(&stat); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, m, nrhs,
                     &lu->grid, &lu->LUstruct, berr, &stat, &info);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, m, nrhs,
                     &lu->grid, &lu->LUstruct, berr, &stat, &info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
#endif
  }
  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1) {
    if (lu->MatInputMode == GLOBAL) { /* convert seq x to mpi x */
      ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
      ierr = VecDestroy(x_seq);CHKERRQ(ierr);
    } else {
      ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
    }
  } else {
    ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap.n,irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av,*bv;
#else
  double           *av,*bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
    ierr = PetscGetTime(&time0);CHKERRQ(ierr);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);

      A_seq = *tseq;
      ierr  = PetscFree(tseq);CHKERRQ(ierr);
      aa    = (Mat_SeqAIJ*)A_seq->data;
    } else {
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert the PETSc compressed row (NR) matrix to SuperLU_DIST compressed column (NC) format.
       Note: lu->val, lu->col and lu->row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); crashes! Leaving it out does not appear to leak memory. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col,&lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val,&lu->col,&lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa = (Mat_SeqAIJ*)(mat->A)->data;
    bb = (Mat_SeqAIJ*)(mat->B)->data;
    ai = aa->i; aj = aa->j;
    bi = bb->i; bj = bb->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
    bv = (doublecomplex*)bb->a;
#else
    av = aa->a;
    bv = bb->a;
#endif
    rstart = A->rmap.rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); crashes! Leaving it out does not appear to leak memory. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }

    /* Merge the diagonal (A) and off-diagonal (B) parts into one local compressed-row array,
       keeping global column indices in ascending order within each row */
    nz = 0; irow = rstart;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj    = aj + ai[i]; /* ptr to the beginning of this row */
      bjj    = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    ierr = PetscGetTime(&time);CHKERRQ(ierr);
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PStatInit(&stat); /* Initialize the statistics variables. */

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                     &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                     &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1) {
    ierr = MatDestroy(A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    if (size > 1) {
      ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);CHKERRQ(ierr);
      ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);CHKERRQ(ierr);
      ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);CHKERRQ(ierr);
      time = time/size; /* average time */
      if (!rank) {
        ierr = PetscPrintf(PETSC_COMM_SELF,"  Mat conversion (PETSc->SuperLU_DIST) time (max/min/avg): %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscPrintf(PETSC_COMM_SELF,"  Mat conversion (PETSc->SuperLU_DIST) time: %g\n",time0);CHKERRQ(ierr);
    }

    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1) {
    F_diag = ((Mat_MPIAIJ*)(*F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (*F)->assembled  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied; local option used by this function only */
  PetscFunctionReturn(0);
}

/* Note the PETSc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST  *lu;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
  PetscMPIInt       size;
  superlu_options_t options;
  PetscTruth        flg;
  const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
  const char        *prtype[] = {"LargeDiag","NATURAL"};
  const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  B->ops->solve           = MatSolve_SuperLU_DIST;
  B->factor               = FACTOR_LU;

  lu = (Mat_SuperLU_DIST*)(B->spptr);

  /* Set the default input options:
       options.Fact              = DOFACT;
       options.Equil             = YES;
       options.ColPerm           = MMD_AT_PLUS_A;
       options.RowPerm           = LargeDiag;
       options.ReplaceTinyPivot  = YES;
       options.Trans             = NOTRANS;
       options.IterRefine        = DOUBLE;
       options.SolveInitialized  = NO;
       options.RefineInitialized = NO;
       options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  ierr = MPI_Comm_dup(((PetscObject)A)->comm,&lu->comm_superlu);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  /* Default number of process columns and rows */
  lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
  if (!lu->npcol) lu->npcol = 1;
  while (lu->npcol > 0) {
    lu->nprow = (PetscMPIInt)(size/lu->npcol);
    if (size == lu->nprow * lu->npcol) break;
    lu->npcol--;
  }

  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
  if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

  lu->MatInputMode = DISTRIBUTED;
  ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
  if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

  ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
  if (!flg) {
    options.Equil = NO;
  }

  ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      options.RowPerm = LargeDiag;
      break;
    case 1:
      options.RowPerm = NOROWPERM;
      break;
    }
  }

  ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr); 505 if (flg) { 506 switch (indx) { 507 case 0: 508 options.ColPerm = MMD_AT_PLUS_A; 509 break; 510 case 1: 511 options.ColPerm = NATURAL; 512 break; 513 case 2: 514 options.ColPerm = MMD_ATA; 515 break; 516 } 517 } 518 519 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 520 if (!flg) { 521 options.ReplaceTinyPivot = NO; 522 } 523 524 lu->FactPattern = SamePattern; 525 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 526 if (flg) { 527 switch (indx) { 528 case 0: 529 lu->FactPattern = SamePattern; 530 break; 531 case 1: 532 lu->FactPattern = SamePattern_SameRowPerm; 533 break; 534 } 535 } 536 537 options.IterRefine = NOREFINE; 538 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 539 if (flg) { 540 options.IterRefine = DOUBLE; 541 } 542 543 if (PetscLogPrintInfo) { 544 options.PrintStat = YES; 545 } else { 546 options.PrintStat = NO; 547 } 548 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 549 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 550 PetscOptionsEnd(); 551 552 /* Initialize the SuperLU process grid. */ 553 superlu_gridinit(((PetscObject)lu)->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 554 555 /* Initialize ScalePermstruct and LUstruct. */ 556 ScalePermstructInit(M, N, &lu->ScalePermstruct); 557 LUstructInit(M, N, &lu->LUstruct); 558 559 lu->options = options; 560 lu->options.Fact = DOFACT; 561 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 562 *F = B; 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 568 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 569 PetscErrorCode ierr; 570 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 571 572 PetscFunctionBegin; 573 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 574 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 575 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 581 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 582 { 583 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 584 superlu_options_t options; 585 PetscErrorCode ierr; 586 587 PetscFunctionBegin; 588 /* check if matrix is superlu_dist type */ 589 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 590 591 options = lu->options; 592 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 593 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 594 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 596 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 597 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 598 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 
  ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL" : "LargeDiag");CHKERRQ(ierr);
  if (options.ColPerm == NATURAL) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
  } else if (options.ColPerm == MMD_AT_PLUS_A) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
  } else if (options.ColPerm == MMD_ATA) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  if (lu->FactPattern == SamePattern) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
  } else {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_SuperLU_DIST"
PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;
  Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);

  PetscFunctionBegin;
  ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert to MATSUPERLU_DIST */
  /* from MATSEQAIJ if A has a single process communicator     */
  /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  MPI_Comm         comm;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu;

  PetscFunctionBegin;
  ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
    lu->MatDuplicate        = B->ops->duplicate;
    lu->MatView             = B->ops->view;
    lu->MatAssemblyEnd      = B->ops->assemblyend;
    lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic;
    lu->MatDestroy          = B->ops->destroy;
  } else {
    lu->MatDuplicate        = A->ops->duplicate;
    lu->MatView             = A->ops->view;
    lu->MatAssemblyEnd      = A->ops->assemblyend;
    lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
    lu->MatDestroy          = A->ops->destroy;
  }
  lu->CleanUpSuperLU_Dist = PETSC_FALSE;

  B->spptr                 = (void*)lu;
  B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->destroy          = MatDestroy_SuperLU_DIST;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
                                             "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
                                             "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
                                             "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
                                             "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
  }
  ierr = PetscInfo(0,"Using SuperLU_DIST for AIJ LU factorization and solves.\n");CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_SuperLU_DIST"
PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
   via the external package SuperLU_DIST.

   If SuperLU_DIST is installed (see the manual for instructions on how to declare the existence
   of external packages), a matrix type can be constructed which invokes SuperLU_DIST solvers.
   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST), then optionally call
   MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation(). DO NOT call MatCreateSeqAIJ/MPIAIJ()
   directly, or the preallocation information will be LOST!

   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
   and from MATMPIAIJ otherwise.
   As a result, for single process communicators MatSeqAIJSetPreallocation() is supported,
   and MatMPIAIJSetPreallocation() is supported for communicators controlling multiple processes.
   It is recommended that you call both of the above preallocation routines for simplicity.
   One can also call MatConvert() for an in-place conversion to or from the MATSEQAIJ or MATMPIAIJ
   type (depending on the communicator size) without data copy; this MUST be called AFTER the
   matrix values are set. A brief usage sketch appears in a comment at the end of this file.

   Options Database Keys:
+  -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
.  -mat_superlu_dist_r <n> - number of rows in processor partition
.  -mat_superlu_dist_c <n> - number of columns in processor partition
.  -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
.  -mat_superlu_dist_equil - equilibrate the matrix
.  -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
.  -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
.  -mat_superlu_dist_replacetinypivot - replace tiny pivots
.  -mat_superlu_dist_fact <SamePattern,SamePattern_SameRowPerm> - sparsity pattern reuse for repeated factorization
.  -mat_superlu_dist_iterrefine - use iterative refinement
-  -mat_superlu_dist_statprint - print factorization information

   Level: beginner

.seealso: PCLU
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SuperLU_DIST"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    /* A_diag = 0x0 ??? -- do we need it?
       Mat A_diag = ((Mat_MPIAIJ*)A->data)->A;
       ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
    */
  }
  ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
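
/*
   Usage sketch (illustrative only, referenced from the MATSUPERLU_DIST manual page above):
   create the matrix with MatCreate()/MatSetType(,MATSUPERLU_DIST), preallocate as for AIJ,
   then let a direct (LU) preconditioner drive the factorization and solve routines in this
   file. The block below is a minimal sketch kept out of compilation with #if 0; the function
   name SolveWithSuperLU_DIST, the tridiagonal test operator, and the preallocation counts are
   placeholders for this example, not part of this file's API.
*/
#if 0
static PetscErrorCode SolveWithSuperLU_DIST(void)
{
  Mat            A;
  Vec            b,x;
  KSP            ksp;
  PC             pc;
  PetscErrorCode ierr;
  PetscInt       n=100,i,rstart,rend,col[3];
  PetscScalar    v[3];

  PetscFunctionBegin;
  /* Create the matrix as MATSUPERLU_DIST and preallocate exactly as for AIJ matrices */
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSUPERLU_DIST);CHKERRQ(ierr);             /* or -mat_type superlu_dist */
  ierr = MatSeqAIJSetPreallocation(A,3,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);

  /* Assemble a 1-D Laplacian as a stand-in for the user's operator */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    PetscInt ncols = 0;
    if (i > 0)   { col[ncols] = i-1; v[ncols++] = -1.0; }
    col[ncols] = i; v[ncols++] = 2.0;
    if (i < n-1) { col[ncols] = i+1; v[ncols++] = -1.0; }
    ierr = MatSetValues(A,1,&i,ncols,col,v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr);
  ierr = VecSetSizes(b,PETSC_DECIDE,n);CHKERRQ(ierr);
  ierr = VecSetFromOptions(b);CHKERRQ(ierr);
  ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
  ierr = VecSet(b,1.0);CHKERRQ(ierr);

  /* PCLU triggers MatLUFactorSymbolic/Numeric_SuperLU_DIST above; KSPSetFromOptions()
     picks up the -mat_superlu_dist_* options registered in MatLUFactorSymbolic */
  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

  ierr = KSPDestroy(ksp);CHKERRQ(ierr);
  ierr = VecDestroy(b);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif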