1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 EXTERN_C_BEGIN 9 #if defined(PETSC_USE_COMPLEX) 10 #include <superlu_zdefs.h> 11 #else 12 #include <superlu_ddefs.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 int_t nprow,npcol,*row,*col; 18 gridinfo_t grid; 19 superlu_dist_options_t options; 20 SuperMatrix A_sup; 21 ScalePermstruct_t ScalePermstruct; 22 LUstruct_t LUstruct; 23 int StatPrint; 24 SOLVEstruct_t SOLVEstruct; 25 fact_t FactPattern; 26 MPI_Comm comm_superlu; 27 #if defined(PETSC_USE_COMPLEX) 28 doublecomplex *val; 29 #else 30 double *val; 31 #endif 32 PetscBool matsolve_iscalled,matmatsolve_iscalled; 33 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 34 } Mat_SuperLU_DIST; 35 36 37 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 38 { 39 Mat_SuperLU_DIST *lu= (Mat_SuperLU_DIST*)F->data; 40 41 PetscFunctionBegin; 42 #if defined(PETSC_USE_COMPLEX) 43 PetscStackCall("SuperLU_DIST:pzGetDiagU",pzGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,(doublecomplex*)diagU)); 44 #else 45 PetscStackCall("SuperLU_DIST:pdGetDiagU",pdGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,diagU)); 46 #endif 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 51 { 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 56 ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 61 { 62 PetscErrorCode ierr; 63 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 64 65 PetscFunctionBegin; 66 if (lu->CleanUpSuperLU_Dist) { 67 /* Deallocate SuperLU_DIST storage */ 68 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 69 if (lu->options.SolveInitialized) { 70 #if defined(PETSC_USE_COMPLEX) 71 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 72 #else 73 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 74 #endif 75 } 76 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 77 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 78 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 79 80 /* Release the SuperLU_DIST process grid. */ 81 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 82 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 83 } 84 ierr = PetscFree(A->data);CHKERRQ(ierr); 85 /* clear composed functions */ 86 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 87 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr); 88 89 PetscFunctionReturn(0); 90 } 91 92 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 93 { 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 95 PetscErrorCode ierr; 96 PetscMPIInt size; 97 PetscInt m=A->rmap->n; 98 SuperLUStat_t stat; 99 double berr[1]; 100 PetscScalar *bptr=NULL; 101 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 102 static PetscBool cite = PETSC_FALSE; 103 104 PetscFunctionBegin; 105 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 106 ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr); 107 108 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 109 110 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 111 /* see comments in MatMatSolve() */ 112 #if defined(PETSC_USE_COMPLEX) 113 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 114 #else 115 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 116 #endif 117 lu->options.SolveInitialized = NO; 118 } 119 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 120 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 121 122 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 123 #if defined(PETSC_USE_COMPLEX) 124 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 125 #else 126 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 127 #endif 128 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 129 130 if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 131 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 132 133 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 134 lu->matsolve_iscalled = PETSC_TRUE; 135 lu->matmatsolve_iscalled = PETSC_FALSE; 136 PetscFunctionReturn(0); 137 } 138 139 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 140 { 141 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 142 PetscErrorCode ierr; 143 PetscMPIInt size; 144 PetscInt m=A->rmap->n,nrhs; 145 SuperLUStat_t stat; 146 double berr[1]; 147 PetscScalar *bptr; 148 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 149 PetscBool flg; 150 151 PetscFunctionBegin; 152 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 153 ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 154 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 155 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 156 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 157 158 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 159 160 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 161 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 162 thus destroy it and create a new SOLVEstruct. 163 Otherwise it may result in memory corruption or incorrect solution 164 See src/mat/examples/tests/ex125.c */ 165 #if defined(PETSC_USE_COMPLEX) 166 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 167 #else 168 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 169 #endif 170 lu->options.SolveInitialized = NO; 171 } 172 ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 173 174 ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr); 175 176 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 177 ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr); 178 179 #if defined(PETSC_USE_COMPLEX) 180 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 181 #else 182 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 183 #endif 184 185 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 186 ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr); 187 188 if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 189 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 190 lu->matsolve_iscalled = PETSC_FALSE; 191 lu->matmatsolve_iscalled = PETSC_TRUE; 192 PetscFunctionReturn(0); 193 } 194 195 /* 196 input: 197 F: numeric Cholesky factor 198 output: 199 nneg: total number of negative pivots 200 nzero: total number of zero pivots 201 npos: (global dimension of F) - nneg - nzero 202 */ 203 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 204 { 205 PetscErrorCode ierr; 206 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 207 PetscScalar *diagU=NULL; 208 PetscInt M,i,neg=0,zero=0,pos=0; 209 PetscReal r; 210 211 PetscFunctionBegin; 212 if (!F->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 213 if (lu->options.RowPerm != NOROWPERM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 214 ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr); 215 ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr); 216 ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr); 217 for (i=0; i<M; i++) { 218 #if defined(PETSC_USE_COMPLEX) 219 r = PetscImaginaryPart(diagU[i])/10.0; 220 if (r< -PETSC_MACHINE_EPSILON || r>PETSC_MACHINE_EPSILON) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%d]=%g + i %g is non-real",i,PetscRealPart(diagU[i]),r*10.0); 221 r = PetscRealPart(diagU[i]); 222 #else 223 r = diagU[i]; 224 #endif 225 if (r > 0) { 226 pos++; 227 } else if (r < 0) { 228 neg++; 229 } else zero++; 230 } 231 232 ierr = PetscFree(diagU);CHKERRQ(ierr); 233 if (nneg) *nneg = neg; 234 if (nzero) *nzero = zero; 235 if (npos) *npos = pos; 236 PetscFunctionReturn(0); 237 } 238 239 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 240 { 241 Mat_SeqAIJ *aa=NULL,*bb=NULL; 242 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 243 PetscErrorCode ierr; 244 PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai=NULL,*aj=NULL,*bi=NULL,*bj=NULL,nz,rstart,*garray=NULL, 245 m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj=NULL,*ajj=NULL; 246 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 247 PetscMPIInt size; 248 SuperLUStat_t stat; 249 double *berr=0; 250 #if defined(PETSC_USE_COMPLEX) 251 doublecomplex *av=NULL, *bv=NULL; 252 #else 253 double *av=NULL, *bv=NULL; 254 #endif 255 256 PetscFunctionBegin; 257 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 258 259 if (size == 1) { 260 aa = (Mat_SeqAIJ*)A->data; 261 rstart = 0; 262 nz = aa->nz; 263 } else { 264 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 265 aa = (Mat_SeqAIJ*)(mat->A)->data; 266 bb = (Mat_SeqAIJ*)(mat->B)->data; 267 ai = aa->i; aj = aa->j; 268 bi = bb->i; bj = bb->j; 269 #if defined(PETSC_USE_COMPLEX) 270 av = (doublecomplex*)aa->a; 271 bv = (doublecomplex*)bb->a; 272 #else 273 av =aa->a; 274 bv = bb->a; 275 #endif 276 rstart = A->rmap->rstart; 277 nz = aa->nz + bb->nz; 278 garray = mat->garray; 279 } 280 281 /* Allocations for A_sup */ 282 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 283 #if defined(PETSC_USE_COMPLEX) 284 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row)); 285 #else 286 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row)); 287 #endif 288 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 289 if (lu->FactPattern == SamePattern_SameRowPerm) { 290 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 291 } else if (lu->FactPattern == SamePattern) { 292 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); /* Deallocate L and U matrices. */ 293 lu->options.Fact = SamePattern; 294 } else if (lu->FactPattern == DOFACT) { 295 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 296 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); 297 lu->options.Fact = DOFACT; 298 299 #if defined(PETSC_USE_COMPLEX) 300 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row)); 301 #else 302 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row)); 303 #endif 304 } else { 305 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 306 } 307 } 308 309 /* Copy AIJ matrix to superlu_dist matrix */ 310 if (size == 1) { /* A_sup has same SeqAIJ format as input mat */ 311 ai = aa->i; aj = aa->j; 312 #if defined(PETSC_USE_COMPLEX) 313 av = (doublecomplex*)aa->a; 314 #else 315 av = aa->a; 316 #endif 317 318 ierr = PetscArraycpy(lu->row,ai,(m+1));CHKERRQ(ierr); 319 ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr); 320 ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr); 321 } else { 322 nz = 0; 323 for (i=0; i<m; i++) { 324 lu->row[i] = nz; 325 countA = ai[i+1] - ai[i]; 326 countB = bi[i+1] - bi[i]; 327 if (aj) { 328 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 329 } else { 330 ajj = NULL; 331 } 332 bjj = bj + bi[i]; 333 334 /* B part, smaller col index */ 335 if (aj) { 336 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 337 } else { /* superlu_dist does not require matrix has diagonal entries, thus aj=NULL would work */ 338 colA_start = rstart; 339 } 340 jB = 0; 341 for (j=0; j<countB; j++) { 342 jcol = garray[bjj[j]]; 343 if (jcol > colA_start) { 344 jB = j; 345 break; 346 } 347 lu->col[nz] = jcol; 348 lu->val[nz++] = *bv++; 349 if (j==countB-1) jB = countB; 350 } 351 352 /* A part */ 353 for (j=0; j<countA; j++) { 354 lu->col[nz] = rstart + ajj[j]; 355 lu->val[nz++] = *av++; 356 } 357 358 /* B part, larger col index */ 359 for (j=jB; j<countB; j++) { 360 lu->col[nz] = garray[bjj[j]]; 361 lu->val[nz++] = *bv++; 362 } 363 } 364 lu->row[m] = nz; 365 } 366 367 /* Create and setup A_sup */ 368 if (lu->options.Fact == DOFACT) { 369 #if defined(PETSC_USE_COMPLEX) 370 PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE)); 371 #else 372 PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE)); 373 #endif 374 } 375 376 /* Factor the matrix. */ 377 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 378 #if defined(PETSC_USE_COMPLEX) 379 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 380 #else 381 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 382 #endif 383 384 if (sinfo > 0) { 385 if (A->erroriffailure) { 386 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); 387 } else { 388 if (sinfo <= lu->A_sup.ncol) { 389 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 390 ierr = PetscInfo1(F,"U(i,i) is exactly zero, i= %D\n",sinfo);CHKERRQ(ierr); 391 } else if (sinfo > lu->A_sup.ncol) { 392 /* 393 number of bytes allocated when memory allocation 394 failure occurred, plus A->ncol. 395 */ 396 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 397 ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr); 398 } 399 } 400 } else if (sinfo < 0) { 401 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, argument in p*gssvx() had an illegal value", sinfo); 402 } 403 404 if (lu->options.PrintStat) { 405 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 406 } 407 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 408 F->assembled = PETSC_TRUE; 409 F->preallocated = PETSC_TRUE; 410 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 411 PetscFunctionReturn(0); 412 } 413 414 /* Note the Petsc r and c permutations are ignored */ 415 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 416 { 417 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 418 PetscInt M = A->rmap->N,N=A->cmap->N; 419 420 PetscFunctionBegin; 421 /* Initialize the SuperLU process grid. */ 422 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 423 424 /* Initialize ScalePermstruct and LUstruct. */ 425 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 426 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 427 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 428 F->ops->solve = MatSolve_SuperLU_DIST; 429 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 430 F->ops->getinertia = NULL; 431 432 if (A->symmetric || A->hermitian) { 433 F->ops->getinertia = MatGetInertia_SuperLU_DIST; 434 } 435 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 436 PetscFunctionReturn(0); 437 } 438 439 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 440 { 441 PetscErrorCode ierr; 442 443 PetscFunctionBegin; 444 if (!A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Input matrix must be symmetric\n"); 445 ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr); 446 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 447 PetscFunctionReturn(0); 448 } 449 450 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 451 { 452 PetscFunctionBegin; 453 *type = MATSOLVERSUPERLU_DIST; 454 PetscFunctionReturn(0); 455 } 456 457 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer) 458 { 459 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 460 superlu_dist_options_t options; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 /* check if matrix is superlu_dist type */ 465 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 466 467 options = lu->options; 468 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 469 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 470 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr); 471 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 472 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr); 473 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 474 475 switch (options.RowPerm) { 476 case NOROWPERM: 477 ierr = PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n");CHKERRQ(ierr); 478 break; 479 case LargeDiag_MC64: 480 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n");CHKERRQ(ierr); 481 break; 482 case LargeDiag_AWPM: 483 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr); 484 break; 485 case MY_PERMR: 486 ierr = PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n");CHKERRQ(ierr); 487 break; 488 default: 489 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 490 } 491 492 switch (options.ColPerm) { 493 case NATURAL: 494 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 495 break; 496 case MMD_AT_PLUS_A: 497 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 498 break; 499 case MMD_ATA: 500 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 501 break; 502 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 503 case METIS_AT_PLUS_A: 504 ierr = PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr); 505 break; 506 case PARMETIS: 507 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 508 break; 509 default: 510 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 511 } 512 513 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr); 514 515 if (lu->FactPattern == SamePattern) { 516 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 517 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 518 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 519 } else if (lu->FactPattern == DOFACT) { 520 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n");CHKERRQ(ierr); 521 } else { 522 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 523 } 524 PetscFunctionReturn(0); 525 } 526 527 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 528 { 529 PetscErrorCode ierr; 530 PetscBool iascii; 531 PetscViewerFormat format; 532 533 PetscFunctionBegin; 534 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 535 if (iascii) { 536 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 537 if (format == PETSC_VIEWER_ASCII_INFO) { 538 ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 545 { 546 Mat B; 547 Mat_SuperLU_DIST *lu; 548 PetscErrorCode ierr; 549 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 550 PetscMPIInt size; 551 superlu_dist_options_t options; 552 PetscBool flg; 553 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 554 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 555 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 556 PetscBool set; 557 558 PetscFunctionBegin; 559 /* Create the factorization matrix */ 560 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 561 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 562 ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr); 563 ierr = MatSetUp(B);CHKERRQ(ierr); 564 B->ops->getinfo = MatGetInfo_External; 565 B->ops->view = MatView_SuperLU_DIST; 566 B->ops->destroy = MatDestroy_SuperLU_DIST; 567 568 if (ftype == MAT_FACTOR_LU) { 569 B->factortype = MAT_FACTOR_LU; 570 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 571 } else { 572 B->factortype = MAT_FACTOR_CHOLESKY; 573 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 574 } 575 576 /* set solvertype */ 577 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 578 ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr); 579 580 ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 581 B->data = lu; 582 583 /* Set the default input options: 584 options.Fact = DOFACT; 585 options.Equil = YES; 586 options.ParSymbFact = NO; 587 options.ColPerm = METIS_AT_PLUS_A; 588 options.RowPerm = LargeDiag_MC64; 589 options.ReplaceTinyPivot = YES; 590 options.IterRefine = DOUBLE; 591 options.Trans = NOTRANS; 592 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 593 options.RefineInitialized = NO; 594 options.PrintStat = YES; 595 */ 596 set_default_options_dist(&options); 597 598 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(lu->comm_superlu));CHKERRQ(ierr); 599 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 600 /* Default num of process columns and rows */ 601 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 602 if (!lu->nprow) lu->nprow = 1; 603 while (lu->nprow > 0) { 604 lu->npcol = (int_t) (size/lu->nprow); 605 if (size == lu->nprow * lu->npcol) break; 606 lu->nprow--; 607 } 608 609 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 610 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr); 611 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr); 612 if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 613 614 ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 615 if (set && !flg) options.Equil = NO; 616 617 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr); 618 if (flg) { 619 switch (indx) { 620 case 0: 621 options.RowPerm = NOROWPERM; 622 break; 623 case 1: 624 options.RowPerm = LargeDiag_MC64; 625 break; 626 case 2: 627 options.RowPerm = LargeDiag_AWPM; 628 break; 629 case 3: 630 options.RowPerm = MY_PERMR; 631 break; 632 default: 633 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 634 } 635 } 636 637 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr); 638 if (flg) { 639 switch (indx) { 640 case 0: 641 options.ColPerm = NATURAL; 642 break; 643 case 1: 644 options.ColPerm = MMD_AT_PLUS_A; 645 break; 646 case 2: 647 options.ColPerm = MMD_ATA; 648 break; 649 case 3: 650 options.ColPerm = METIS_AT_PLUS_A; 651 break; 652 case 4: 653 options.ColPerm = PARMETIS; /* only works for np>1 */ 654 break; 655 default: 656 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 657 } 658 } 659 660 options.ReplaceTinyPivot = NO; 661 ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 662 if (set && flg) options.ReplaceTinyPivot = YES; 663 664 options.ParSymbFact = NO; 665 ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 666 if (set && flg && size>1) { 667 #if defined(PETSC_HAVE_PARMETIS) 668 options.ParSymbFact = YES; 669 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 670 #else 671 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 672 #endif 673 } 674 675 lu->FactPattern = SamePattern; 676 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr); 677 if (flg) { 678 switch (indx) { 679 case 0: 680 lu->FactPattern = SamePattern; 681 break; 682 case 1: 683 lu->FactPattern = SamePattern_SameRowPerm; 684 break; 685 case 2: 686 lu->FactPattern = DOFACT; 687 break; 688 } 689 } 690 691 options.IterRefine = NOREFINE; 692 ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set);CHKERRQ(ierr); 693 if (set) { 694 if (flg) options.IterRefine = SLU_DOUBLE; 695 else options.IterRefine = NOREFINE; 696 } 697 698 if (PetscLogPrintInfo) options.PrintStat = YES; 699 else options.PrintStat = NO; 700 ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr); 701 ierr = PetscOptionsEnd();CHKERRQ(ierr); 702 703 lu->options = options; 704 lu->options.Fact = DOFACT; 705 lu->matsolve_iscalled = PETSC_FALSE; 706 lu->matmatsolve_iscalled = PETSC_FALSE; 707 708 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr); 709 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr); 710 711 *F = B; 712 PetscFunctionReturn(0); 713 } 714 715 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 716 { 717 PetscErrorCode ierr; 718 PetscFunctionBegin; 719 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ, MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 720 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ, MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 721 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ, MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 722 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ, MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 /*MC 727 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 728 729 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 730 731 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 732 733 Works with AIJ matrices 734 735 Options Database Keys: 736 + -mat_superlu_dist_r <n> - number of rows in processor partition 737 . -mat_superlu_dist_c <n> - number of columns in processor partition 738 . -mat_superlu_dist_equil - equilibrate the matrix 739 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 740 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 741 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 742 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT 743 . -mat_superlu_dist_iterrefine - use iterative refinement 744 - -mat_superlu_dist_statprint - print factorization information 745 746 Level: beginner 747 748 .seealso: PCLU 749 750 .seealso: PCFactorSetMatSolverType(), MatSolverType 751 752 M*/ 753