1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 8 EXTERN_C_BEGIN 9 #if defined(PETSC_USE_COMPLEX) 10 #include <superlu_zdefs.h> 11 #else 12 #include <superlu_ddefs.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 int_t nprow,npcol,*row,*col; 18 gridinfo_t grid; 19 superlu_dist_options_t options; 20 SuperMatrix A_sup; 21 ScalePermstruct_t ScalePermstruct; 22 LUstruct_t LUstruct; 23 int StatPrint; 24 SOLVEstruct_t SOLVEstruct; 25 fact_t FactPattern; 26 MPI_Comm comm_superlu; 27 #if defined(PETSC_USE_COMPLEX) 28 doublecomplex *val; 29 #else 30 double *val; 31 #endif 32 PetscBool matsolve_iscalled,matmatsolve_iscalled; 33 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 34 } Mat_SuperLU_DIST; 35 36 37 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 38 { 39 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 40 41 PetscFunctionBegin; 42 #if defined(PETSC_USE_COMPLEX) 43 PetscStackCall("SuperLU_DIST:pzGetDiagU",pzGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,(doublecomplex*)diagU)); 44 #else 45 PetscStackCall("SuperLU_DIST:pdGetDiagU",pdGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,diagU)); 46 #endif 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 51 { 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 56 ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 61 { 62 PetscErrorCode ierr; 63 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 64 65 PetscFunctionBegin; 66 if (lu->CleanUpSuperLU_Dist) { 67 /* Deallocate SuperLU_DIST storage */ 68 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 69 if (lu->options.SolveInitialized) { 70 #if defined(PETSC_USE_COMPLEX) 71 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 72 #else 73 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 74 #endif 75 } 76 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 77 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 78 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 79 80 /* Release the SuperLU_DIST process grid. */ 81 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 82 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 83 } 84 ierr = PetscFree(A->data);CHKERRQ(ierr); 85 /* clear composed functions */ 86 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 87 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr); 88 89 PetscFunctionReturn(0); 90 } 91 92 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 93 { 94 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 95 PetscErrorCode ierr; 96 PetscInt m=A->rmap->n; 97 SuperLUStat_t stat; 98 double berr[1]; 99 PetscScalar *bptr=NULL; 100 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 101 static PetscBool cite = PETSC_FALSE; 102 103 PetscFunctionBegin; 104 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 105 ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr); 106 107 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 108 /* see comments in MatMatSolve() */ 109 #if defined(PETSC_USE_COMPLEX) 110 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 111 #else 112 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 113 #endif 114 lu->options.SolveInitialized = NO; 115 } 116 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 117 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 118 119 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 120 #if defined(PETSC_USE_COMPLEX) 121 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 122 #else 123 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 124 #endif 125 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 126 127 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 128 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 129 130 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 131 lu->matsolve_iscalled = PETSC_TRUE; 132 lu->matmatsolve_iscalled = PETSC_FALSE; 133 PetscFunctionReturn(0); 134 } 135 136 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 137 { 138 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 139 PetscErrorCode ierr; 140 PetscInt m=A->rmap->n,nrhs; 141 SuperLUStat_t stat; 142 double berr[1]; 143 PetscScalar *bptr; 144 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 145 PetscBool flg; 146 147 PetscFunctionBegin; 148 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 149 ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 150 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 151 if (X != B_mpi) { 152 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 153 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 154 } 155 156 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 157 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 158 thus destroy it and create a new SOLVEstruct. 159 Otherwise it may result in memory corruption or incorrect solution 160 See src/mat/examples/tests/ex125.c */ 161 #if defined(PETSC_USE_COMPLEX) 162 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 163 #else 164 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 165 #endif 166 lu->options.SolveInitialized = NO; 167 } 168 if (X != B_mpi) { 169 ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 170 } 171 172 ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr); 173 174 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 175 ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr); 176 177 #if defined(PETSC_USE_COMPLEX) 178 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 179 #else 180 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 181 #endif 182 183 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 184 ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr); 185 186 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 187 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 188 lu->matsolve_iscalled = PETSC_FALSE; 189 lu->matmatsolve_iscalled = PETSC_TRUE; 190 PetscFunctionReturn(0); 191 } 192 193 /* 194 input: 195 F: numeric Cholesky factor 196 output: 197 nneg: total number of negative pivots 198 nzero: total number of zero pivots 199 npos: (global dimension of F) - nneg - nzero 200 */ 201 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 202 { 203 PetscErrorCode ierr; 204 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 205 PetscScalar *diagU=NULL; 206 PetscInt M,i,neg=0,zero=0,pos=0; 207 PetscReal r; 208 209 PetscFunctionBegin; 210 if (!F->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 211 if (lu->options.RowPerm != NOROWPERM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 212 ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr); 213 ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr); 214 ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { 216 #if defined(PETSC_USE_COMPLEX) 217 r = PetscImaginaryPart(diagU[i])/10.0; 218 if (r< -PETSC_MACHINE_EPSILON || r>PETSC_MACHINE_EPSILON) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%d]=%g + i %g is non-real",i,PetscRealPart(diagU[i]),r*10.0); 219 r = PetscRealPart(diagU[i]); 220 #else 221 r = diagU[i]; 222 #endif 223 if (r > 0) { 224 pos++; 225 } else if (r < 0) { 226 neg++; 227 } else zero++; 228 } 229 230 ierr = PetscFree(diagU);CHKERRQ(ierr); 231 if (nneg) *nneg = neg; 232 if (nzero) *nzero = zero; 233 if (npos) *npos = pos; 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 238 { 239 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 240 Mat Aloc; 241 const PetscScalar *av; 242 const PetscInt *ai=NULL,*aj=NULL; 243 PetscInt nz,dummy; 244 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 245 SuperLUStat_t stat; 246 double *berr=0; 247 PetscBool ismpiaij,isseqaij,flg; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 252 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 253 if (ismpiaij) { 254 ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr); 255 } else if (isseqaij) { 256 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 257 Aloc = A; 258 } else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name); 259 260 ierr = MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 261 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed"); 262 ierr = MatSeqAIJGetArrayRead(Aloc,&av);CHKERRQ(ierr); 263 nz = ai[Aloc->rmap->n]; 264 265 /* Allocations for A_sup */ 266 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 267 #if defined(PETSC_USE_COMPLEX) 268 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 269 #else 270 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 271 #endif 272 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 273 if (lu->FactPattern == SamePattern_SameRowPerm) { 274 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 275 } else if (lu->FactPattern == SamePattern) { 276 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); /* Deallocate L and U matrices. */ 277 lu->options.Fact = SamePattern; 278 } else if (lu->FactPattern == DOFACT) { 279 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 280 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 281 lu->options.Fact = DOFACT; 282 283 #if defined(PETSC_USE_COMPLEX) 284 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 285 #else 286 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 287 #endif 288 } else { 289 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 290 } 291 } 292 293 /* Copy AIJ matrix to superlu_dist matrix */ 294 ierr = PetscArraycpy(lu->row,ai,Aloc->rmap->n+1);CHKERRQ(ierr); 295 ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr); 296 ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr); 297 ierr = MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 298 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed"); 299 ierr = MatSeqAIJRestoreArrayRead(Aloc,&av);CHKERRQ(ierr); 300 ierr = MatDestroy(&Aloc);CHKERRQ(ierr); 301 302 /* Create and setup A_sup */ 303 if (lu->options.Fact == DOFACT) { 304 #if defined(PETSC_USE_COMPLEX) 305 PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE)); 306 #else 307 PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE)); 308 #endif 309 } 310 311 /* Factor the matrix. */ 312 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 313 #if defined(PETSC_USE_COMPLEX) 314 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 315 #else 316 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 317 #endif 318 319 if (sinfo > 0) { 320 if (A->erroriffailure) { 321 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); 322 } else { 323 if (sinfo <= lu->A_sup.ncol) { 324 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 325 ierr = PetscInfo1(F,"U(i,i) is exactly zero, i= %D\n",sinfo);CHKERRQ(ierr); 326 } else if (sinfo > lu->A_sup.ncol) { 327 /* 328 number of bytes allocated when memory allocation 329 failure occurred, plus A->ncol. 330 */ 331 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 332 ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr); 333 } 334 } 335 } else if (sinfo < 0) { 336 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, argument in p*gssvx() had an illegal value", sinfo); 337 } 338 339 if (lu->options.PrintStat) { 340 PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 341 } 342 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 343 F->assembled = PETSC_TRUE; 344 F->preallocated = PETSC_TRUE; 345 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 346 PetscFunctionReturn(0); 347 } 348 349 /* Note the Petsc r and c permutations are ignored */ 350 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 351 { 352 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 353 PetscInt M = A->rmap->N,N=A->cmap->N; 354 355 PetscFunctionBegin; 356 /* Initialize the SuperLU process grid. */ 357 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 358 359 /* Initialize ScalePermstruct and LUstruct. */ 360 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 361 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 362 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 363 F->ops->solve = MatSolve_SuperLU_DIST; 364 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 365 F->ops->getinertia = NULL; 366 367 if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 368 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 369 PetscFunctionReturn(0); 370 } 371 372 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr); 378 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 379 PetscFunctionReturn(0); 380 } 381 382 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 383 { 384 PetscFunctionBegin; 385 *type = MATSOLVERSUPERLU_DIST; 386 PetscFunctionReturn(0); 387 } 388 389 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer) 390 { 391 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 392 superlu_dist_options_t options; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 /* check if matrix is superlu_dist type */ 397 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 398 399 options = lu->options; 400 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 401 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 402 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr); 403 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 404 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr); 405 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 406 407 switch (options.RowPerm) { 408 case NOROWPERM: 409 ierr = PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n");CHKERRQ(ierr); 410 break; 411 case LargeDiag_MC64: 412 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n");CHKERRQ(ierr); 413 break; 414 case LargeDiag_AWPM: 415 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr); 416 break; 417 case MY_PERMR: 418 ierr = PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n");CHKERRQ(ierr); 419 break; 420 default: 421 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 422 } 423 424 switch (options.ColPerm) { 425 case NATURAL: 426 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 427 break; 428 case MMD_AT_PLUS_A: 429 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 430 break; 431 case MMD_ATA: 432 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 433 break; 434 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 435 case METIS_AT_PLUS_A: 436 ierr = PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr); 437 break; 438 case PARMETIS: 439 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 440 break; 441 default: 442 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 443 } 444 445 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr); 446 447 if (lu->FactPattern == SamePattern) { 448 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 449 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 450 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 451 } else if (lu->FactPattern == DOFACT) { 452 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n");CHKERRQ(ierr); 453 } else { 454 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 460 { 461 PetscErrorCode ierr; 462 PetscBool iascii; 463 PetscViewerFormat format; 464 465 PetscFunctionBegin; 466 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 467 if (iascii) { 468 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 469 if (format == PETSC_VIEWER_ASCII_INFO) { 470 ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 471 } 472 } 473 PetscFunctionReturn(0); 474 } 475 476 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 477 { 478 Mat B; 479 Mat_SuperLU_DIST *lu; 480 PetscErrorCode ierr; 481 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 482 PetscMPIInt size; 483 superlu_dist_options_t options; 484 PetscBool flg; 485 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 486 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 487 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 488 PetscBool set; 489 490 PetscFunctionBegin; 491 /* Create the factorization matrix */ 492 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 493 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 494 ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr); 495 ierr = MatSetUp(B);CHKERRQ(ierr); 496 B->ops->getinfo = MatGetInfo_External; 497 B->ops->view = MatView_SuperLU_DIST; 498 B->ops->destroy = MatDestroy_SuperLU_DIST; 499 500 /* Set the default input options: 501 options.Fact = DOFACT; 502 options.Equil = YES; 503 options.ParSymbFact = NO; 504 options.ColPerm = METIS_AT_PLUS_A; 505 options.RowPerm = LargeDiag_MC64; 506 options.ReplaceTinyPivot = YES; 507 options.IterRefine = DOUBLE; 508 options.Trans = NOTRANS; 509 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 510 options.RefineInitialized = NO; 511 options.PrintStat = YES; 512 options.SymPattern = NO; 513 */ 514 set_default_options_dist(&options); 515 516 if (ftype == MAT_FACTOR_LU) { 517 B->factortype = MAT_FACTOR_LU; 518 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 519 } else { 520 B->factortype = MAT_FACTOR_CHOLESKY; 521 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 522 options.SymPattern = YES; 523 } 524 525 /* set solvertype */ 526 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 527 ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr); 528 529 ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 530 B->data = lu; 531 532 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(lu->comm_superlu));CHKERRQ(ierr); 533 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 534 /* Default num of process columns and rows */ 535 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 536 if (!lu->nprow) lu->nprow = 1; 537 while (lu->nprow > 0) { 538 lu->npcol = (int_t) (size/lu->nprow); 539 if (size == lu->nprow * lu->npcol) break; 540 lu->nprow--; 541 } 542 543 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 544 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr); 545 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr); 546 if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 547 548 ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 549 if (set && !flg) options.Equil = NO; 550 551 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr); 552 if (flg) { 553 switch (indx) { 554 case 0: 555 options.RowPerm = NOROWPERM; 556 break; 557 case 1: 558 options.RowPerm = LargeDiag_MC64; 559 break; 560 case 2: 561 options.RowPerm = LargeDiag_AWPM; 562 break; 563 case 3: 564 options.RowPerm = MY_PERMR; 565 break; 566 default: 567 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 568 } 569 } 570 571 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr); 572 if (flg) { 573 switch (indx) { 574 case 0: 575 options.ColPerm = NATURAL; 576 break; 577 case 1: 578 options.ColPerm = MMD_AT_PLUS_A; 579 break; 580 case 2: 581 options.ColPerm = MMD_ATA; 582 break; 583 case 3: 584 options.ColPerm = METIS_AT_PLUS_A; 585 break; 586 case 4: 587 options.ColPerm = PARMETIS; /* only works for np>1 */ 588 break; 589 default: 590 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 591 } 592 } 593 594 options.ReplaceTinyPivot = NO; 595 ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 596 if (set && flg) options.ReplaceTinyPivot = YES; 597 598 options.ParSymbFact = NO; 599 ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 600 if (set && flg && size>1) { 601 #if defined(PETSC_HAVE_PARMETIS) 602 options.ParSymbFact = YES; 603 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 604 #else 605 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 606 #endif 607 } 608 609 lu->FactPattern = SamePattern; 610 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr); 611 if (flg) { 612 switch (indx) { 613 case 0: 614 lu->FactPattern = SamePattern; 615 break; 616 case 1: 617 lu->FactPattern = SamePattern_SameRowPerm; 618 break; 619 case 2: 620 lu->FactPattern = DOFACT; 621 break; 622 } 623 } 624 625 options.IterRefine = NOREFINE; 626 ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set);CHKERRQ(ierr); 627 if (set) { 628 if (flg) options.IterRefine = SLU_DOUBLE; 629 else options.IterRefine = NOREFINE; 630 } 631 632 if (PetscLogPrintInfo) options.PrintStat = YES; 633 else options.PrintStat = NO; 634 ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr); 635 ierr = PetscOptionsEnd();CHKERRQ(ierr); 636 637 lu->options = options; 638 lu->options.Fact = DOFACT; 639 lu->matsolve_iscalled = PETSC_FALSE; 640 lu->matmatsolve_iscalled = PETSC_FALSE; 641 642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr); 643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr); 644 645 *F = B; 646 PetscFunctionReturn(0); 647 } 648 649 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 650 { 651 PetscErrorCode ierr; 652 PetscFunctionBegin; 653 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 654 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 655 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 656 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 /*MC 661 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 662 663 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 664 665 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 666 667 Works with AIJ matrices 668 669 Options Database Keys: 670 + -mat_superlu_dist_r <n> - number of rows in processor partition 671 . -mat_superlu_dist_c <n> - number of columns in processor partition 672 . -mat_superlu_dist_equil - equilibrate the matrix 673 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 674 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 675 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 676 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT 677 . -mat_superlu_dist_iterrefine - use iterative refinement 678 - -mat_superlu_dist_statprint - print factorization information 679 680 Level: beginner 681 682 .seealso: PCLU 683 684 .seealso: PCFactorSetMatSolverType(), MatSolverType 685 686 M*/ 687