1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #define CASTDOUBLECOMPLEX (doublecomplex*) 12 #define CASTDOUBLECOMPLEXSTAR (doublecomplex**) 13 #include <superlu_zdefs.h> 14 #define LUstructInit zLUstructInit 15 #define ScalePermstructInit zScalePermstructInit 16 #define ScalePermstructFree zScalePermstructFree 17 #define LUstructFree zLUstructFree 18 #define Destroy_LU zDestroy_LU 19 #define ScalePermstruct_t zScalePermstruct_t 20 #define LUstruct_t zLUstruct_t 21 #define SOLVEstruct_t zSOLVEstruct_t 22 #define SolveFinalize zSolveFinalize 23 #define pGetDiagU pzGetDiagU 24 #define pgssvx pzgssvx 25 #define allocateA_dist zallocateA_dist 26 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 27 #define SLU SLU_Z 28 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 29 #define DeAllocLlu_3d zDeAllocLlu_3d 30 #define DeAllocGlu_3d zDeAllocGlu_3d 31 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 32 #define pgssvx3d pzgssvx3d 33 #endif 34 #elif defined(PETSC_USE_REAL_SINGLE) 35 #define CASTDOUBLECOMPLEX 36 #define CASTDOUBLECOMPLEXSTAR 37 #include <superlu_sdefs.h> 38 #define LUstructInit sLUstructInit 39 #define ScalePermstructInit sScalePermstructInit 40 #define ScalePermstructFree sScalePermstructFree 41 #define LUstructFree sLUstructFree 42 #define Destroy_LU sDestroy_LU 43 #define ScalePermstruct_t sScalePermstruct_t 44 #define LUstruct_t sLUstruct_t 45 #define SOLVEstruct_t sSOLVEstruct_t 46 #define SolveFinalize sSolveFinalize 47 #define pGetDiagU psGetDiagU 48 #define pgssvx psgssvx 49 #define allocateA_dist sallocateA_dist 50 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 51 #define SLU SLU_S 52 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 53 #define DeAllocLlu_3d sDeAllocLlu_3d 54 #define DeAllocGlu_3d sDeAllocGlu_3d 55 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 56 #define pgssvx3d psgssvx3d 57 #endif 58 #else 59 #define CASTDOUBLECOMPLEX 60 #define CASTDOUBLECOMPLEXSTAR 61 #include <superlu_ddefs.h> 62 #define LUstructInit dLUstructInit 63 #define ScalePermstructInit dScalePermstructInit 64 #define ScalePermstructFree dScalePermstructFree 65 #define LUstructFree dLUstructFree 66 #define Destroy_LU dDestroy_LU 67 #define ScalePermstruct_t dScalePermstruct_t 68 #define LUstruct_t dLUstruct_t 69 #define SOLVEstruct_t dSOLVEstruct_t 70 #define SolveFinalize dSolveFinalize 71 #define pGetDiagU pdGetDiagU 72 #define pgssvx pdgssvx 73 #define allocateA_dist dallocateA_dist 74 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 75 #define SLU SLU_D 76 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 77 #define DeAllocLlu_3d dDeAllocLlu_3d 78 #define DeAllocGlu_3d dDeAllocGlu_3d 79 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 80 #define pgssvx3d pdgssvx3d 81 #endif 82 #endif 83 EXTERN_C_END 84 85 typedef struct { 86 int_t nprow,npcol,*row,*col; 87 gridinfo_t grid; 88 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 89 PetscBool use3d; 90 int_t npdep; /* replication factor, must be power of two */ 91 gridinfo3d_t grid3d; 92 #endif 93 superlu_dist_options_t options; 94 SuperMatrix A_sup; 95 ScalePermstruct_t ScalePermstruct; 96 LUstruct_t LUstruct; 97 int StatPrint; 98 SOLVEstruct_t SOLVEstruct; 99 fact_t FactPattern; 100 MPI_Comm comm_superlu; 101 PetscScalar *val; 102 PetscBool matsolve_iscalled,matmatsolve_iscalled; 103 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 104 } Mat_SuperLU_DIST; 105 106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 107 { 108 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 109 110 PetscFunctionBegin; 111 PetscStackCall("SuperLU_DIST:pGetDiagU",pGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,CASTDOUBLECOMPLEX diagU)); 112 PetscFunctionReturn(0); 113 } 114 115 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 116 { 117 PetscFunctionBegin; 118 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 119 PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU)); 120 PetscFunctionReturn(0); 121 } 122 123 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 124 typedef struct { 125 MPI_Comm comm; 126 PetscBool busy; 127 gridinfo_t grid; 128 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 129 PetscBool use3d; 130 gridinfo3d_t grid3d; 131 #endif 132 } PetscSuperLU_DIST; 133 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 134 135 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *attr_val,void *extra_state) 136 { 137 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *) attr_val; 138 139 PetscFunctionBegin; 140 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Unexpected keyval"); 141 PetscCall(PetscInfo(NULL,"Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n")); 142 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 143 if (context->use3d) { 144 PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&context->grid3d)); 145 } else 146 #endif 147 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&context->grid)); 148 PetscCallMPI(MPI_Comm_free(&context->comm)); 149 PetscCall(PetscFree(context)); 150 PetscFunctionReturn(MPI_SUCCESS); 151 } 152 153 /* 154 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 155 users who do not destroy all PETSc objects before PetscFinalize(). 156 157 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 158 can still check that the keyval associated with the MPI communicator is correct when the MPI 159 communicator is destroyed. 160 161 This is called in PetscFinalize() 162 */ 163 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 164 { 165 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 166 167 PetscFunctionBegin; 168 PetscCall(PetscInfo(NULL,"Freeing Petsc_Superlu_dist_keyval\n")); 169 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 170 PetscFunctionReturn(0); 171 } 172 173 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 174 { 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 176 177 PetscFunctionBegin; 178 if (lu->CleanUpSuperLU_Dist) { 179 /* Deallocate SuperLU_DIST storage */ 180 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 181 if (lu->options.SolveInitialized) { 182 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 183 } 184 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 185 if (lu->use3d) { 186 if (lu->grid3d.zscp.Iam == 0) { 187 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 188 } else { 189 PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 190 PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct)); 191 } 192 PetscStackCall("SuperLU_DIST:Destroy_A3d_gathered_on_2d",Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 193 } else 194 #endif 195 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 196 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 197 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 198 199 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 200 if (lu->comm_superlu) { 201 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 202 if (lu->use3d) { 203 PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&lu->grid3d)); 204 } else 205 #endif 206 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 207 } 208 } 209 /* 210 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 211 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 212 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 213 * is off, and the communicator was not released or marked as "not busy " in the old code. 214 * Here we try to release comm regardless. 215 */ 216 if (lu->comm_superlu) { 217 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu)); 218 } else { 219 PetscSuperLU_DIST *context; 220 MPI_Comm comm; 221 PetscMPIInt flg; 222 223 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 224 PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg)); 225 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 226 context->busy = PETSC_FALSE; 227 } 228 229 PetscCall(PetscFree(A->data)); 230 /* clear composed functions */ 231 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 232 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL)); 233 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 238 { 239 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 240 PetscInt m=A->rmap->n; 241 SuperLUStat_t stat; 242 PetscReal berr[1]; 243 PetscScalar *bptr = NULL; 244 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 245 static PetscBool cite = PETSC_FALSE; 246 247 PetscFunctionBegin; 248 PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 249 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite)); 250 251 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 252 /* see comments in MatMatSolve() */ 253 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 254 lu->options.SolveInitialized = NO; 255 } 256 PetscCall(VecCopy(b_mpi,x)); 257 PetscCall(VecGetArray(x,&bptr)); 258 259 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 260 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE) 261 if (lu->use3d) 262 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 263 else 264 #endif 265 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 266 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 267 268 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 269 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 270 271 PetscCall(VecRestoreArray(x,&bptr)); 272 lu->matsolve_iscalled = PETSC_TRUE; 273 lu->matmatsolve_iscalled = PETSC_FALSE; 274 PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 278 { 279 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 280 PetscInt m = A->rmap->n,nrhs; 281 SuperLUStat_t stat; 282 PetscReal berr[1]; 283 PetscScalar *bptr; 284 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 285 PetscBool flg; 286 287 PetscFunctionBegin; 288 PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 289 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL)); 290 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 291 if (X != B_mpi) { 292 PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL)); 293 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 294 } 295 296 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 297 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 298 thus destroy it and create a new SOLVEstruct. 299 Otherwise it may result in memory corruption or incorrect solution 300 See src/mat/tests/ex125.c */ 301 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 302 lu->options.SolveInitialized = NO; 303 } 304 if (X != B_mpi) { 305 PetscCall(MatCopy(B_mpi,X,SAME_NONZERO_PATTERN)); 306 } 307 308 PetscCall(MatGetSize(B_mpi,NULL,&nrhs)); 309 310 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 311 PetscCall(MatDenseGetArray(X,&bptr)); 312 313 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE) 314 if (lu->use3d) 315 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 316 else 317 #endif 318 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 319 320 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 321 PetscCall(MatDenseRestoreArray(X,&bptr)); 322 323 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 324 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 325 lu->matsolve_iscalled = PETSC_FALSE; 326 lu->matmatsolve_iscalled = PETSC_TRUE; 327 PetscFunctionReturn(0); 328 } 329 330 /* 331 input: 332 F: numeric Cholesky factor 333 output: 334 nneg: total number of negative pivots 335 nzero: total number of zero pivots 336 npos: (global dimension of F) - nneg - nzero 337 */ 338 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 339 { 340 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 341 PetscScalar *diagU=NULL; 342 PetscInt M,i,neg=0,zero=0,pos=0; 343 PetscReal r; 344 345 PetscFunctionBegin; 346 PetscCheck(F->assembled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 347 PetscCheck(lu->options.RowPerm == NOROWPERM,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 348 PetscCall(MatGetSize(F,&M,NULL)); 349 PetscCall(PetscMalloc1(M,&diagU)); 350 PetscCall(MatSuperluDistGetDiagU(F,diagU)); 351 for (i=0; i<M; i++) { 352 #if defined(PETSC_USE_COMPLEX) 353 r = PetscImaginaryPart(diagU[i])/10.0; 354 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%" PetscInt_FMT "]=%g + i %g is non-real",i,(double)PetscRealPart(diagU[i]),(double)(r*10.0)); 355 r = PetscRealPart(diagU[i]); 356 #else 357 r = diagU[i]; 358 #endif 359 if (r > 0) { 360 pos++; 361 } else if (r < 0) { 362 neg++; 363 } else zero++; 364 } 365 366 PetscCall(PetscFree(diagU)); 367 if (nneg) *nneg = neg; 368 if (nzero) *nzero = zero; 369 if (npos) *npos = pos; 370 PetscFunctionReturn(0); 371 } 372 373 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 374 { 375 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 376 Mat Aloc; 377 const PetscScalar *av; 378 const PetscInt *ai = NULL,*aj = NULL; 379 PetscInt nz,dummy; 380 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 381 SuperLUStat_t stat; 382 PetscReal *berr = 0; 383 PetscBool ismpiaij,isseqaij,flg; 384 385 PetscFunctionBegin; 386 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 387 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij)); 388 if (ismpiaij) { 389 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc)); 390 } else if (isseqaij) { 391 PetscCall(PetscObjectReference((PetscObject)A)); 392 Aloc = A; 393 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name); 394 395 PetscCall(MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg)); 396 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed"); 397 PetscCall(MatSeqAIJGetArrayRead(Aloc,&av)); 398 nz = ai[Aloc->rmap->n]; 399 400 /* Allocations for A_sup */ 401 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 402 PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR &lu->val, &lu->col, &lu->row)); 403 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 404 if (lu->FactPattern == SamePattern_SameRowPerm) { 405 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 406 } else if (lu->FactPattern == SamePattern) { 407 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 408 if (lu->use3d) { 409 if (lu->grid3d.zscp.Iam == 0) { 410 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 411 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 412 } else { 413 PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 414 PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct)); 415 } 416 } else 417 #endif 418 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 419 lu->options.Fact = SamePattern; 420 } else if (lu->FactPattern == DOFACT) { 421 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 422 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 423 lu->options.Fact = DOFACT; 424 PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR &lu->val, &lu->col, &lu->row)); 425 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 426 } 427 428 /* Copy AIJ matrix to superlu_dist matrix */ 429 PetscCall(PetscArraycpy(lu->row,ai,Aloc->rmap->n+1)); 430 PetscCall(PetscArraycpy(lu->col,aj,nz)); 431 PetscCall(PetscArraycpy(lu->val,av,nz)); 432 PetscCall(MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg)); 433 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed"); 434 PetscCall(MatSeqAIJRestoreArrayRead(Aloc,&av)); 435 PetscCall(MatDestroy(&Aloc)); 436 437 /* Create and setup A_sup */ 438 if (lu->options.Fact == DOFACT) { 439 PetscStackCall("SuperLU_DIST:Create_CompRowLoc_Matrix_dist",Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 440 } 441 442 /* Factor the matrix. */ 443 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 444 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE) 445 if (lu->use3d) { 446 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 447 } else 448 #endif 449 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 450 if (sinfo > 0) { 451 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %d",sinfo); 452 else { 453 if (sinfo <= lu->A_sup.ncol) { 454 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 455 PetscCall(PetscInfo(F,"U(i,i) is exactly zero, i= %d\n",sinfo)); 456 } else if (sinfo > lu->A_sup.ncol) { 457 /* 458 number of bytes allocated when memory allocation 459 failure occurred, plus A->ncol. 460 */ 461 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 462 PetscCall(PetscInfo(F,"Number of bytes allocated when memory allocation fails %d\n",sinfo)); 463 } 464 } 465 } else PetscCheck(sinfo >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 466 467 if (lu->options.PrintStat) { 468 PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 469 } 470 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 471 F->assembled = PETSC_TRUE; 472 F->preallocated = PETSC_TRUE; 473 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 474 PetscFunctionReturn(0); 475 } 476 477 /* Note the Petsc r and c permutations are ignored */ 478 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 479 { 480 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 481 PetscInt M = A->rmap->N,N = A->cmap->N,indx; 482 PetscMPIInt size,mpiflg; 483 PetscBool flg,set; 484 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 485 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 486 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 487 MPI_Comm comm; 488 PetscSuperLU_DIST *context = NULL; 489 490 PetscFunctionBegin; 491 /* Set options to F */ 492 PetscCall(PetscObjectGetComm((PetscObject)F,&comm)); 493 PetscCallMPI(MPI_Comm_size(comm,&size)); 494 495 PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"SuperLU_Dist Options","Mat"); 496 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",lu->options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set)); 497 if (set && !flg) lu->options.Equil = NO; 498 499 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg)); 500 if (flg) { 501 switch (indx) { 502 case 0: 503 lu->options.RowPerm = NOROWPERM; 504 break; 505 case 1: 506 lu->options.RowPerm = LargeDiag_MC64; 507 break; 508 case 2: 509 lu->options.RowPerm = LargeDiag_AWPM; 510 break; 511 case 3: 512 lu->options.RowPerm = MY_PERMR; 513 break; 514 default: 515 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 516 } 517 } 518 519 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg)); 520 if (flg) { 521 switch (indx) { 522 case 0: 523 lu->options.ColPerm = NATURAL; 524 break; 525 case 1: 526 lu->options.ColPerm = MMD_AT_PLUS_A; 527 break; 528 case 2: 529 lu->options.ColPerm = MMD_ATA; 530 break; 531 case 3: 532 lu->options.ColPerm = METIS_AT_PLUS_A; 533 break; 534 case 4: 535 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 536 break; 537 default: 538 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 539 } 540 } 541 542 lu->options.ReplaceTinyPivot = NO; 543 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set)); 544 if (set && flg) lu->options.ReplaceTinyPivot = YES; 545 546 lu->options.ParSymbFact = NO; 547 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set)); 548 if (set && flg && size>1) { 549 #if defined(PETSC_HAVE_PARMETIS) 550 lu->options.ParSymbFact = YES; 551 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 552 #else 553 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 554 #endif 555 } 556 557 lu->FactPattern = SamePattern; 558 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg)); 559 if (flg) { 560 switch (indx) { 561 case 0: 562 lu->FactPattern = SamePattern; 563 break; 564 case 1: 565 lu->FactPattern = SamePattern_SameRowPerm; 566 break; 567 case 2: 568 lu->FactPattern = DOFACT; 569 break; 570 } 571 } 572 573 lu->options.IterRefine = NOREFINE; 574 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set)); 575 if (set) { 576 if (flg) lu->options.IterRefine = SLU_DOUBLE; 577 else lu->options.IterRefine = NOREFINE; 578 } 579 580 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 581 else lu->options.PrintStat = NO; 582 PetscCall(PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)lu->options.PrintStat,(PetscBool*)&lu->options.PrintStat,NULL)); 583 584 /* Additional options for special cases */ 585 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 586 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_Superlu_dist_keyval_Delete_Fn,&Petsc_Superlu_dist_keyval,(void*)0)); 587 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 588 } 589 PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&mpiflg)); 590 if (!mpiflg || context->busy) { /* additional options */ 591 if (!mpiflg) { 592 PetscCall(PetscNew(&context)); 593 context->busy = PETSC_TRUE; 594 PetscCallMPI(MPI_Comm_dup(comm,&context->comm)); 595 PetscCallMPI(MPI_Comm_set_attr(comm,Petsc_Superlu_dist_keyval,context)); 596 } else { 597 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu)); 598 } 599 600 /* Default number of process columns and rows */ 601 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 602 if (!lu->nprow) lu->nprow = 1; 603 while (lu->nprow > 0) { 604 lu->npcol = (int_t) (size/lu->nprow); 605 if (size == lu->nprow * lu->npcol) break; 606 lu->nprow--; 607 } 608 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 609 lu->use3d = PETSC_FALSE; 610 lu->npdep = 1; 611 #endif 612 613 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 614 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d","Use SuperLU_DIST 3D distribution","None",lu->use3d,&lu->use3d,NULL)); 615 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP_SYS,"-mat_superlu_dist_3d requires a system with a getline() implementation"); 616 if (lu->use3d) { 617 PetscInt t; 618 PetscCall(PetscOptionsInt("-mat_superlu_dist_d","Number of z entries in processor partition","None",lu->npdep,(PetscInt*)&lu->npdep,NULL)); 619 t = (PetscInt) PetscLog2Real((PetscReal)lu->npdep); 620 PetscCheck(PetscPowInt(2,t) == lu->npdep,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"-mat_superlu_dist_d %lld must be a power of 2",(long long)lu->npdep); 621 if (lu->npdep > 1) { 622 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)(size/lu->npdep))); 623 if (!lu->nprow) lu->nprow = 1; 624 while (lu->nprow > 0) { 625 lu->npcol = (int_t) (size/(lu->npdep*lu->nprow)); 626 if (size == lu->nprow * lu->npcol * lu->npdep) break; 627 lu->nprow--; 628 } 629 } 630 } 631 #endif 632 PetscCall(PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL)); 633 PetscCall(PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL)); 634 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 635 PetscCheck(size == lu->nprow*lu->npcol*lu->npdep,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld",size,(long long)lu->nprow,(long long)lu->npcol,(long long)lu->npdep); 636 #else 637 PetscCheck(size == lu->nprow*lu->npcol,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld",size,(long long)lu->nprow,(long long)lu->npcol); 638 #endif 639 /* end of adding additional options */ 640 641 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 642 if (lu->use3d) { 643 PetscStackCall("SuperLU_DIST:superlu_gridinit3d",superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol,lu->npdep, &lu->grid3d)); 644 if (context) {context->grid3d = lu->grid3d; context->use3d = lu->use3d;} 645 } else { 646 #endif 647 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 648 if (context) context->grid = lu->grid; 649 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 650 } 651 #endif 652 PetscCall(PetscInfo(NULL,"Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 653 if (mpiflg) { 654 PetscCall(PetscInfo(NULL,"Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 655 } else { 656 PetscCall(PetscInfo(NULL,"Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 657 } 658 } else { /* (mpiflg && !context->busy) */ 659 PetscCall(PetscInfo(NULL,"Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 660 context->busy = PETSC_TRUE; 661 lu->grid = context->grid; 662 } 663 PetscOptionsEnd(); 664 665 /* Initialize ScalePermstruct and LUstruct. */ 666 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 667 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 668 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 669 F->ops->solve = MatSolve_SuperLU_DIST; 670 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 671 F->ops->getinertia = NULL; 672 673 if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 674 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 675 PetscFunctionReturn(0); 676 } 677 678 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 679 { 680 PetscFunctionBegin; 681 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info)); 682 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 683 PetscFunctionReturn(0); 684 } 685 686 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 687 { 688 PetscFunctionBegin; 689 *type = MATSOLVERSUPERLU_DIST; 690 PetscFunctionReturn(0); 691 } 692 693 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer) 694 { 695 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 696 superlu_dist_options_t options; 697 698 PetscFunctionBegin; 699 /* check if matrix is superlu_dist type */ 700 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 701 702 options = lu->options; 703 PetscCall(PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n")); 704 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 705 * format spec for int64_t is set to %d for whatever reason */ 706 PetscCall(PetscViewerASCIIPrintf(viewer," Process grid nprow %lld x npcol %lld \n",(long long)lu->nprow,(long long)lu->npcol)); 707 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 708 if (lu->use3d) { 709 PetscCall(PetscViewerASCIIPrintf(viewer," Using 3d decomposition with npdep %lld \n",(long long)lu->npdep)); 710 } 711 #endif 712 713 PetscCall(PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO])); 714 PetscCall(PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO])); 715 PetscCall(PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE])); 716 PetscCall(PetscViewerASCIIPrintf(viewer," Processors in row %lld col partition %lld \n",(long long)lu->nprow,(long long)lu->npcol)); 717 718 switch (options.RowPerm) { 719 case NOROWPERM: 720 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n")); 721 break; 722 case LargeDiag_MC64: 723 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n")); 724 break; 725 case LargeDiag_AWPM: 726 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n")); 727 break; 728 case MY_PERMR: 729 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n")); 730 break; 731 default: 732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 733 } 734 735 switch (options.ColPerm) { 736 case NATURAL: 737 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n")); 738 break; 739 case MMD_AT_PLUS_A: 740 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n")); 741 break; 742 case MMD_ATA: 743 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n")); 744 break; 745 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 746 case METIS_AT_PLUS_A: 747 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n")); 748 break; 749 case PARMETIS: 750 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n")); 751 break; 752 default: 753 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 754 } 755 756 PetscCall(PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO])); 757 758 if (lu->FactPattern == SamePattern) { 759 PetscCall(PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n")); 760 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 761 PetscCall(PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n")); 762 } else if (lu->FactPattern == DOFACT) { 763 PetscCall(PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n")); 764 } else { 765 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 766 } 767 PetscFunctionReturn(0); 768 } 769 770 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 771 { 772 PetscBool iascii; 773 PetscViewerFormat format; 774 775 PetscFunctionBegin; 776 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 777 if (iascii) { 778 PetscCall(PetscViewerGetFormat(viewer,&format)); 779 if (format == PETSC_VIEWER_ASCII_INFO) { 780 PetscCall(MatView_Info_SuperLU_DIST(A,viewer)); 781 } 782 } 783 PetscFunctionReturn(0); 784 } 785 786 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 787 { 788 Mat B; 789 Mat_SuperLU_DIST *lu; 790 PetscInt M=A->rmap->N,N=A->cmap->N; 791 PetscMPIInt size; 792 superlu_dist_options_t options; 793 794 PetscFunctionBegin; 795 /* Create the factorization matrix */ 796 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 797 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,M,N)); 798 PetscCall(PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name)); 799 PetscCall(MatSetUp(B)); 800 B->ops->getinfo = MatGetInfo_External; 801 B->ops->view = MatView_SuperLU_DIST; 802 B->ops->destroy = MatDestroy_SuperLU_DIST; 803 804 /* Set the default input options: 805 options.Fact = DOFACT; 806 options.Equil = YES; 807 options.ParSymbFact = NO; 808 options.ColPerm = METIS_AT_PLUS_A; 809 options.RowPerm = LargeDiag_MC64; 810 options.ReplaceTinyPivot = YES; 811 options.IterRefine = DOUBLE; 812 options.Trans = NOTRANS; 813 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 814 options.RefineInitialized = NO; 815 options.PrintStat = YES; 816 options.SymPattern = NO; 817 */ 818 set_default_options_dist(&options); 819 820 B->trivialsymbolic = PETSC_TRUE; 821 if (ftype == MAT_FACTOR_LU) { 822 B->factortype = MAT_FACTOR_LU; 823 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 824 } else { 825 B->factortype = MAT_FACTOR_CHOLESKY; 826 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 827 options.SymPattern = YES; 828 } 829 830 /* set solvertype */ 831 PetscCall(PetscFree(B->solvertype)); 832 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype)); 833 834 PetscCall(PetscNewLog(B,&lu)); 835 B->data = lu; 836 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 837 838 lu->options = options; 839 lu->options.Fact = DOFACT; 840 lu->matsolve_iscalled = PETSC_FALSE; 841 lu->matmatsolve_iscalled = PETSC_FALSE; 842 843 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist)); 844 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST)); 845 846 *F = B; 847 PetscFunctionReturn(0); 848 } 849 850 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 851 { 852 PetscFunctionBegin; 853 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist)); 854 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist)); 855 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist)); 856 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist)); 857 PetscFunctionReturn(0); 858 } 859 860 /*MC 861 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 862 863 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 864 865 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 866 867 Works with AIJ matrices 868 869 Options Database Keys: 870 + -mat_superlu_dist_r <n> - number of rows in processor partition 871 . -mat_superlu_dist_c <n> - number of columns in processor partition 872 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 873 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d) is provided 874 . -mat_superlu_dist_equil - equilibrate the matrix 875 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 876 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 877 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 878 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT 879 . -mat_superlu_dist_iterrefine - use iterative refinement 880 - -mat_superlu_dist_statprint - print factorization information 881 882 Notes: 883 If PETSc was configured with --with-cuda than this solver will automatically use the GPUs. 884 885 Level: beginner 886 887 .seealso: `PCLU` 888 889 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType` 890 891 M*/ 892