1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #define CASTDOUBLECOMPLEX (doublecomplex*) 12 #define CASTDOUBLECOMPLEXSTAR (doublecomplex**) 13 #include <superlu_zdefs.h> 14 #define LUstructInit zLUstructInit 15 #define ScalePermstructInit zScalePermstructInit 16 #define ScalePermstructFree zScalePermstructFree 17 #define LUstructFree zLUstructFree 18 #define Destroy_LU zDestroy_LU 19 #define ScalePermstruct_t zScalePermstruct_t 20 #define LUstruct_t zLUstruct_t 21 #define SOLVEstruct_t zSOLVEstruct_t 22 #define SolveFinalize zSolveFinalize 23 #define pGetDiagU pzGetDiagU 24 #define pgssvx pzgssvx 25 #define allocateA_dist zallocateA_dist 26 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 27 #define SLU SLU_Z 28 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 29 #define DeAllocLlu_3d zDeAllocLlu_3d 30 #define DeAllocGlu_3d zDeAllocGlu_3d 31 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 32 #define pgssvx3d pzgssvx3d 33 #endif 34 #elif defined(PETSC_USE_REAL_SINGLE) 35 #define CASTDOUBLECOMPLEX 36 #define CASTDOUBLECOMPLEXSTAR 37 #include <superlu_sdefs.h> 38 #define LUstructInit sLUstructInit 39 #define ScalePermstructInit sScalePermstructInit 40 #define ScalePermstructFree sScalePermstructFree 41 #define LUstructFree sLUstructFree 42 #define Destroy_LU sDestroy_LU 43 #define ScalePermstruct_t sScalePermstruct_t 44 #define LUstruct_t sLUstruct_t 45 #define SOLVEstruct_t sSOLVEstruct_t 46 #define SolveFinalize sSolveFinalize 47 #define pGetDiagU psGetDiagU 48 #define pgssvx psgssvx 49 #define allocateA_dist sallocateA_dist 50 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 51 #define SLU SLU_S 52 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 53 #define DeAllocLlu_3d sDeAllocLlu_3d 54 #define DeAllocGlu_3d 
sDeAllocGlu_3d 55 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 56 #define pgssvx3d psgssvx3d 57 #endif 58 #else 59 #define CASTDOUBLECOMPLEX 60 #define CASTDOUBLECOMPLEXSTAR 61 #include <superlu_ddefs.h> 62 #define LUstructInit dLUstructInit 63 #define ScalePermstructInit dScalePermstructInit 64 #define ScalePermstructFree dScalePermstructFree 65 #define LUstructFree dLUstructFree 66 #define Destroy_LU dDestroy_LU 67 #define ScalePermstruct_t dScalePermstruct_t 68 #define LUstruct_t dLUstruct_t 69 #define SOLVEstruct_t dSOLVEstruct_t 70 #define SolveFinalize dSolveFinalize 71 #define pGetDiagU pdGetDiagU 72 #define pgssvx pdgssvx 73 #define allocateA_dist dallocateA_dist 74 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 75 #define SLU SLU_D 76 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 77 #define DeAllocLlu_3d dDeAllocLlu_3d 78 #define DeAllocGlu_3d dDeAllocGlu_3d 79 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 80 #define pgssvx3d pdgssvx3d 81 #endif 82 #endif 83 EXTERN_C_END 84 85 typedef struct { 86 int_t nprow,npcol,*row,*col; 87 gridinfo_t grid; 88 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 89 PetscBool use3d; 90 int_t npdep; /* replication factor, must be power of two */ 91 gridinfo3d_t grid3d; 92 #endif 93 superlu_dist_options_t options; 94 SuperMatrix A_sup; 95 ScalePermstruct_t ScalePermstruct; 96 LUstruct_t LUstruct; 97 int StatPrint; 98 SOLVEstruct_t SOLVEstruct; 99 fact_t FactPattern; 100 MPI_Comm comm_superlu; 101 PetscScalar *val; 102 PetscBool matsolve_iscalled,matmatsolve_iscalled; 103 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 104 } Mat_SuperLU_DIST; 105 106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 107 { 108 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 109 110 PetscFunctionBegin; 111 PetscStackCall("SuperLU_DIST:pGetDiagU",pGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,CASTDOUBLECOMPLEX diagU)); 
112 PetscFunctionReturn(0); 113 } 114 115 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 116 { 117 PetscFunctionBegin; 118 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 119 PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU)); 120 PetscFunctionReturn(0); 121 } 122 123 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 124 typedef struct { 125 MPI_Comm comm; 126 PetscBool busy; 127 gridinfo_t grid; 128 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 129 PetscBool use3d; 130 gridinfo3d_t grid3d; 131 #endif 132 } PetscSuperLU_DIST; 133 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 134 135 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *attr_val,void *extra_state) 136 { 137 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *) attr_val; 138 139 PetscFunctionBegin; 140 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Unexpected keyval"); 141 PetscCall(PetscInfo(NULL,"Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n")); 142 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 143 if (context->use3d) { 144 PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&context->grid3d)); 145 } else 146 #endif 147 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&context->grid)); 148 PetscCallMPI(MPI_Comm_free(&context->comm)); 149 PetscCall(PetscFree(context)); 150 PetscFunctionReturn(MPI_SUCCESS); 151 } 152 153 /* 154 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 155 users who do not destroy all PETSc objects before PetscFinalize(). 156 157 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 158 can still check that the keyval associated with the MPI communicator is correct when the MPI 159 communicator is destroyed. 
160 161 This is called in PetscFinalize() 162 */ 163 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 164 { 165 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 166 167 PetscFunctionBegin; 168 PetscCall(PetscInfo(NULL,"Freeing Petsc_Superlu_dist_keyval\n")); 169 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 170 PetscFunctionReturn(0); 171 } 172 173 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 174 { 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 176 177 PetscFunctionBegin; 178 if (lu->CleanUpSuperLU_Dist) { 179 /* Deallocate SuperLU_DIST storage */ 180 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 181 if (lu->options.SolveInitialized) { 182 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 183 } 184 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 185 if (lu->use3d) { 186 if (lu->grid3d.zscp.Iam == 0) { 187 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 188 } else { 189 PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 190 PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct)); 191 } 192 PetscStackCall("SuperLU_DIST:Destroy_A3d_gathered_on_2d",Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 193 } else 194 #endif 195 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 196 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 197 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 198 199 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 200 if (lu->comm_superlu) { 201 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 202 if (lu->use3d) { 203 
PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&lu->grid3d)); 204 } else 205 #endif 206 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 207 } 208 } 209 /* 210 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 211 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 212 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 213 * is off, and the communicator was not released or marked as "not busy " in the old code. 214 * Here we try to release comm regardless. 215 */ 216 if (lu->comm_superlu) { 217 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu)); 218 } else { 219 PetscSuperLU_DIST *context; 220 MPI_Comm comm; 221 PetscMPIInt flg; 222 223 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 224 PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg)); 225 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 226 context->busy = PETSC_FALSE; 227 } 228 229 PetscCall(PetscFree(A->data)); 230 /* clear composed functions */ 231 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 232 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL)); 233 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 238 { 239 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 240 PetscInt m=A->rmap->n; 241 SuperLUStat_t stat; 242 PetscReal berr[1]; 243 PetscScalar *bptr = NULL; 244 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 245 static PetscBool cite = PETSC_FALSE; 246 247 PetscFunctionBegin; 248 PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 249 
PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite)); 250 251 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 252 /* see comments in MatMatSolve() */ 253 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 254 lu->options.SolveInitialized = NO; 255 } 256 PetscCall(VecCopy(b_mpi,x)); 257 PetscCall(VecGetArray(x,&bptr)); 258 259 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 260 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE) 261 if (lu->use3d) 262 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 263 else 264 #endif 265 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 266 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 267 268 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. 
*/ 269 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 270 271 PetscCall(VecRestoreArray(x,&bptr)); 272 lu->matsolve_iscalled = PETSC_TRUE; 273 lu->matmatsolve_iscalled = PETSC_FALSE; 274 PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 278 { 279 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 280 PetscInt m = A->rmap->n,nrhs; 281 SuperLUStat_t stat; 282 PetscReal berr[1]; 283 PetscScalar *bptr; 284 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 285 PetscBool flg; 286 287 PetscFunctionBegin; 288 PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 289 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL)); 290 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 291 if (X != B_mpi) { 292 PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL)); 293 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 294 } 295 296 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 297 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 298 thus destroy it and create a new SOLVEstruct. 299 Otherwise it may result in memory corruption or incorrect solution 300 See src/mat/tests/ex125.c */ 301 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 302 lu->options.SolveInitialized = NO; 303 } 304 if (X != B_mpi) { 305 PetscCall(MatCopy(B_mpi,X,SAME_NONZERO_PATTERN)); 306 } 307 308 PetscCall(MatGetSize(B_mpi,NULL,&nrhs)); 309 310 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. 
*/ 311 PetscCall(MatDenseGetArray(X,&bptr)); 312 313 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE) 314 if (lu->use3d) 315 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 316 else 317 #endif 318 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 319 320 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 321 PetscCall(MatDenseRestoreArray(X,&bptr)); 322 323 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 324 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 325 lu->matsolve_iscalled = PETSC_FALSE; 326 lu->matmatsolve_iscalled = PETSC_TRUE; 327 PetscFunctionReturn(0); 328 } 329 330 /* 331 input: 332 F: numeric Cholesky factor 333 output: 334 nneg: total number of negative pivots 335 nzero: total number of zero pivots 336 npos: (global dimension of F) - nneg - nzero 337 */ 338 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 339 { 340 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 341 PetscScalar *diagU=NULL; 342 PetscInt M,i,neg=0,zero=0,pos=0; 343 PetscReal r; 344 345 PetscFunctionBegin; 346 PetscCheck(F->assembled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 347 PetscCheck(lu->options.RowPerm == NOROWPERM,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 348 PetscCall(MatGetSize(F,&M,NULL)); 349 PetscCall(PetscMalloc1(M,&diagU)); 350 PetscCall(MatSuperluDistGetDiagU(F,diagU)); 351 for (i=0; i<M; i++) { 352 #if defined(PETSC_USE_COMPLEX) 353 r = PetscImaginaryPart(diagU[i])/10.0; 354 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < 
PETSC_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%" PetscInt_FMT "]=%g + i %g is non-real",i,(double)PetscRealPart(diagU[i]),(double)(r*10.0)); 355 r = PetscRealPart(diagU[i]); 356 #else 357 r = diagU[i]; 358 #endif 359 if (r > 0) { 360 pos++; 361 } else if (r < 0) { 362 neg++; 363 } else zero++; 364 } 365 366 PetscCall(PetscFree(diagU)); 367 if (nneg) *nneg = neg; 368 if (nzero) *nzero = zero; 369 if (npos) *npos = pos; 370 PetscFunctionReturn(0); 371 } 372 373 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 374 { 375 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 376 Mat Aloc; 377 const PetscScalar *av; 378 const PetscInt *ai = NULL,*aj = NULL; 379 PetscInt nz,dummy; 380 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 381 SuperLUStat_t stat; 382 PetscReal *berr = 0; 383 PetscBool ismpiaij,isseqaij,flg; 384 385 PetscFunctionBegin; 386 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 387 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij)); 388 if (ismpiaij) { 389 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc)); 390 } else if (isseqaij) { 391 PetscCall(PetscObjectReference((PetscObject)A)); 392 Aloc = A; 393 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name); 394 395 PetscCall(MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg)); 396 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed"); 397 PetscCall(MatSeqAIJGetArrayRead(Aloc,&av)); 398 nz = ai[Aloc->rmap->n]; 399 400 /* Allocations for A_sup */ 401 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 402 PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR &lu->val, &lu->col, &lu->row)); 403 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. 
*/ 404 if (lu->FactPattern == SamePattern_SameRowPerm) { 405 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 406 } else if (lu->FactPattern == SamePattern) { 407 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 408 if (lu->use3d) { 409 if (lu->grid3d.zscp.Iam == 0) { 410 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 411 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 412 } else { 413 PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 414 PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct)); 415 } 416 } else 417 #endif 418 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 419 lu->options.Fact = SamePattern; 420 } else if (lu->FactPattern == DOFACT) { 421 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 422 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 423 lu->options.Fact = DOFACT; 424 PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR &lu->val, &lu->col, &lu->row)); 425 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 426 } 427 428 /* Copy AIJ matrix to superlu_dist matrix */ 429 PetscCall(PetscArraycpy(lu->row,ai,Aloc->rmap->n+1)); 430 PetscCall(PetscArraycpy(lu->col,aj,nz)); 431 PetscCall(PetscArraycpy(lu->val,av,nz)); 432 PetscCall(MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg)); 433 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed"); 434 PetscCall(MatSeqAIJRestoreArrayRead(Aloc,&av)); 435 PetscCall(MatDestroy(&Aloc)); 436 437 /* Create and setup A_sup */ 438 if (lu->options.Fact == DOFACT) { 439 
PetscStackCall("SuperLU_DIST:Create_CompRowLoc_Matrix_dist",Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 440 } 441 442 /* Factor the matrix. */ 443 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 444 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE) 445 if (lu->use3d) { 446 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 447 } else 448 #endif 449 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 450 if (sinfo > 0) { 451 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %d",sinfo); 452 if (sinfo <= lu->A_sup.ncol) { 453 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 454 PetscCall(PetscInfo(F,"U(i,i) is exactly zero, i= %d\n",sinfo)); 455 } else if (sinfo > lu->A_sup.ncol) { 456 /* 457 number of bytes allocated when memory allocation 458 failure occurred, plus A->ncol. 459 */ 460 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 461 PetscCall(PetscInfo(F,"Number of bytes allocated when memory allocation fails %d\n",sinfo)); 462 } 463 } else PetscCheck(sinfo >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 464 465 if (lu->options.PrintStat) { 466 PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 467 } 468 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 469 F->assembled = PETSC_TRUE; 470 F->preallocated = PETSC_TRUE; 471 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. 
only */ 472 PetscFunctionReturn(0); 473 } 474 475 /* Note the Petsc r and c permutations are ignored */ 476 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 477 { 478 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 479 PetscInt M = A->rmap->N,N = A->cmap->N,indx; 480 PetscMPIInt size,mpiflg; 481 PetscBool flg,set; 482 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 483 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 484 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 485 MPI_Comm comm; 486 PetscSuperLU_DIST *context = NULL; 487 488 PetscFunctionBegin; 489 /* Set options to F */ 490 PetscCall(PetscObjectGetComm((PetscObject)F,&comm)); 491 PetscCallMPI(MPI_Comm_size(comm,&size)); 492 493 PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"SuperLU_Dist Options","Mat"); 494 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",lu->options.Equil ? 
PETSC_TRUE : PETSC_FALSE,&flg,&set)); 495 if (set && !flg) lu->options.Equil = NO; 496 497 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg)); 498 if (flg) { 499 switch (indx) { 500 case 0: 501 lu->options.RowPerm = NOROWPERM; 502 break; 503 case 1: 504 lu->options.RowPerm = LargeDiag_MC64; 505 break; 506 case 2: 507 lu->options.RowPerm = LargeDiag_AWPM; 508 break; 509 case 3: 510 lu->options.RowPerm = MY_PERMR; 511 break; 512 default: 513 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 514 } 515 } 516 517 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg)); 518 if (flg) { 519 switch (indx) { 520 case 0: 521 lu->options.ColPerm = NATURAL; 522 break; 523 case 1: 524 lu->options.ColPerm = MMD_AT_PLUS_A; 525 break; 526 case 2: 527 lu->options.ColPerm = MMD_ATA; 528 break; 529 case 3: 530 lu->options.ColPerm = METIS_AT_PLUS_A; 531 break; 532 case 4: 533 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 534 break; 535 default: 536 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 537 } 538 } 539 540 lu->options.ReplaceTinyPivot = NO; 541 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",lu->options.ReplaceTinyPivot ? 
PETSC_TRUE : PETSC_FALSE,&flg,&set)); 542 if (set && flg) lu->options.ReplaceTinyPivot = YES; 543 544 lu->options.ParSymbFact = NO; 545 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set)); 546 if (set && flg && size>1) { 547 #if defined(PETSC_HAVE_PARMETIS) 548 lu->options.ParSymbFact = YES; 549 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 550 #else 551 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 552 #endif 553 } 554 555 lu->FactPattern = SamePattern; 556 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg)); 557 if (flg) { 558 switch (indx) { 559 case 0: 560 lu->FactPattern = SamePattern; 561 break; 562 case 1: 563 lu->FactPattern = SamePattern_SameRowPerm; 564 break; 565 case 2: 566 lu->FactPattern = DOFACT; 567 break; 568 } 569 } 570 571 lu->options.IterRefine = NOREFINE; 572 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",lu->options.IterRefine == NOREFINE ? 
PETSC_FALSE : PETSC_TRUE ,&flg,&set)); 573 if (set) { 574 if (flg) lu->options.IterRefine = SLU_DOUBLE; 575 else lu->options.IterRefine = NOREFINE; 576 } 577 578 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 579 else lu->options.PrintStat = NO; 580 PetscCall(PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)lu->options.PrintStat,(PetscBool*)&lu->options.PrintStat,NULL)); 581 582 /* Additional options for special cases */ 583 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 584 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_Superlu_dist_keyval_Delete_Fn,&Petsc_Superlu_dist_keyval,(void*)0)); 585 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 586 } 587 PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&mpiflg)); 588 if (!mpiflg || context->busy) { /* additional options */ 589 if (!mpiflg) { 590 PetscCall(PetscNew(&context)); 591 context->busy = PETSC_TRUE; 592 PetscCallMPI(MPI_Comm_dup(comm,&context->comm)); 593 PetscCallMPI(MPI_Comm_set_attr(comm,Petsc_Superlu_dist_keyval,context)); 594 } else { 595 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu)); 596 } 597 598 /* Default number of process columns and rows */ 599 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 600 if (!lu->nprow) lu->nprow = 1; 601 while (lu->nprow > 0) { 602 lu->npcol = (int_t) (size/lu->nprow); 603 if (size == lu->nprow * lu->npcol) break; 604 lu->nprow--; 605 } 606 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 607 lu->use3d = PETSC_FALSE; 608 lu->npdep = 1; 609 #endif 610 611 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 612 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d","Use SuperLU_DIST 3D distribution","None",lu->use3d,&lu->use3d,NULL)); 613 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP_SYS,"-mat_superlu_dist_3d requires a system with a getline() implementation"); 614 if 
(lu->use3d) { 615 PetscInt t; 616 PetscCall(PetscOptionsInt("-mat_superlu_dist_d","Number of z entries in processor partition","None",lu->npdep,(PetscInt*)&lu->npdep,NULL)); 617 t = (PetscInt) PetscLog2Real((PetscReal)lu->npdep); 618 PetscCheck(PetscPowInt(2,t) == lu->npdep,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"-mat_superlu_dist_d %lld must be a power of 2",(long long)lu->npdep); 619 if (lu->npdep > 1) { 620 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)(size/lu->npdep))); 621 if (!lu->nprow) lu->nprow = 1; 622 while (lu->nprow > 0) { 623 lu->npcol = (int_t) (size/(lu->npdep*lu->nprow)); 624 if (size == lu->nprow * lu->npcol * lu->npdep) break; 625 lu->nprow--; 626 } 627 } 628 } 629 #endif 630 PetscCall(PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL)); 631 PetscCall(PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL)); 632 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 633 PetscCheck(size == lu->nprow*lu->npcol*lu->npdep,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld",size,(long long)lu->nprow,(long long)lu->npcol,(long long)lu->npdep); 634 #else 635 PetscCheck(size == lu->nprow*lu->npcol,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld",size,(long long)lu->nprow,(long long)lu->npcol); 636 #endif 637 /* end of adding additional options */ 638 639 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 640 if (lu->use3d) { 641 PetscStackCall("SuperLU_DIST:superlu_gridinit3d",superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol,lu->npdep, &lu->grid3d)); 642 if (context) {context->grid3d = lu->grid3d; context->use3d = lu->use3d;} 643 } else { 644 #endif 645 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(context ? 
context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 646 if (context) context->grid = lu->grid; 647 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 648 } 649 #endif 650 PetscCall(PetscInfo(NULL,"Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 651 if (mpiflg) { 652 PetscCall(PetscInfo(NULL,"Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 653 } else { 654 PetscCall(PetscInfo(NULL,"Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 655 } 656 } else { /* (mpiflg && !context->busy) */ 657 PetscCall(PetscInfo(NULL,"Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 658 context->busy = PETSC_TRUE; 659 lu->grid = context->grid; 660 } 661 PetscOptionsEnd(); 662 663 /* Initialize ScalePermstruct and LUstruct. */ 664 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 665 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 666 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 667 F->ops->solve = MatSolve_SuperLU_DIST; 668 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 669 F->ops->getinertia = NULL; 670 671 if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 672 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 673 PetscFunctionReturn(0); 674 } 675 676 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 677 { 678 PetscFunctionBegin; 679 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info)); 680 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 681 PetscFunctionReturn(0); 682 } 683 684 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 685 { 686 PetscFunctionBegin; 687 *type = MATSOLVERSUPERLU_DIST; 688 PetscFunctionReturn(0); 689 } 690 691 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat 
A,PetscViewer viewer) 692 { 693 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 694 superlu_dist_options_t options; 695 696 PetscFunctionBegin; 697 /* check if matrix is superlu_dist type */ 698 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 699 700 options = lu->options; 701 PetscCall(PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n")); 702 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 703 * format spec for int64_t is set to %d for whatever reason */ 704 PetscCall(PetscViewerASCIIPrintf(viewer," Process grid nprow %lld x npcol %lld \n",(long long)lu->nprow,(long long)lu->npcol)); 705 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 706 if (lu->use3d) { 707 PetscCall(PetscViewerASCIIPrintf(viewer," Using 3d decomposition with npdep %lld \n",(long long)lu->npdep)); 708 } 709 #endif 710 711 PetscCall(PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO])); 712 PetscCall(PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO])); 713 PetscCall(PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE])); 714 PetscCall(PetscViewerASCIIPrintf(viewer," Processors in row %lld col partition %lld \n",(long long)lu->nprow,(long long)lu->npcol)); 715 716 switch (options.RowPerm) { 717 case NOROWPERM: 718 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n")); 719 break; 720 case LargeDiag_MC64: 721 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n")); 722 break; 723 case LargeDiag_AWPM: 724 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n")); 725 break; 726 case MY_PERMR: 727 PetscCall(PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n")); 728 break; 729 default: 730 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 731 } 732 733 switch (options.ColPerm) { 734 case 
NATURAL: 735 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n")); 736 break; 737 case MMD_AT_PLUS_A: 738 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n")); 739 break; 740 case MMD_ATA: 741 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n")); 742 break; 743 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 744 case METIS_AT_PLUS_A: 745 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n")); 746 break; 747 case PARMETIS: 748 PetscCall(PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n")); 749 break; 750 default: 751 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 752 } 753 754 PetscCall(PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO])); 755 756 if (lu->FactPattern == SamePattern) { 757 PetscCall(PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n")); 758 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 759 PetscCall(PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n")); 760 } else if (lu->FactPattern == DOFACT) { 761 PetscCall(PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n")); 762 } else { 763 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 769 { 770 PetscBool iascii; 771 PetscViewerFormat format; 772 773 PetscFunctionBegin; 774 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 775 if (iascii) { 776 PetscCall(PetscViewerGetFormat(viewer,&format)); 777 if (format == PETSC_VIEWER_ASCII_INFO) { 778 PetscCall(MatView_Info_SuperLU_DIST(A,viewer)); 779 } 780 } 781 PetscFunctionReturn(0); 782 } 783 784 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat 
A,MatFactorType ftype,Mat *F)
{
  Mat                    B;
  Mat_SuperLU_DIST       *lu;
  PetscInt               M = A->rmap->N,N = A->cmap->N;
  PetscMPIInt            size;
  superlu_dist_options_t options;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
  PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,M,N));
  PetscCall(PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  /* Set the default input options:
       options.Fact              = DOFACT;
       options.Equil             = YES;
       options.ParSymbFact       = NO;
       options.ColPerm           = METIS_AT_PLUS_A;
       options.RowPerm           = LargeDiag_MC64;
       options.ReplaceTinyPivot  = YES;
       options.IterRefine        = DOUBLE;
       options.Trans             = NOTRANS;
       options.SolveInitialized  = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
       options.RefineInitialized = NO;
       options.PrintStat         = YES;
       options.SymPattern        = NO;
  */
  set_default_options_dist(&options);

  B->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU) {
    B->factortype            = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    /* Cholesky is mapped onto LU with a symmetric sparsity pattern */
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
    options.SymPattern             = YES;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype));

  PetscCall(PetscNewLog(B,&lu));
  B->data = lu;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist)); 842 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST)); 843 844 *F = B; 845 PetscFunctionReturn(0); 846 } 847 848 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 849 { 850 PetscFunctionBegin; 851 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist)); 852 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist)); 853 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist)); 854 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist)); 855 PetscFunctionReturn(0); 856 } 857 858 /*MC 859 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 860 861 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 862 863 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 864 865 Works with AIJ matrices 866 867 Options Database Keys: 868 + -mat_superlu_dist_r <n> - number of rows in processor partition 869 . -mat_superlu_dist_c <n> - number of columns in processor partition 870 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 871 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d) is provided 872 . -mat_superlu_dist_equil - equilibrate the matrix 873 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 874 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 875 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 876 . 
-mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information

  Notes:
  If PETSc was configured with --with-cuda then this solver will automatically use the GPUs.

  Level: beginner

.seealso: `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`

M*/